├── .gitignore ├── .gitmodules ├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── bitbucket-pipelines.yml ├── doc ├── CMakeLists.txt ├── Doxyfile.in └── posit_with_nan.png ├── extern └── args.hxx ├── include ├── FindInt128.cmake ├── anyfloat.hpp ├── binary8.hpp ├── bithippop.hpp ├── catch.hpp ├── exp2.hpp ├── exp2t.hpp ├── fixedtraits.hpp ├── float12.hpp ├── float16native32.hpp ├── float2posit.hpp ├── floatconst2bits.hpp ├── floattraits.hpp ├── posit.h ├── posit10.hpp ├── posit12.hpp ├── posit8.hpp ├── positeigen.h ├── positf.h ├── simd │ ├── asimd_16.h │ ├── asimd_32.h │ ├── asimd_8.h │ ├── asimd_all.h │ ├── asimd_base.h │ ├── asimd_d.h │ └── asimd_f.h ├── simdposit8.hpp ├── softfloat.hpp ├── tposit.hpp ├── tvalids.hpp ├── typehelpers.hpp └── unpacked.h ├── jupyter ├── .ipynb_checkpoints │ └── first-checkpoint.ipynb └── first.ipynb ├── makeone.sh ├── matlab ├── Lenna.png ├── cellindex.m ├── cellvcat.m ├── compareposits.m ├── compareposits2.m ├── describetab.m ├── dump2table.m ├── fromindex.m ├── halfinrange.m ├── halfprecision │ ├── halfprecision.c │ ├── halfprecision.m │ ├── halfprecisionmax.m │ ├── halfprecisionmin.m │ ├── ieeehalfprecision.c │ ├── license.txt │ └── origin.txt ├── limittab.m ├── loadpositdump.m ├── meandiff.m ├── mex_make.m ├── morton.m ├── normr.m ├── p8Tofloat.cpp ├── p8binop.cpp ├── p8unop.cpp ├── sift_fea.m ├── test_p8conv.m ├── testfloat11.m └── toindex.m ├── preamble.txt ├── scripts ├── float2bin.py ├── listpositany.sh ├── listposits8_16.sh ├── listposits8_16bin.sh ├── punum.py ├── valid2tvalid.py └── validsgentable.py ├── src ├── binary8.cpp ├── binary8_gen.cpp ├── floatTop8.cpp ├── listposits.cpp ├── main.cpp ├── maineigen.cpp ├── posit10.cpp ├── posit10_gen.cpp ├── posit10_tbl.cpp ├── posit12.cpp ├── posit12_gen.cpp ├── posit12_tbl.cpp ├── posit8.cpp ├── posit8_gen.cpp ├── posit8_tbl.cpp ├── simdposit8.cpp └── softfloat_gen.cpp └── tests ├── itest_anyfloat.cpp ├── itest_anyvsreallimit.cpp ├── itest_constexpr.cpp ├── 
itest_float16native32.cpp ├── itest_lowlimits.cpp ├── itest_positf.cpp ├── itest_unpackfixed.cpp ├── itest_unpackfloat.cpp ├── makelimits.sh ├── minimal.cpp ├── test_dot.cpp ├── test_numeric_limits.cpp ├── testbit.cpp ├── testeps.cpp ├── testfind.cpp ├── testnextprev.cpp ├── testposit.cpp ├── testposit10.cpp ├── testposit12.cpp ├── testposit8.cpp ├── testregime.cpp ├── testsimdposit8.cpp ├── testsoftfloat.cpp ├── testsposit.hpp └── testunpacked.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | */posit12_tbl.cpp 2 | */posit16_tbl.cpp 3 | */float12_tbl.cpp 4 | */float16_tbl.cpp 5 | # Prerequisites 6 | *.d 7 | 8 | # Compiled Object files 9 | *.slo 10 | *.lo 11 | *.o 12 | *.obj 13 | 14 | # Precompiled Headers 15 | *.gch 16 | *.pch 17 | 18 | # Compiled Dynamic libraries 19 | *.so 20 | *.dylib 21 | *.dll 22 | 23 | # Fortran module files 24 | *.mod 25 | *.smod 26 | 27 | # Compiled Static libraries 28 | *.lai 29 | *.la 30 | *.a 31 | *.lib 32 | 33 | # Executables 34 | *.exe 35 | *.out 36 | *.app 37 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "softfloat"] 2 | path = extern/softfloat 3 | url = https://github.com/eruffaldi/berkeley-softfloat-3.git 4 | 5 | [submodule "extern/fixed_point"] 6 | path = extern/fixed_point 7 | url = https://github.com/johnmcfarlane/fixed_point 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | script: cmake 3 | compiler: g++ 4 | dist: xenial 5 | 6 | before_script: 7 | - gcc --version 8 | - sudo unlink /usr/bin/gcc && sudo ln -s /usr/bin/gcc-5 /usr/bin/gcc 9 | - gcc --version 10 | # create a build folder for the out-of-source build 11 | - mkdir build 12 | # switch to build directory 13 | - cd build 14 | # 
# Top-level build description for cppPosit: the sample/test executables, the
# tabulated posit/binary8 library (posittab), the header-only `posit` interface
# target and the optional softfloat-backed float16 table library.
cmake_minimum_required(VERSION 3.0)
project(cppposit)

#Issues with clang. Build soft float manually
#add_subdirectory(extern/softfloat)
#link_directories(extern/lib)

#set(SYSPROC arm)
if(SYSPROC MATCHES "arm")
  add_definitions (-D__arm__ )
endif()

if(MSVC)
  # Force to always compile with W4 and warnings-as-errors.
  # string(REGEX REPLACE <regex> <replacement> <out-var> <input>): the previous
  # call passed "/WX" as the replacement and "/W4" as the output variable, so
  # CMAKE_CXX_FLAGS was never updated.
  if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
    string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
  else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
  endif()
  if(NOT CMAKE_CXX_FLAGS MATCHES "/WX")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX")
  endif()
else()
  # Update if necessary
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -Wno-long-long -pedantic")
endif()

option(USE_CXX14 "C++14" )
if(USE_CXX14)
  add_definitions(--std=c++14)
else()
  add_definitions(--std=c++11)
endif()

option(SOFTFLOAT "softfloat" TRUE)
if(SOFTFLOAT)
  add_definitions(-DWITH_SOFTFLOAT)
endif()
add_definitions(-march=native)

include_directories(include)
find_package(Eigen3 )
include_directories(${EIGEN3_INCLUDE_DIRS})
add_executable(main src/main.cpp)
#add_executable(maineigen src/maineigen.cpp)
add_executable(testunpacked tests/testunpacked.cpp)
add_executable(testposit tests/testposit.cpp)

# TABULATED posits
# TODO Generalize to any arbitrary configuration of posit: given a posit configuration then
# automatically make the generator, the table file and the cmake

option(POSIT12 "posit12 tabulated" TRUE)
option(POSIT10 "posit10 tabulated" TRUE)
option(POSIT8 "posit8 tabulated" TRUE)
option(BINARY8 "binary8_5 tabulated" TRUE)

# Each tabulated type has a generator executable; running it with the output
# path as its only argument produces the corresponding *_tbl.cpp. The custom
# command is skipped for the Xcode generator, where the table file must
# already exist in src/.
if(POSIT12)
  add_executable(posit12_gen src/posit12_gen.cpp)
  set(POSIT12_SRC src/posit12.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/posit12_tbl.cpp)
  if(NOT CMAKE_GENERATOR STREQUAL Xcode)
    add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/posit12_tbl.cpp
      COMMAND $<TARGET_FILE:posit12_gen> ${CMAKE_CURRENT_SOURCE_DIR}/src/posit12_tbl.cpp
      DEPENDS posit12_gen )
  endif()
endif()

if(POSIT10)
  add_executable(posit10_gen src/posit10_gen.cpp)
  set(POSIT10_SRC src/posit10.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/posit10_tbl.cpp)
  if(NOT CMAKE_GENERATOR STREQUAL Xcode)
    add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/posit10_tbl.cpp
      COMMAND $<TARGET_FILE:posit10_gen> ${CMAKE_CURRENT_SOURCE_DIR}/src/posit10_tbl.cpp
      DEPENDS posit10_gen )
  endif()
endif()

if(POSIT8)
  add_executable(posit8_gen src/posit8_gen.cpp)
  set(POSIT8_SRC src/posit8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/posit8_tbl.cpp)
  if(NOT CMAKE_GENERATOR STREQUAL Xcode)
    add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/posit8_tbl.cpp
      COMMAND $<TARGET_FILE:posit8_gen> ${CMAKE_CURRENT_SOURCE_DIR}/src/posit8_tbl.cpp
      DEPENDS posit8_gen )
  endif()
endif()

if(BINARY8)
  add_executable(binary8_gen src/binary8_gen.cpp)
  set(BINARY8_SRC src/binary8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/binary8_tbl.cpp)
  if(NOT CMAKE_GENERATOR STREQUAL Xcode)
    add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/binary8_tbl.cpp
      COMMAND $<TARGET_FILE:binary8_gen> ${CMAKE_CURRENT_SOURCE_DIR}/src/binary8_tbl.cpp
      DEPENDS binary8_gen )
  endif()
endif()

# POSITTABTARGET holds the name of the table library, or stays unset when every
# tabulated type is disabled.
if(BINARY8 OR POSIT8 OR POSIT10 OR POSIT12)
  set(POSITTABTARGET posittab)
  add_library(posittab STATIC ${POSIT8_SRC} ${BINARY8_SRC} ${POSIT10_SRC} ${POSIT12_SRC})
  set_property(TARGET posittab PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/include)
endif()

#if(BINARY8)
#add_executable(testbinary8 tests/testbinary8.cpp)
#target_link_libraries(testbinary8 posittab)
#endif()


if(POSIT8)
  add_executable(testposit8 tests/testposit8.cpp)
  target_link_libraries(testposit8 posittab)
endif()

if(POSIT10)
  add_executable(testposit10 tests/testposit10.cpp)
  target_link_libraries(testposit10 posittab)
endif()

if(POSIT12)
  add_executable(testposit12 tests/testposit12.cpp)
  target_link_libraries(testposit12 posittab)
endif()

#if(SYSPROC MATCHES "arm")
#else()
#include_directories(include/simd)
#add_executable(testsimdposit8 tests/testsimdposit8.cpp src/simdposit8.cpp)
#target_link_libraries(testsimdposit8 ${POSITTABTARGET})
#endif()
add_library(posit INTERFACE)
set_property(TARGET posit PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/include)

# Test the variable itself: if(${POSITTABTARGET}) expanded to if(posittab),
# which looks up a (non-existent) variable called "posittab" and was always
# false, so listposits was never built.
if(POSITTABTARGET)
  add_executable(listposits src/listposits.cpp)
  target_link_libraries(listposits posittab)
endif()

if(SOFTFLOAT)
  link_directories(${CMAKE_CURRENT_SOURCE_DIR}/extern)
  include_directories(${CMAKE_CURRENT_SOURCE_DIR}/extern)
  add_library(p::softfloat INTERFACE IMPORTED)
  set_property(TARGET p::softfloat PROPERTY INTERFACE_LINK_LIBRARIES softfloat.a)
  set_property(TARGET p::softfloat PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/extern)

  add_executable(softfloat_gen src/softfloat_gen.cpp)
  target_link_libraries(softfloat_gen p::softfloat)

  if(NOT CMAKE_GENERATOR STREQUAL Xcode)
    add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/float16_tbl.cpp
      COMMAND DYLD_LIBRARY_PATH=${CMAKE_CURRENT_SOURCE_DIR}/extern LD_LIBRARY_PATH=${CMAKE_CURRENT_SOURCE_DIR}/extern $<TARGET_FILE:softfloat_gen> ${CMAKE_CURRENT_SOURCE_DIR}/src/float16_tbl.cpp
      DEPENDS softfloat_gen)
  endif()
  add_library(softfloat16 STATIC ${CMAKE_CURRENT_SOURCE_DIR}/src/float16_tbl.cpp)
  set_property(TARGET softfloat16 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/extern)
  target_link_libraries(softfloat16 p::softfloat)


  add_executable(testsoftfloat tests/testsoftfloat.cpp)
  target_link_libraries(testsoftfloat softfloat16)

  #set_property(TARGET softfloat16 PROPERTY INTERFACE_LINK_LIBRARIES ${CMAKE_CURRENT_SOURCE_DIR}/extern/softfloat)

  install (TARGETS softfloat16 DESTINATION lib)
endif()

#export(EXPORT cppPosit FILE cppPosit.cmake)
export(TARGETS ${POSITTABTARGET} posit FILE cppPosit.cmake)
include_directories(extern)

# install(TARGETS) needs at least one target, so skip it when no table library
# was configured.
if(POSITTABTARGET)
  install (TARGETS ${POSITTABTARGET} DESTINATION lib) #INTERFACE_INCLUDE_DIRECTORIES include )
endif()
install(DIRECTORY include/ DESTINATION include/cppPosit)
#install(EXPORT cppPosit DESTINATION lib/cppPosit )

add_subdirectory(doc EXCLUDE_FROM_ALL)
23 | 24 | * Redistributions in binary form must reproduce the above copyright notice, 25 | this list of conditions and the following disclaimer in the documentation 26 | and/or other materials provided with the distribution. 27 | 28 | * Neither the name of tiny-dnn nor the names of its 29 | contributors may be used to endorse or promote products derived from 30 | this software without specific prior written permission. 31 | 32 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 33 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 34 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 35 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 36 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 37 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 38 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 39 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 40 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 41 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Documentation sub-project: fills in Doxyfile.in and exposes a `doc` target
# that runs doxygen in the binary directory.
project(cppPositDoc)

# INTERNAL_DOCUMENTATION is the switch the logic below reads.
# EIGEN_INTERNAL_DOCUMENTATION was the only option previously declared (and had
# no effect); it is kept as an alias so existing command lines keep working.
option(INTERNAL_DOCUMENTATION "Build internal documentation" OFF)
option(EIGEN_INTERNAL_DOCUMENTATION "Build internal documentation (legacy alias of INTERNAL_DOCUMENTATION)" OFF)


# Set some Doxygen flags
set(DOXY_PROJECT_NAME             "cppPosit")
set(DOXY_PROJECT_VERSION          "")
set(DOXY_OUTPUT_DIRECTORY_SUFFIX  "")
set(DOXY_BINARY_DIR               "\"${CMAKE_CURRENT_BINARY_DIR}\"")
set(DOXY_INPUT                    "\"${CMAKE_CURRENT_SOURCE_DIR}/../include\" \"${CMAKE_CURRENT_SOURCE_DIR}\"")
set(DOXY_HTML_COLORSTYLE_HUE      "220")
set(DOXY_TAGFILES                 "")
if(INTERNAL_DOCUMENTATION OR EIGEN_INTERNAL_DOCUMENTATION)
  set(DOXY_INTERNAL               "YES")
else()
  set(DOXY_INTERNAL               "NO")
endif()

# Substitute the DOXY_* values above into the Doxyfile template.
configure_file(
  ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in
  ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
)

add_custom_target(doc ALL
  COMMAND doxygen
  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
# - this module looks for 128 bit integer support.  It sets up the
# type defs in util/int128_types.hpp.  Simply add ${INT128_FLAGS} to the
# compiler flags.

include(CheckTypeSize)
# CHECK_CXX_SOURCE_COMPILES lives in its own module; include it here so this
# file does not depend on the caller having done so.
include(CheckCXXSourceCompiles)

# CHECK_128_BIT_HASH_FUNCTION(<type-name> <result-var>)
# Sets <result-var> to 1 when std::hash<type-name> can be instantiated.
MACRO(CHECK_128_BIT_HASH_FUNCTION VAR_NAME DEF_NAME)

  message("Testing for presence of 128 bit unsigned integer hash function for ${VAR_NAME}.")

  # Type names such as "unsigned __int128" contain spaces; turn the name into a
  # valid identifier before using it as the result variable of the check.
  # Names without spaces map to the same variable name as before.
  string(MAKE_C_IDENTIFIER "has_hash_${VAR_NAME}" _int128_hash_result_var)

  # <functional> provides std::hash; <cstdint> provides the fixed-width types.
  CHECK_CXX_SOURCE_COMPILES("
  #include <functional>
  #include <cstdint>
  int main(int argc, char** argv) {
    std::hash<${VAR_NAME}>()(0);
    return 0;
  }"
  ${_int128_hash_result_var})

  if(${_int128_hash_result_var})
    message("std::hash<${VAR_NAME}> defined.")
    SET(${DEF_NAME} 1)
  else()
    message("std::hash<${VAR_NAME}> not defined.")
  endif()
endmacro()


# CHECK_INT128(<type-name> <out-var> <define-name>)
# If no signed 128 bit type has been found yet and <type-name> is 16 bytes wide,
# sets INT128_FOUND, probes std::hash support and stores <define-name> in <out-var>.
MACRO(CHECK_INT128 INT128_NAME VARIABLE DEFINE_NAME)

  if(NOT INT128_FOUND)
    message("Testing for 128 bit integer support with ${INT128_NAME}.")
    check_type_size("${INT128_NAME}" int128_t_${DEFINE_NAME})
    if(HAVE_int128_t_${DEFINE_NAME})
      if(int128_t_${DEFINE_NAME} EQUAL 16)
        message("Found: Enabling support for 128 bit integers using ${INT128_NAME}.")
        SET(INT128_FOUND 1)
        CHECK_128_BIT_HASH_FUNCTION("${INT128_NAME}" HAS_INT128_STD_HASH)

        SET(${VARIABLE} "${DEFINE_NAME}")
      else()
        message("${INT128_NAME} has incorrect size, can't use.")
      endif()
    endif()
  endif()
endmacro()

# CHECK_UINT128(<type-name> <out-var> <define-name>)
# Unsigned counterpart of CHECK_INT128; uses UINT128_FOUND / HAS_UINT128_STD_HASH.
MACRO(CHECK_UINT128 UINT128_NAME VARIABLE DEFINE_NAME)

  if(NOT UINT128_FOUND)
    message("Testing for 128 bit unsigned integer support with ${UINT128_NAME}.")
    check_type_size("${UINT128_NAME}" uint128_t_${DEFINE_NAME})
    if(HAVE_uint128_t_${DEFINE_NAME})
      if(uint128_t_${DEFINE_NAME} EQUAL 16)
        message("Found: Enabling support for 128 bit integers using ${UINT128_NAME}.")
        SET(UINT128_FOUND 1)
        CHECK_128_BIT_HASH_FUNCTION("${UINT128_NAME}" HAS_UINT128_STD_HASH)
        SET(${VARIABLE} "${DEFINE_NAME}")
      else()
        message("${UINT128_NAME} has incorrect size, can't use.")
      endif()
    endif()
  endif()
endmacro()

# FIND_INT128_TYPES()
# Tries the candidate type spellings in order (first 16-byte match wins) and
# assembles INT128_FLAGS from the matching -D definitions.
MACRO(FIND_INT128_TYPES)

  Check_Int128("long long" INT128_DEF "HAVEint128_as_long_long")
  Check_Int128("int128_t" INT128_DEF "HAVEint128_t")
  Check_Int128("__int128_t" INT128_DEF "HAVE__int128_t")
  Check_Int128("__int128" INT128_DEF "HAVE__int128")
  Check_Int128("int128" INT128_DEF "HAVEint128")

  if(INT128_FOUND)
    set(INT128_FLAGS "-D${INT128_DEF}")

    if(HAS_INT128_STD_HASH)
      set(INT128_FLAGS "${INT128_FLAGS} -DHASH_FOR_INT128_DEFINED")
    endif()

  else()
    message("Compiler/platform support for 128 bit integers not found, falling back to boost mpfr.")
    set(INT128_FLAGS "")
  endif()

  Check_UInt128("unsigned long long" UINT128_DEF "HAVEuint128_as_u_long_long")
  Check_UInt128("uint128_t" UINT128_DEF "HAVEuint128_t")
  Check_UInt128("__uint128_t" UINT128_DEF "HAVE__uint128_t")
  Check_UInt128("__uint128" UINT128_DEF "HAVE__uint128")
  Check_UInt128("uint128" UINT128_DEF "HAVEuint128")
  Check_UInt128("unsigned __int128_t" UINT128_DEF "HAVEunsigned__int128_t")
  Check_UInt128("unsigned int128_t" UINT128_DEF "HAVEunsignedint128_t")
  Check_UInt128("unsigned __int128" UINT128_DEF "HAVEunsigned__int128")
  Check_UInt128("unsigned int128" UINT128_DEF "HAVEunsignedint128")

  if(UINT128_FOUND)
    set(INT128_FLAGS "${INT128_FLAGS} -D${UINT128_DEF}")

    if(HAS_UINT128_STD_HASH)
      set(INT128_FLAGS "${INT128_FLAGS} -DHASH_FOR_UINT128_DEFINED")
    endif()

  else()
    message("Compiler/platform support for unsigned 128 bit integers not found, falling back to boost mpfr.")
  endif()

endmacro()
4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | * Zbinary8tabn library 11 | * 12 | * Emanuele Ruffaldi 2017 13 | */ 14 | #pragma once 15 | #include 16 | #include 17 | #include 18 | 19 | #ifndef BINARY8_SPEC 20 | #define BINARY8_SPEC 21 | #define BINARY8_EXP 5 22 | #define BINARY8_MAN 2 23 | #endif 24 | 25 | #include "floattraits.hpp" 26 | 27 | #ifndef FPGAHLS 28 | inline float uint32_to_float(uint32_t i) 29 | { 30 | union { 31 | float f; 32 | uint32_t i; 33 | } x; 34 | x.i = i; 35 | return x.f; 36 | } 37 | #endif 38 | 39 | 40 | namespace binary8ns 41 | { 42 | extern int8_t opadd[]; 43 | extern int8_t opmul[]; 44 | extern int8_t opdiv[]; 45 | extern int8_t opinv[]; 46 | extern int8_t opexp2[]; 47 | extern int8_t ophalf[]; 48 | extern int8_t opsquare[]; 49 | extern uint32_t op2float[]; 50 | } 51 | 52 | class binary8tab 53 | { 54 | public: 55 | // template 56 | 57 | 58 | using value_t=int8_t; 59 | using fraction_t=uint16_t; 60 | static constexpr int vtotalbits = 8; 61 | static constexpr int vesbits = BINARY8_EXP; 62 | using exponenttype=int32_t; 63 | using trait_t=any_floattrait; 64 | using FT=fraction_t; 65 | 66 | struct DeepInit{}; 67 | 68 | explicit binary8tab(double a); 69 | explicit binary8tab(float a); 70 | binary8tab(int a); 71 | explicit binary8tab(DeepInit , int8_t a) : v(a) {} 72 | binary8tab(): v(0) {} 73 | 74 | bool is_negative() const { return v < 0; } 75 | //bool is_nan() const { return trait_t::withnan && v == trait_t::POSIT_NAN; } 76 | //bool is_infinity() const { return (v == trait_t::POSIT_PINF || v == trait_t::POSIT_NINF);} 77 | static binary8tab max() { return binary8tab(DeepInit(),trait_t::max_h); } // 126 or 127 78 | static binary8tab min() { return binary8tab(DeepInit(),trait_t::min_h); } // ? 
-126 : -127 79 | static binary8tab one() { return binary8tab(DeepInit(),trait_t::one_h); } // 0x40 80 | static binary8tab afterone() { return binary8tab(DeepInit(),trait_t::afterone_h); } // 0x40 81 | //static binary8tab two() { return binary8tab(DeepInit(),trait_t::two); } 82 | static binary8tab zero() { return binary8tab(DeepInit(),0); } 83 | //static binary8tab onehalf() { return binary8tab(DeepInit(),trait_t::onehalf); } 84 | //static binary8tab pinfinity() { return binary8tab(DeepInit(),trait_t::pinfinity); } // ? 127: -128); } 85 | //static binary8tab ninfinity() { return binary8tab(DeepInit(),trait_t::ninfinity); } //,trait_t::withnan ?-127: -128); } 86 | //static binary8tab nan() { return binary8tab(DeepInit(),trait_t::nan); } // withnan ?-128: 0); } 87 | 88 | binary8tab inv() const { return binary8tab(DeepInit(),binary8ns::opinv[uu()]); } 89 | binary8tab half() const { return binary8tab(DeepInit(),binary8ns::ophalf[uu()]);} 90 | binary8tab twice() const; 91 | binary8tab exp2() const { return binary8tab(DeepInit(),binary8ns::opexp2[uu()]); } 92 | binary8tab square() const { return binary8tab(DeepInit(),binary8ns::opsquare[uu()]); } 93 | binary8tab operator-() const { return binary8tab(DeepInit(),-v); } 94 | 95 | friend bool operator<(binary8tab a, binary8tab b) { return a.v < b.v; } 96 | friend bool operator>(binary8tab a, binary8tab b) { return a.v > b.v; } 97 | friend bool operator<=(binary8tab a, binary8tab b) { return a.v <= b.v;} 98 | friend bool operator>=(binary8tab a, binary8tab b) { return a.v >= b.v; } 99 | friend bool operator==(binary8tab a, binary8tab b) { return a.v == b.v; } 100 | friend bool operator!=(binary8tab a, binary8tab b) { return a.v !=b.v; } 101 | 102 | friend binary8tab operator+(binary8tab a, binary8tab b) { return binary8tab(DeepInit(),binary8ns::opadd[a.uu()*256+b.uu()]); } 103 | friend binary8tab operator*(binary8tab a, binary8tab b) { return binary8tab(DeepInit(),binary8ns::opmul[a.uu()*256+b.uu()]); } 104 | friend binary8tab 
operator-(binary8tab a, binary8tab b) { return a+(-b); } 105 | friend binary8tab operator/(binary8tab a, binary8tab b) { return binary8tab(DeepInit(),binary8ns::opdiv[a.uu()*256+b.uu()]); } 106 | friend std::ostream & operator << (std::ostream & ons, const binary8tab & p); 107 | 108 | binary8tab& operator+=(const binary8tab &a) { binary8tab r = *this+a; v = r.v; return *this; } 109 | binary8tab& operator*=(const binary8tab &a) { binary8tab r = *this*a; v = r.v; return *this; } 110 | 111 | unsigned int uu() const { return (unsigned int)(uint8_t)v;} 112 | 113 | operator float() const { return uint32_to_float(binary8ns::op2float[uu()]); } 114 | operator double() const { return (float)*this; } 115 | operator int() const { return (float)*this; } 116 | 117 | int8_t v; 118 | 119 | }; 120 | 121 | inline binary8tab half(binary8tab z) { return z.half(); } 122 | 123 | inline binary8tab twice(binary8tab z) { return z.twice(); } 124 | 125 | inline binary8tab square(binary8tab z) { return z.square(); } 126 | 127 | inline binary8tab inv(binary8tab x) { return x.inv(); } 128 | 129 | inline binary8tab neg(binary8tab z) { return -z; } 130 | 131 | inline binary8tab exp2(binary8tab z) { return z.exp2(); } 132 | 133 | inline bool is_negative(binary8tab a) 134 | { 135 | return a.v < 0; 136 | } 137 | 138 | 139 | namespace std { 140 | template<> class numeric_limits { 141 | public: 142 | static binary8tab max() {return binary8tab::max(); }; 143 | static binary8tab min() {return binary8tab::min(); }; 144 | static binary8tab epsilon() {return binary8tab::afterone()-binary8tab::one(); }; 145 | // One can implement other methods if needed 146 | }; 147 | } 148 | 149 | 150 | 151 | namespace std 152 | { 153 | inline binary8tab abs(binary8tab z) 154 | { 155 | return binary8tab(binary8tab::DeepInit(),z.v & 0x7F); 156 | } 157 | 158 | inline binary8tab min(binary8tab a, binary8tab b) 159 | { 160 | return a <= b ? 
a : b; 161 | } 162 | 163 | inline binary8tab max(binary8tab a, binary8tab b) 164 | { 165 | return a >= b ? a : b; 166 | } 167 | } 168 | 169 | inline std::ostream & operator << (std::ostream & ons, const binary8tab & p) 170 | { 171 | ons << "binary8tab(" << p.v << ")" ; 172 | return ons; 173 | } -------------------------------------------------------------------------------- /include/exp2.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | - express the constants as integer and then use constexpr conversion to the Unpacked template 11 | - implement polevl and p1evl 12 | - implement ldexp 13 | 14 | */ 15 | // https://github.com/aterrel/libdynd/blob/master/thirdparty/cephes/exp2.c 16 | // https://github.com/jeremybarnes/cephes/blob/master/cprob/polevl.c 17 | 18 | #ifdef UNK 19 | static double P[] = { 20 | 2.30933477057345225087E-2, 21 | 2.02020656693165307700E1, 22 | 1.51390680115615096133E3, 23 | }; 24 | 25 | static double Q[] = { 26 | /* 1.00000000000000000000E0, */ 27 | 2.33184211722314911771E2, 28 | 4.36821166879210612817E3, 29 | }; 30 | 31 | 32 | #ifdef IBMPC 33 | static unsigned short P[] = { 34 | 0xead3, 0x549a, 0xa5c8, 0x3f97, 35 | 0x5bde, 0x9361, 0x33ba, 0x4034, 36 | 0x7693, 0x907b, 0xa7a0, 0x4097, 37 | }; 38 | 39 | static unsigned short Q[] = { 40 | /*0x0000,0x0000,0x0000,0x3ff0, */ 41 | 0x5c3c, 0x0ffb, 0x25e5, 0x406d, 42 | 0x0bae, 0x2fed, 0x1036, 0x40b1, 43 | }; 44 | 45 | template 46 | Unpacked polevl(Unpacked x) 47 | { 48 | 49 | } 50 | 51 | template 52 | Unpacked p1evl(Unpacked x) 53 | { 54 | 55 | } 56 | 57 | template 58 | Unpacked ldexp(Unpacked x, int n) 59 | { 60 | 61 | } 62 | 63 | template 64 | Unpacked x exp2(Unpacked x) 65 | { 66 | using UT=Unpacked; 67 | 68 | switch(x.type) 69 | { 70 | case UT::NaN: return x; 71 | case 
/**
 * Copyright (C) 2017-2019 Emanuele Ruffaldi
 * Distributed under the terms of the BSD 3-Clause License.
 *
 * (See accompanying file LICENSE)
 *
 * --
 */
#pragma once

// std::exp2 is used below; include it here so the header does not rely on the
// including file having pulled in <cmath> first.
#include <cmath>

/**
 * Function object computing 2^a for a value of type T.
 *
 * This primary template converts the argument to double, evaluates
 * std::exp2 and converts the result back to T. Being a class template, it
 * can be specialized for individual types.
 */
template <class T>
struct exp2t_
{
	/// @param a exponent
	/// @return 2 raised to the power a, converted to T
	T operator() (T a) const
	{
		return static_cast<T>(std::exp2(static_cast<double>(a)));
	}
};


/**
 * Convenience wrapper: evaluates exp2t_<T> on a.
 *
 * @param a exponent
 * @return 2 raised to the power a
 */
template <class T>
T exp2t(T a)
{
	return exp2t_<T>()(a);
}
/**
 * Copyright (C) 2017-2019 Emanuele Ruffaldi
 * Distributed under the terms of the BSD 3-Clause License.
 *
 * (See accompanying file LICENSE)
 *
 * --
 */
#pragma once

// std::is_integral / std::is_signed are used in the static_asserts below.
#include <type_traits>


/**
 * Compile-time description of a signed fixed-point format.
 *
 * @tparam T signed integral type that stores the value
 * @tparam N total number of bits of the format
 * @tparam F number of fraction bits (0 <= F <= N)
 *
 * NOTE(review): the parameter list is inferred from how T, N and F are used in
 * the body - confirm order and any default arguments against the callers.
 */
template <class T, int N, int F>
struct fixedtrait
{
	// The holding type must provide at least N bits. The comparison used to be
	// `<=`, which accepted a too-small T and rejected any T wider than N.
	static_assert(static_cast<int>(sizeof(T)*8) >= N,"fixedtrait holding type is too small");
	static_assert(N > 0,"fixedtrait total bits should be positive");
	static_assert(F <= N && F >= 0,"fraction bits should be less than N and not negative");
	static_assert(std::is_integral<T>::value && std::is_signed<T>::value,"only for signed integrals");
	using value_t = T;                      ///< storage type
	static constexpr int totalbits = N;     ///< total width in bits
	static constexpr int fraction_bits = F; ///< bits after the binary point
};

#if 0
// Disabled draft of a variant that derives the width from sizeof(T) with no
// fraction bits. NOTE(review): its template parameter/argument lists are
// missing from this listing; restore them before enabling this block.
#if __cplusplus >= 201402L
template
struct fixedtrait
{
	static_assert(std::is_integral::value && std::is_signed::value,"only for signed integrals");
	using value_t = T;
	static constexpr int totalbits = sizeof(T)*8;
	static constexpr int fraction_bits = 0;
};
#endif
#endif
4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | #include 10 | #include 11 | #include "platform.h" 12 | #include "internals.h" 13 | #include "specialize.h" 14 | #include "softfloat.h" 15 | 16 | struct ui12_f12 17 | { 18 | 19 | }; 20 | float12_t 21 | 22 | #define packToF12UI( sign, exp, sig ) (((uint12_t) (sign)<<15) + ((uint12_t) (exp)<<10) + (sig)) 23 | #define softfloat_commonNaNToF12UI(aPtr) (uint_fast12_t) ((aPtr)->sign<<15 | 0x7E00 | aPtr->v64>>54) 24 | 25 | 26 | int_float12_t 27 | uint_float12_t 28 | 29 | // f8 is typically 1 4 3 30 | // f12 is typically 1 5 6 or 1 4 7 31 | // f16 is 1 6 9 32 | template 33 | float12_t f32_to_f12( float32_t a ) 34 | { 35 | union ui32_f32 uA; 36 | uint_fast32_t uiA; 37 | bool sign; 38 | int_fast12_t exp; 39 | uint_fast32_t frac; 40 | struct commonNaN commonNaN; 41 | uint_fast12_t uiZ, frac16; 42 | union ui12_f12 uZ; 43 | 44 | /*------------------------------------------------------------------------ 45 | *------------------------------------------------------------------------*/ 46 | uA.f = a; 47 | uiA = uA.ui; 48 | sign = signF32UI( uiA ); 49 | exp = expF32UI( uiA ); 50 | frac = fracF32UI( uiA ); 51 | /*------------------------------------------------------------------------ 52 | *------------------------------------------------------------------------*/ 53 | if ( exp == 0xFF ) { 54 | if ( frac ) { 55 | softfloat_f32UIToCommonNaN( uiA, &commonNaN ); 56 | uiZ = softfloat_commonNaNToF12UI( &commonNaN ); 57 | } else { 58 | uiZ = packToF12UI( sign, 0x1F, 0 ); 59 | } 60 | goto uiZ; 61 | } 62 | /*------------------------------------------------------------------------ 63 | *------------------------------------------------------------------------*/ 64 | frac16 = frac>> mantissabits| ((frac & mantissamask) != 0); 65 | if ( ! 
(exp | frac16) ) { 66 | uiZ = packToF12UI( sign, 0, 0 ); 67 | goto uiZ; 68 | } 69 | /*------------------------------------------------------------------------ 70 | *------------------------------------------------------------------------*/ 71 | return softfloat_roundPackToF12( sign, exp - 0x71, frac16 | 0x4000 ); 72 | uiZ: 73 | uZ.ui = uiZ; 74 | return uZ.f; 75 | 76 | } 77 | 78 | 79 | 80 | float12_t 81 | softfloat_roundPackToF12( bool sign, int_fast12_t exp, uint_fast12_t sig ) 82 | { 83 | uint_fast8_t roundingMode; 84 | bool roundNearEven; 85 | uint_fast8_t roundIncrement, roundBits; 86 | bool isTiny; 87 | uint_fast12_t uiZ; 88 | union ui12_f12 uZ; 89 | 90 | /*------------------------------------------------------------------------ 91 | *------------------------------------------------------------------------*/ 92 | roundingMode = softfloat_roundingMode; 93 | roundNearEven = (roundingMode == softfloat_round_near_even); 94 | roundIncrement = 0x8; 95 | if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) { 96 | roundIncrement = 97 | (roundingMode 98 | == (sign ? softfloat_round_min : softfloat_round_max)) 99 | ? 
0xF 100 | : 0; 101 | } 102 | roundBits = sig & 0xF; 103 | /*------------------------------------------------------------------------ 104 | *------------------------------------------------------------------------*/ 105 | if ( 0x1D <= (unsigned int) exp ) { 106 | if ( exp < 0 ) { 107 | /*---------------------------------------------------------------- 108 | *----------------------------------------------------------------*/ 109 | isTiny = 110 | (softfloat_detectTininess == softfloat_tininess_beforeRounding) 111 | || (exp < -1) || (sig + roundIncrement < 0x8000); 112 | sig = softfloat_shiftRightJam32( sig, -exp ); 113 | exp = 0; 114 | roundBits = sig & 0xF; 115 | if ( isTiny && roundBits ) { 116 | softfloat_raiseFlags( softfloat_flag_underflow ); 117 | } 118 | } else if ( (0x1D < exp) || (0x8000 <= sig + roundIncrement) ) { 119 | /*---------------------------------------------------------------- 120 | *----------------------------------------------------------------*/ 121 | softfloat_raiseFlags( 122 | softfloat_flag_overflow | softfloat_flag_inexact ); 123 | uiZ = packToF12UI( sign, 0x1F, 0 ) - ! roundIncrement; 124 | goto uiZ; 125 | } 126 | } 127 | /*------------------------------------------------------------------------ 128 | *------------------------------------------------------------------------*/ 129 | sig = (sig + roundIncrement)>>4; 130 | if ( roundBits ) { 131 | softfloat_exceptionFlags |= softfloat_flag_inexact; 132 | #ifdef SOFTFLOAT_ROUND_ODD 133 | if ( roundingMode == softfloat_round_odd ) { 134 | sig |= 1; 135 | goto packReturn; 136 | } 137 | #endif 138 | } 139 | sig &= ~(uint_fast12_t) (! (roundBits ^ 8) & roundNearEven); 140 | if ( ! 
sig ) exp = 0; 141 | /*------------------------------------------------------------------------ 142 | *------------------------------------------------------------------------*/ 143 | packReturn: 144 | uiZ = packToF12UI( sign, exp, sig ); 145 | uiZ: 146 | uZ.ui = uiZ; 147 | return uZ.f; 148 | 149 | } 150 | 151 | struct exp8_sig16 softfloat_normSubnormalF12Sig( uint_fast12_t sig ) 152 | { 153 | int_fast8_t shiftDist; 154 | struct exp8_sig16 z; 155 | 156 | shiftDist = softfloat_countLeadingZeros16( sig ) - 5; // TODO 157 | z.exp = 1 - shiftDist; 158 | z.sig = sig<sign = uiA>>15; 170 | zPtr->v64 = (uint_fast64_t) uiA<<54; 171 | zPtr->v0 = 0; 172 | } 173 | 174 | 175 | float32_t f12_to_f32( float12_t a ) 176 | { 177 | union ui12_f12 uA; 178 | uint_fast12_t uiA; 179 | bool sign; 180 | int_fast8_t exp; 181 | uint_fast12_t frac; 182 | struct commonNaN commonNaN; 183 | uint_fast32_t uiZ; 184 | struct exp8_sig16 normExpSig; 185 | union ui32_f32 uZ; 186 | 187 | /*------------------------------------------------------------------------ 188 | *------------------------------------------------------------------------*/ 189 | uA.f = a; 190 | uiA = uA.ui; 191 | sign = signF12UI( uiA ); 192 | exp = expF12UI( uiA ); 193 | frac = fracF12UI( uiA ); 194 | /*------------------------------------------------------------------------ 195 | *------------------------------------------------------------------------*/ 196 | if ( exp == 0x1F ) { 197 | if ( frac ) { 198 | softfloat_f12UIToCommonNaN( uiA, &commonNaN ); 199 | uiZ = softfloat_commonNaNToF32UI( &commonNaN ); 200 | } else { 201 | uiZ = packToF32UI( sign, 0xFF, 0 ); 202 | } 203 | goto uiZ; 204 | } 205 | /*------------------------------------------------------------------------ 206 | *------------------------------------------------------------------------*/ 207 | if ( ! exp ) { 208 | if ( ! 
frac ) { 209 | uiZ = packToF32UI( sign, 0, 0 ); 210 | goto uiZ; 211 | } 212 | normExpSig = softfloat_normSubnormalF12Sig( frac ); 213 | exp = normExpSig.exp - 1; 214 | frac = normExpSig.sig; 215 | } 216 | /*------------------------------------------------------------------------ 217 | *------------------------------------------------------------------------*/ 218 | uiZ = packToF32UI( sign, exp + 0x70, (uint_fast32_t) frac<<13 ); 219 | uiZ: 220 | uZ.ui = uiZ; 221 | return uZ.f; 222 | 223 | } 224 | 225 | 226 | -------------------------------------------------------------------------------- /include/float16native32.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | * Float 16 native 32 11 | * Emanueel Ruffaldi 2017 12 | */ 13 | #pragma once 14 | #include 15 | #include 16 | #include "unpacked.h" 17 | 18 | using float16n_t = halffloat; 19 | 20 | // use native F16C operation for conversion back & forth 21 | // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16 22 | // https://db.in.tum.de/~finis/x86-intrin-cheatsheet-v2.1.pdf 23 | #if defined(__AVX__) && defined(__F16C__) 24 | 25 | // for _mm_cvtph_ps 26 | // for _mm_cvtss_f32 27 | // for _mm_cvtsi128_si32 28 | // for _mm_extract_epi16 SSE2 29 | #include 30 | 31 | 32 | // these intrinsics are rot constexpr 33 | inline float float16nto32(float16n_t x) 34 | { 35 | // FP16C cost (4-5,1) 36 | return _mm_cvtss_f32(_mm_cvtph_ps( _mm_set1_epi16 (x.what) )); 37 | } 38 | 39 | inline float16n_t float32to16n(float x) 40 | { 41 | // rounding from 32 to 16 42 | // _mm_cvtsi128_si32(x) &0xFFFF with cost (2,1) 43 | // or 44 | // _mm_extract(x,0) cost (3,1) 45 | return float16n_t((uint16_t)(_mm_cvtsi128_si32(_mm_cvtps_ph( _mm_set_ps1 (x),_MM_FROUND_TO_NEAREST_INT 
|_MM_FROUND_NO_EXC )) & 0xFFFF)); 46 | } 47 | 48 | #else 49 | 50 | // TODO: use optimized quicker version 51 | // TODO: constexpr requires floatconst2bits.hpp 52 | inline float float16nto32(float16n_t x) 53 | { 54 | // template 55 | // constexpr DstTrait::holder_t convertfloats(SrcTrait::holder_t src) 56 | return Unpacked::template make_floati(x.what).template pack_xfloati(); 57 | } 58 | 59 | // TODO: use optimized quicker version 60 | inline float16n_t float32to16n(float x) 61 | { 62 | return float16n_t(Unpacked::template make_float(x).template pack_xfloati()); 63 | } 64 | 65 | 66 | #endif 67 | 68 | 69 | struct float16n32 70 | { 71 | using trait_t = half_trait; 72 | using value_t= float16n_t; 73 | using impl_t = float; 74 | explicit float16n32() : value (0) {} 75 | explicit float16n32(float f): value(float32to16n(f)) {} 76 | explicit float16n32(float16n_t f): value(f) {} 77 | explicit constexpr float16n32(bool , uint16_t x): value(x) {} 78 | 79 | operator float () const { return float16nto32(value); } 80 | operator double () const { return float16nto32(value); } 81 | operator float16n_t() const { return value; } 82 | 83 | uint16_t raw() const { return value.what; } 84 | constexpr float16n32 abs() const { return float16n32(0,value.what & ~0x8000); } // could be >= infinity because infinity is sign symmetric 85 | constexpr float16n32 neg() const { return float16n32(0,value.what ^ 0x8000); }; 86 | 87 | /// replace them with constants 88 | static float16n32 two() { return float16n32(false,trait_t::two_h); }; 89 | static float16n32 one() { return float16n32(false,trait_t::one_h); }; 90 | static float16n32 zero() { return float16n32(false,0); }; 91 | 92 | friend float16n32 operator+ (float16n32 a, float16n32 b) { return float16n32((impl_t)a+(impl_t)b); } 93 | friend float16n32 operator- (float16n32 a, float16n32 b) { return float16n32((impl_t)a-(impl_t)b); } 94 | friend float16n32 operator* (float16n32 a, float16n32 b) { return float16n32((impl_t)a*(impl_t)b); } 95 | 
friend float16n32 operator/ (float16n32 a, float16n32 b) { return float16n32((impl_t)a/(impl_t)b); } 96 | float16n32 & operator+=(const float16n32 &a) { float16n32 x = *this + a; value = x.value; return *this; } 97 | float16n32 & operator*=(const float16n32 &a) { float16n32 x = *this * a; value = x.value; return *this; } 98 | 99 | float16n32 operator-() const { return neg(); } 100 | 101 | friend float16n32 rem(float16n32 a,float16n32 b) { return float16n32(remainder((impl_t)a,(impl_t)b)); } 102 | friend float16n32 sqrt(float16n32 a) { return float16n32(sqrt((impl_t)a)); } 103 | friend float16n32 mulAdd(float16n32 a, float16n32 b, float16n32 c) { return float16n32((impl_t)a*(impl_t)b+(impl_t)c); } 104 | 105 | friend float16n32 half(float16n32 a) { return a/two(); } 106 | friend float16n32 square(float16n32 a) { return a*a; } 107 | friend float16n32 inv(float16n32 a) { return one()/a; } 108 | 109 | friend bool operator == (float16n32 a, float16n32 b) { return ((impl_t)a==(impl_t)b); } 110 | friend bool operator < (float16n32 a, float16n32 b) { return ((impl_t)a<(impl_t)b); } 111 | friend bool operator > (float16n32 a, float16n32 b) { return ((impl_t)a>(impl_t)b); } 112 | friend bool operator >= (float16n32 a, float16n32 b) { return ((impl_t)a>=(impl_t)b); } 113 | friend bool operator <= (float16n32 a, float16n32 b) { return ((impl_t)a<=(impl_t)b); } 114 | 115 | friend std::ostream & operator << (std::ostream & ons, const float16n32 & v) 116 | { 117 | ons << "f16(" << ((impl_t)v) << ")"; 118 | return ons; 119 | } 120 | 121 | private: 122 | value_t value; 123 | }; 124 | 125 | namespace std 126 | { 127 | inline CONSTEXPR14 float16n32 abs(float16n32 z) 128 | { 129 | return z.abs(); 130 | } 131 | } 132 | 133 | #if 0 134 | namespace std 135 | { 136 | template <> 137 | struct numeric_limits 138 | { 139 | using T=float16n32; 140 | using FT=typename T::value_t; 141 | using TT=typename T::trait_t; 142 | //static constexpr T mk(uint32_t v) { return T(false,v);} 143 | #define 
mk(x) T(FT({x})) 144 | static constexpr bool is_specialized = true; 145 | static constexpr T min() noexcept { return mk(TT::min_h); } 146 | static constexpr T max() noexcept { return mk(TT::max_h); } 147 | static constexpr T lowest() noexcept { return mk(TT::lowest_h); } 148 | static constexpr int digits = 24; //number of digits (in radix base) in the mantissa 149 | static constexpr int digits10 = 6; 150 | static constexpr bool is_signed = true; 151 | static constexpr bool is_integer = false; 152 | static constexpr bool is_exact = false; 153 | static constexpr int radix = 2; 154 | static constexpr T epsilon() noexcept { return mk(TT::epsilon_h); } 155 | //static constexpr T round_error() noexcept { return T(); } // 0.5f 156 | 157 | // this is also the maximum integer 158 | static constexpr int min_exponent = -126; 159 | static constexpr int min_exponent10 = -37; 160 | static constexpr int max_exponent = 127; 161 | static constexpr int max_exponent10 = 38; 162 | 163 | static constexpr bool has_infinity = true; 164 | static constexpr bool has_quiet_NaN = true; 165 | static constexpr bool has_signaling_NaN = false; 166 | static constexpr float_denorm_style has_denorm = std::denorm_present; 167 | static constexpr bool has_denorm_loss = false; 168 | static constexpr T infinity() noexcept { return mk(TT::pinfinity_h); } 169 | static constexpr T quiet_NaN() noexcept { return mk(TT::nan_h); } 170 | static constexpr T signaling_NaN() noexcept { return mk(TT::nan_h); } 171 | static constexpr T denorm_min() noexcept { return mk(TT::denorm_min_h); } 172 | 173 | static constexpr bool is_iec559 = true; 174 | static constexpr bool is_bounded = true; 175 | static constexpr bool is_modulo = false; 176 | 177 | static constexpr bool traps = false; 178 | static constexpr bool tinyness_before = false; 179 | #undef mk 180 | }; 181 | } 182 | #endif -------------------------------------------------------------------------------- /include/float2posit.hpp: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | * Expandedn one 11 | * 12 | g++ -I. --std=c++14 float2posit.hpp -DTESTFLOAT2POSIT -c 13 | */ 14 | #include "posit.h" 15 | #include "floatconst2bits.hpp" 16 | 17 | //template 18 | template 19 | CONSTEXPR14 PP float2posit(typename Trait::holder_t value) 20 | { 21 | //using PP = Posit; 22 | using PT=typename PP::PT; // trait 23 | constexpr auto totalbits = PP::vtotalbits; 24 | constexpr auto esbits = PP::vesbits; 25 | using FT=typename PP::fraction_t; 26 | using POSIT_UTYPE = typename PT::POSIT_UTYPE; 27 | using POSIT_STYPE = typename PT::POSIT_STYPE; 28 | using ET=typename PT::exponenttype; 29 | using UT=Unpacked; 30 | 31 | // Phase 2: float to expanded (Unpacked) handling special cases 32 | 33 | ET rawexp = bitset_getT(value,Trait::fraction_bits,Trait::exponent_bits) ; 34 | bool negativeSign = value & (((typename Trait::holder_t)1) << (Trait::data_bits-1)); 35 | auto exponentF = rawexp - Trait::exponent_bias; // ((un.u >> Trait::fraction_bits) & Trait::exponent_mask) 36 | auto fractionF = cast_right_to_left()(value); 37 | 38 | if(rawexp == ((1 << Trait::exponent_bits)-1)) 39 | { 40 | if(fractionF == 0) 41 | { 42 | return PP(typename PP::DeepInit(),negativeSign ? 
PT::POSIT_NINF : PT::POSIT_PINF); 43 | } 44 | else 45 | { 46 | return PP(typename PP::DeepInit(),PT::POSIT_NAN); 47 | } 48 | } 49 | else if (rawexp == 0) 50 | { 51 | if(fractionF == 0) 52 | { 53 | negativeSign = false; 54 | return PP::zero(); 55 | } 56 | else 57 | { 58 | int k = findbitleftmostC(fractionF); 59 | 60 | exponentF -= k; 61 | fractionF <<= k+1; // plus normalization 62 | } 63 | } 64 | 65 | // Phase 3: compute low as regime (Unpacked_Low) 66 | 67 | auto eexponent = clamp(exponentF,PT::minexponent,PT::maxexponent); // no overflow 68 | auto rr = PT::split_reg_exp(exponentF); 69 | auto fraction = cast_msb()(fractionF); 70 | auto reg = rr.first; 71 | auto exp = rr.second; 72 | 73 | // Phase 4: UnpackedLow to Posit 74 | 75 | auto rs = -reg+1 > reg+2 ? -reg+1:reg+2; //std::max(-reg + 1, reg + 2); MSVC issue 76 | auto es = (totalbits-rs-1) < esbits ? (totalbits-rs-1): esbits; //std::min((int)(totalbits-rs-1),(int)esbits); MSVC issue 77 | 78 | POSIT_UTYPE regbits = reg < 0 ? (PT::POSIT_HOLDER_MSB >> -reg) : (PT::POSIT_MASK << (PT::POSIT_HOLDER_SIZE-(reg+1))); // reg+1 bits on the left 79 | POSIT_UTYPE eexp = msb_exp()(exp); 80 | POSIT_STYPE p = ((fraction >> (rs+es+1)) | (eexp >> (rs+1)) | (regbits>>1)) >> (sizeof(PP)*8-totalbits); 81 | 82 | return PP(typename PP::DeepInit(),negativeSign ? 
-p : p); 83 | } 84 | 85 | template 86 | //template 87 | PP float2positF(typename Trait::value_t fvalue) 88 | { 89 | union { 90 | typename Trait::holder_t i; 91 | typename Trait::value_t f; 92 | } uu; 93 | uu.f = fvalue; 94 | return float2posit(uu.i); 95 | } 96 | 97 | template 98 | CONSTEXPR14 PP float2positx(float value) 99 | { 100 | return float2posit(float2bits(value)); 101 | } 102 | 103 | template 104 | CONSTEXPR14 PP float2positx(double value) 105 | { 106 | return float2posit(float2bits(value)); 107 | } 108 | 109 | 110 | #ifdef TESTFLOAT2POSIT 111 | 112 | // import struct; print("%04X" % struct.unpack("I",struct.pack('f',3.5))) 113 | using PP=Posit; 114 | enum Q : int16_t { 115 | v1 = float2posit(0x40600000).v, 116 | v2 = float2posit(float2bits(3.5f)).v, 117 | v3 = float2positx(3.5f).v 118 | //v4 = float2posit(float2bits(3.5)).v, 119 | 120 | }; 121 | 122 | int main(int argc, char const *argv[]) 123 | { 124 | std::cout << PP(typename PP::DeepInit(),Q::v1) << " " << (float)PP(typename PP::DeepInit(),Q::v1) << std::endl; 125 | std::cout << PP(typename PP::DeepInit(),Q::v2) << std::endl; 126 | std::cout << PP(typename PP::DeepInit(),Q::v3) << std::endl; 127 | //std::cout << PP(typename PP::DeepInit(),Q::v4) << std::endl; // FAILS 128 | return 0; 129 | } 130 | #endif -------------------------------------------------------------------------------- /include/floatconst2bits.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 
#pragma once
#include <cmath>
#include <cstdint>
// http://brnz.org/hbr/?p=1518
// Based on code from
// https://graphics.stanford.edu/~seander/bithacks.html
//
// TODO double version by Emanuele Ruffaldi

/**
 * Counts the zero bits above the most significant set bit of a 64-bit value.
 *
 * Usable in constant expressions (C++14 relaxed constexpr).
 *
 * @param v value to inspect
 * @return number of leading zero bits, in the range [0, 64]; 64 when v == 0
 */
constexpr int count_leading_zeroes(uint64_t v)
{
    // The table lookup below cannot tell 0 apart from 1 (both hash to slot 0),
    // so the all-zero input is answered explicitly.
    if (v == 0)
        return 64;

    // De Bruijn lookup: maps the hash of an isolated power of two to its bit index.
    constexpr char bit_position[64] = {
         0,  1,  2,  7,  3, 13,  8, 19,  4, 25, 14, 28,  9, 34, 20, 40,
         5, 17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57,
        63,  6, 12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56,
        62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58 };

    // Smear the highest set bit into every lower position (v becomes 2^k - 1) ...
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    v |= v >> 32;
    // ... then keep only that highest bit as a single power of two.
    v = (v >> 1) + 1;

    // The top six bits of the product identify which power of two v is.
    return 63 - bit_position[(v * 0x0218a392cd3d5dbfULL) >> 58]; // [3]
}

/**
 * Computes the IEEE-754 binary32 bit pattern of a float using arithmetic only,
 * so that it can be evaluated in a constant expression.
 *
 * Zeros, infinities, normal and subnormal values are encoded exactly.
 * Every NaN is returned as the single pattern 0x7fc00000; the sign and payload
 * of the input NaN are not preserved.
 *
 * @param f value to encode
 * @return the 32-bit encoding (sign in bit 31, exponent in bits 30..23,
 *         fraction in bits 22..0)
 */
constexpr uint32_t float2bits(float f)
{
    if (f == 0.0f)
    {
#if defined(__GNUC__) || defined(__clang__)
        // -0.0f compares equal to 0.0f, so the sign has to be read separately.
        // The builtin is used because it is accepted in constant expressions.
        return __builtin_copysignf(1.0f, f) < 0.0f ? 0x80000000u : 0u;
#else
        // No constexpr-friendly way to read the sign of a zero is used on this
        // compiler: -0.0f is encoded as +0.0f.
        return 0u;
#endif
    }
    if (f == INFINITY)
        return 0x7f800000u;
    if (f == -INFINITY)
        return 0xff800000u;
    if (f != f) // NaN
        return 0x7fc00000u; // canonical quiet NaN

    const uint32_t sign = f < 0.0f ? 1u : 0u;
    float abs_f = sign ? -f : f;

    int exponent = 254;

    // Scale the magnitude up to at least 2^87 so that, after the multiplication
    // by 2^-64 below, all 24 significant bits land in the integer part.
    // Every step is an exact power-of-two multiplication, tracked in `exponent`.
    while (abs_f < 0x1p87f)
    {
        abs_f *= 0x1p41f;
        exponent -= 41;
    }

    const uint64_t a = static_cast<uint64_t>(abs_f * 0x1p-64f);
    int lz = count_leading_zeroes(a);
    exponent -= lz;

    if (exponent <= 0)
    {
        // Subnormal: biased exponent is 0 and there is no implicit leading 1,
        // so the fraction is taken at a fixed position instead of being normalised.
        exponent = 0;
        lz = 8 - 1;
    }

    // Shift the leading 1 (normal case) out of the top, then keep the next 23 bits.
    const uint32_t significand = static_cast<uint32_t>((a << (lz + 1)) >> (64 - 23)); // [3]
    return (sign << 31) | (static_cast<uint32_t>(exponent) << 23) | significand;
}

#if 0
// GENERALIZE for double
// NOTE(review): disabled draft, kept as found. The template parameter list was
// not visible in the reviewed text and is reconstructed from the body's use of
// float_trait - confirm before enabling. The double overload below also returns
// `double` rather than an integer holder type and does not name a trait when
// calling float2bitsx - both look unfinished.
template <class float_trait>
constexpr typename float_trait::holder_t float2bitsx(typename float_trait::value_t f)
{
    if (f == float_trait::zero)
        return 0; // also matches -0.0f and gives wrong result
    else if (f == float_trait::pinfinity)
        return float_trait::pinfinity_h;
    else if (f == float_trait::ninfinity)
        return float_trait::ninfinity_h;
    else if (f != f) // NaN
        return float_trait::nan_h; // This is my NaN...

    typename float_trait::holder_t sign = f < float_trait::zero;
    typename float_trait::value_t abs_f = sign ? -f : f;

    int exponent = float_trait::exponent_max;

    while(abs_f < 0x1p87) // TODO
    {
        abs_f *= 0x1p41; // TODO
        exponent -= 41; // TODO
    }

    auto a = (uint64_t)(abs_f * 0x1p-64); // TODO
    int lz = count_leading_zeroes(a);
    exponent -= lz;

    if (exponent <= 0)
    {
        exponent = 0;
        lz = float_trait::exponent_bits - 1;
    }

    uint64_t significand = (a << (lz + 1)) >> (64 - float_trait::fraction_bits); // [3]
    return (sign << (float_trait::data_bits-1)) | (((typename float_trait::holder_t )exponent) << float_trait::fraction_bits) | significand; // TODO:
}

constexpr double float2bits(double f)
{
    return float2bitsx(f);
}
#endif
4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | * Zposit10n library 11 | * 12 | * Emanuele Ruffaldi 2017 13 | */ 14 | #pragma once 15 | #include 16 | #include 17 | 18 | #ifndef POSIT0_SPEC 19 | #define POSIT10_SPEC 20 | #define POSIT10_ES 2 21 | #define POSIT10_MODE PositSpec::WithNan 22 | #endif 23 | 24 | #include 25 | #include "posit.h" 26 | 27 | namespace posit10ns 28 | { 29 | extern int16_t opinv[]; 30 | extern int16_t ophalf[]; 31 | extern int16_t opexp2[]; 32 | extern int16_t opsquare[]; 33 | extern uint32_t op2float[]; 34 | extern int16_t opadd[]; 35 | extern int16_t opmul[]; 36 | extern int16_t opdiv[]; 37 | } 38 | 39 | 40 | class posit10 41 | { 42 | public: 43 | // template 44 | 45 | using value_t=int16_t; 46 | using fraction_t=uint16_t; 47 | static constexpr int vtotalbits = 10; 48 | static constexpr int vesbits = POSIT10_ES; 49 | using exponenttype=int32_t; 50 | using PT=PositTrait; 51 | using FT=fraction_t; 52 | using FPT=Posit; 53 | using UnpackedLow=typename FPT::UnpackedLow; 54 | using UnpackedT=typename FPT::UnpackedT; 55 | using info=typename FPT::info; 56 | 57 | struct DeepInit{}; 58 | 59 | explicit posit10(double a); 60 | explicit posit10(float a); 61 | posit10(int a); 62 | explicit posit10(DeepInit , int16_t a) : v(a) {} 63 | #ifdef POSITMUL 64 | posit10(FPT::PositMul x) : v(x.asPosit().v) {} 65 | #endif 66 | posit10() : v(0) {} 67 | explicit posit10(FPT x) : v(x.v) {} 68 | explicit posit10(UnpackedLow u) : v(FPT(u).v) {} 69 | explicit posit10(UnpackedT u) : v(FPT(u).v) {} 70 | posit10(const posit10 & a): v(a.v) {} 71 | 72 | bool is_negative() const { return v < 0; } 73 | bool has_neg_exponent() const { return (v & 0x100) == 0; } 74 | bool is_nan() const { return PT::withnan && v == -512; } 75 | bool is_infinity() const { return PT::withnan ? (v == -511 || v == 511) : v == -512;} 76 | static posit10 max() { return posit10(DeepInit(),PT::withnan ? 
510 : 511); } 77 | static posit10 min() { return posit10(DeepInit(),1); } 78 | static posit10 one() { return posit10(DeepInit(),0x100); } 79 | static posit10 afterone() { return posit10(DeepInit(),0x101); } 80 | static posit10 zero() { return posit10(DeepInit(),0); } 81 | static posit10 two() { return posit10(DeepInit(),288); } 82 | static posit10 onehalf() { return posit10(DeepInit(),0x80); } 83 | static posit10 pinfinity() { return posit10(DeepInit(),PT::withnan ? 511: -512); } 84 | static posit10 ninfinity() { return posit10(DeepInit(),PT::withnan ?-511: -512); } 85 | static posit10 nan() { return posit10(DeepInit(),PT::withnan ?-512: 0); } 86 | 87 | 88 | friend bool operator<(const posit10 &a, const posit10 & b) { return a.v < b.v; } 89 | friend bool operator>(const posit10 &a, const posit10 & b) { return a.v > b.v; } 90 | friend bool operator<=(const posit10 &a, const posit10 & b) { return a.v <= b.v;} 91 | friend bool operator>=(const posit10 &a, const posit10 & b) { return a.v >= b.v; } 92 | friend bool operator==(const posit10 &a, const posit10 & b) { return a.v == b.v; } 93 | friend bool operator!=(const posit10 &a, const posit10 & b) { return a.v !=b.v; } 94 | 95 | posit10 inv() const { return posit10(DeepInit(),posit10ns::opinv[uu()]); } 96 | posit10 half() const { return posit10(DeepInit(),posit10ns::ophalf[uu()]);} 97 | posit10 twice() const; 98 | posit10 exp2() const { return posit10(DeepInit(),posit10ns::opexp2[uu()]); } 99 | posit10 square() const { return posit10(DeepInit(),posit10ns::opsquare[uu()]); } 100 | posit10 operator-() const { return posit10(DeepInit(),-v); } 101 | 102 | #if 0 103 | // friend posit10 operator+(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),a.v<0? 
-posit10ns::opadd[(-a).uu()*1024+(-b).uu()] : posit10ns::opadd[a.uu()*1024+b.uu()]); } 104 | friend posit10 operator+(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),posit10ns::opadd[a.uu()*1024+b.uu()]); } 105 | friend posit10 operator*(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),a.v<0? -posit10ns::opmul[-a.uu()*1024+(-b).uu()] : posit10ns::opmul[a.uu()*1024+b.uu()]); } 106 | friend posit10 operator-(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),a.v<0? -posit10ns::opadd[(-a).uu()*1024+(b).uu()] : posit10ns::opadd[a.uu()*1024+(-b).uu()]); } 107 | friend posit10 operator/(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),posit10ns::opdiv[a.uu()*1024+b.uu()]); } 108 | #else 109 | friend posit10 operator+(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),posit10ns::opadd[a.uu()*1024+b.uu()]); } 110 | friend posit10 operator*(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),posit10ns::opmul[a.uu()*1024+b.uu()]); } 111 | friend posit10 operator-(const posit10 &a, const posit10 & b) { return a+(-b); } 112 | friend posit10 operator/(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),posit10ns::opdiv[a.uu()*1024+b.uu()]); } 113 | #endif 114 | friend std::ostream & operator << (std::ostream & ons, const posit10 & p); 115 | 116 | posit10& operator+=(const posit10 &a) { posit10 r = *this+a; v = r.v; return *this; } 117 | posit10& operator*=(const posit10 &a) { posit10 r = *this*a; v = r.v; return *this; } 118 | 119 | unsigned int uu() const { return ((unsigned int)(uint16_t)v) & 0x03FF;} 120 | UnpackedLow unpack_low() const { return as_posit().unpack_low(); } 121 | UnpackedT unpack() const { return as_posit().unpack(); } 122 | info analyze() const { return as_posit().analyze(); } 123 | 124 | FPT as_posit() const { return FPT(typename FPT::DeepInit(), v); } 125 | operator FPT() const { return as_posit(); } 126 | operator float() const { return 
uint32_to_float(posit10ns::op2float[uu()]); } 127 | operator double() const { return (float)*this; } 128 | operator int() const { return (float)*this; } 129 | 130 | int16_t v; 131 | 132 | }; 133 | 134 | inline posit10 half(posit10 z) { return z.half(); } 135 | 136 | inline posit10 twice(posit10 z) { return z.twice(); } 137 | 138 | inline posit10 square(posit10 z) { return z.square(); } 139 | 140 | inline posit10 inv(posit10 x) { return x.inv(); } 141 | 142 | inline posit10 neg(posit10 z) { return -z; } 143 | 144 | inline posit10 exp2(posit10 z) { return z.exp2(); } 145 | 146 | inline bool is_negative(posit10 a) 147 | { 148 | return a.v < 0; 149 | } 150 | 151 | namespace std { 152 | template<> class numeric_limits { 153 | public: 154 | static posit10 max() {return posit10::max(); }; 155 | static posit10 min() {return posit10::min(); }; 156 | static posit10 epsilon() {return posit10::afterone()-posit10::one(); }; 157 | // One can implement other methods if needed 158 | }; 159 | } 160 | 161 | namespace std 162 | { 163 | inline posit10 abs(posit10 z) 164 | { 165 | return posit10(posit10::DeepInit(),z.v < 0 ? -z.v : z.v); 166 | } 167 | 168 | inline posit10 min(const posit10 &a, const posit10 & b) 169 | { 170 | return a <= b ? a : b; 171 | } 172 | 173 | inline posit10 max(const posit10 &a, const posit10 & b) 174 | { 175 | return a >= b ? a : b; 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /include/posit12.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 
4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | * ZPosit8n library 11 | * 12 | * Emanuele Ruffaldi 2017 13 | */ 14 | #pragma once 15 | #include 16 | #include 17 | 18 | #ifndef POSIT12_SPEC 19 | #define POSIT12_SPEC 20 | #define POSIT12_ES 2 21 | #define POSIT12_MODE PositSpec::WithNan 22 | #endif 23 | 24 | #include "posit.h" 25 | 26 | namespace posit12ns 27 | { 28 | extern int16_t opinv[]; 29 | extern int16_t opexp2[]; 30 | extern int16_t opsquare[]; 31 | extern uint32_t op2float[]; 32 | } 33 | 34 | 35 | class posit12 36 | { 37 | public: 38 | // template 39 | using value_t=int16_t; 40 | using fraction_t=uint16_t; 41 | static constexpr int vtotalbits = 12; 42 | static constexpr int vesbits = POSIT12_ES; 43 | using exponenttype=int32_t; 44 | using PT=PositTrait; 45 | using FT=fraction_t; 46 | using FPT=Posit; 47 | using UnpackedLow=typename FPT::UnpackedLow; 48 | using UnpackedT=typename FPT::UnpackedT; 49 | using info=typename FPT::info; 50 | 51 | struct DeepInit{}; 52 | 53 | explicit posit12(double a); 54 | explicit posit12(float a); 55 | posit12(int a); 56 | #ifdef POSITMUL 57 | posit12(FPT::PositMul x) : v(x.asPosit().v) {} 58 | #endif 59 | explicit posit12(DeepInit , int16_t a) : v(a) {} 60 | posit12(): v(0) {} 61 | explicit posit12(FPT x) : v(x.v) {} 62 | explicit posit12(UnpackedLow u) : v(FPT(u).v) {} 63 | explicit posit12(UnpackedT u) : v(FPT(u).v) {} 64 | 65 | bool is_negative() const { return v < 0; } 66 | bool has_neg_exponent() const { return (v & 0x400) == 0; } 67 | bool is_nan() const { return PT::withnan && v == -2048; } 68 | bool is_infinity() const { return PT::withnan ? (v == -2047 || v == 2047) : v == -2048;} 69 | static posit12 max() { return posit12(DeepInit(),PT::POSIT_MAXPOS); } // 126 or 127 70 | static posit12 min() { return posit12(DeepInit(),1); } // ? 
-126 : -127 71 | static posit12 one() { return posit12(DeepInit(),0x400); } 72 | static posit12 afterone() { return posit12(DeepInit(),0x401); } // 0x40 73 | static posit12 onehalf() { return posit12(DeepInit(),0x200); } 74 | static posit12 pinfinity() { return posit12(DeepInit(),PT::withnan ? 2047: -2048); } 75 | static posit12 ninfinity() { return posit12(DeepInit(),PT::withnan ?-2047: -2048); } 76 | static posit12 nan() { return posit12(DeepInit(),PT::withnan ?-2048: 0); } 77 | 78 | posit12 inv() const { return posit12(DeepInit(),posit12ns::opinv[uu()]); } 79 | posit12 half() const; 80 | posit12 twice() const; 81 | posit12 exp2() const { return posit12(DeepInit(),posit12ns::opexp2[uu()]); } 82 | posit12 square() const { return posit12(DeepInit(),posit12ns::opsquare[uu()]); } 83 | posit12 operator-() const { return posit12(DeepInit(),-v); } 84 | 85 | friend bool operator<(posit12 a, posit12 b) { return a.v < b.v; } 86 | friend bool operator>(posit12 a, posit12 b) { return a.v > b.v; } 87 | friend bool operator<=(posit12 a, posit12 b) { return a.v <= b.v;} 88 | friend bool operator>=(posit12 a, posit12 b) { return a.v >= b.v; } 89 | friend bool operator==(posit12 a, posit12 b) { return a.v == b.v; } 90 | friend bool operator!=(posit12 a, posit12 b) { return a.v !=b.v; } 91 | 92 | friend posit12 operator+(posit12 a, posit12 b) { return (posit12)(a.as_posit()+b.as_posit()); } 93 | friend posit12 operator*(posit12 a, posit12 b) { return (posit12)(a.as_posit()*b.as_posit()); } 94 | friend posit12 operator-(posit12 a, posit12 b) { return a+(-b); } 95 | friend posit12 operator/(posit12 a, posit12 b) { return (posit12)(a.as_posit()/b.as_posit()); } 96 | friend std::ostream & operator << (std::ostream & ons, const posit12 & p); 97 | 98 | posit12& operator+=(const posit12 &a) { posit12 r = *this+a; v = r.v; return *this; } 99 | posit12& operator*=(const posit12 &a) { posit12 r = *this*a; v = r.v; return *this; } 100 | 101 | unsigned int uu() const { return ((unsigned 
int)(uint16_t)v) & 0x0FFF;} 102 | 103 | UnpackedLow unpack_low() const { return as_posit().unpack_low(); } 104 | UnpackedT unpack() const { return as_posit().unpack(); } 105 | info analyze() const { return as_posit().analyze(); } 106 | 107 | FPT as_posit() const { return FPT(typename FPT::DeepInit(), v); } 108 | operator FPT() const { return as_posit(); } 109 | operator float() const { return uint32_to_float(posit12ns::op2float[uu()]); } 110 | operator double() const { return (double)(float)*this; } 111 | operator int() const { return (int)(float)*this; } 112 | 113 | int16_t v; 114 | 115 | }; 116 | 117 | inline posit12 half(posit12 z) { return z.half(); } 118 | 119 | inline posit12 twice(posit12 z) { return z.twice(); } 120 | 121 | inline posit12 square(posit12 z) { return z.square(); } 122 | 123 | inline posit12 inv(posit12 x) { return x.inv(); } 124 | 125 | inline posit12 neg(posit12 z) { return -z; } 126 | 127 | inline posit12 exp2(posit12 z) { return z.exp2(); } 128 | 129 | namespace std { 130 | template<> class numeric_limits { 131 | public: 132 | static posit12 max() {return posit12::max(); }; 133 | static posit12 min() {return posit12::min(); }; 134 | static posit12 epsilon() {return posit12::afterone()-posit12::one(); }; 135 | // One can implement other methods if needed 136 | }; 137 | } 138 | 139 | 140 | namespace std 141 | { 142 | inline posit12 abs(posit12 z) 143 | { 144 | return posit12(posit12::DeepInit(),z.v < 0 ? -z.v : z.v); 145 | } 146 | 147 | 148 | inline posit12 min(posit12 a, posit12 b) 149 | { 150 | return a <= b ? a : b; 151 | } 152 | 153 | inline posit12 max(posit12 a, posit12 b) 154 | { 155 | return a >= b ? a : b; 156 | } 157 | } -------------------------------------------------------------------------------- /include/posit8.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 
4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | * ZPosit8n library 11 | * 12 | * Emanuele Ruffaldi 2017 13 | */ 14 | #pragma once 15 | #include 16 | #include 17 | #include 18 | 19 | #ifndef POSIT8_SPEC 20 | #define POSIT8_SPEC 21 | #define POSIT8_ES 0 22 | #define POSIT8_MODE PositSpec::WithNan 23 | #endif 24 | 25 | #include "posit.h" 26 | 27 | namespace posit8ns 28 | { 29 | extern int8_t opadd[]; 30 | extern int8_t opmul[]; 31 | extern int8_t opdiv[]; 32 | extern int8_t opinv[]; 33 | extern int8_t opexp2[]; 34 | extern int8_t ophalf[]; 35 | extern int8_t opsquare[]; 36 | extern uint32_t op2float[]; 37 | } 38 | 39 | class posit8 40 | { 41 | public: 42 | // template 43 | 44 | 45 | using value_t=int8_t; 46 | using fraction_t=uint8_t; 47 | static constexpr int vtotalbits = 8; 48 | static constexpr int vesbits = POSIT8_ES; 49 | using exponenttype=int32_t; 50 | using PT=PositTrait; 51 | using FT=fraction_t; 52 | using FPT=Posit; 53 | using UnpackedLow=typename FPT::UnpackedLow; 54 | using UnpackedT=typename FPT::UnpackedT; 55 | using info=typename FPT::info; 56 | 57 | struct DeepInit{}; 58 | 59 | explicit posit8(double a); 60 | explicit posit8(float a); 61 | posit8(int a); 62 | #ifdef POSITMUL 63 | posit8(FPT::PositMul x) : v(x.asPosit().v) {} 64 | #endif 65 | explicit posit8(DeepInit , int8_t a) : v(a) {} 66 | posit8(): v(0) {} 67 | explicit posit8(FPT x) : v(x.v) {} 68 | explicit posit8(UnpackedLow u) : v(FPT(u).v) {} 69 | explicit posit8(UnpackedT u) : v(FPT(u).v) {} 70 | 71 | bool is_negative() const { return v < 0; } 72 | bool has_neg_exponent() const { return (v & 0x80) == 0; } 73 | bool is_nan() const { return PT::withnan && v == PT::POSIT_NAN; } 74 | bool is_infinity() const { return (v == PT::POSIT_PINF || v == PT::POSIT_NINF);} 75 | static posit8 max() { return posit8(DeepInit(),PT::POSIT_MAXPOS); } // 126 or 127 76 | static posit8 min() { return posit8(DeepInit(),1); } // ? 
-126 : -127 77 | static posit8 one() { return posit8(DeepInit(),PT::POSIT_ONE); } // 0x40 78 | static posit8 afterone() { return posit8(DeepInit(),PT::POSIT_ONE+1); } // 0x40 79 | static posit8 two() { return posit8(DeepInit(),72); } 80 | static posit8 zero() { return posit8(DeepInit(),0); } 81 | static posit8 onehalf() { return posit8(DeepInit(),0x20); } 82 | static posit8 pinfinity() { return posit8(DeepInit(),PT::POSIT_PINF); } // ? 127: -128); } 83 | static posit8 ninfinity() { return posit8(DeepInit(),PT::POSIT_NINF); } //,PT::withnan ?-127: -128); } 84 | static posit8 nan() { return posit8(DeepInit(),PT::POSIT_NAN); } // withnan ?-128: 0); } 85 | 86 | posit8 inv() const { return posit8(DeepInit(),posit8ns::opinv[uu()]); } 87 | posit8 half() const { return posit8(DeepInit(),posit8ns::ophalf[uu()]);} 88 | posit8 twice() const; 89 | posit8 exp2() const { return posit8(DeepInit(),posit8ns::opexp2[uu()]); } 90 | posit8 square() const { return posit8(DeepInit(),posit8ns::opsquare[uu()]); } 91 | posit8 operator-() const { return posit8(DeepInit(),-v); } 92 | 93 | friend bool operator<(posit8 a, posit8 b) { return a.v < b.v; } 94 | friend bool operator>(posit8 a, posit8 b) { return a.v > b.v; } 95 | friend bool operator<=(posit8 a, posit8 b) { return a.v <= b.v;} 96 | friend bool operator>=(posit8 a, posit8 b) { return a.v >= b.v; } 97 | friend bool operator==(posit8 a, posit8 b) { return a.v == b.v; } 98 | friend bool operator!=(posit8 a, posit8 b) { return a.v !=b.v; } 99 | 100 | friend posit8 operator+(posit8 a, posit8 b) { return posit8(DeepInit(),posit8ns::opadd[a.uu()*256+b.uu()]); } 101 | friend posit8 operator*(posit8 a, posit8 b) { return posit8(DeepInit(),posit8ns::opmul[a.uu()*256+b.uu()]); } 102 | friend posit8 operator-(posit8 a, posit8 b) { return a+(-b); } 103 | friend posit8 operator/(posit8 a, posit8 b) { return posit8(DeepInit(),posit8ns::opdiv[a.uu()*256+b.uu()]); } 104 | friend std::ostream & operator << (std::ostream & ons, const posit8 & p); 105 
| 106 | posit8& operator+=(const posit8 &a) { posit8 r = *this+a; v = r.v; return *this; } 107 | posit8& operator*=(const posit8 &a) { posit8 r = *this*a; v = r.v; return *this; } 108 | 109 | unsigned int uu() const { return (unsigned int)(uint8_t)v;} 110 | 111 | FPT as_posit() const { return FPT(typename FPT::DeepInit(), v); } 112 | operator FPT() const { return as_posit(); } 113 | operator float() const { return uint32_to_float(posit8ns::op2float[uu()]); } 114 | operator double() const { return (float)*this; } 115 | operator int() const { return (float)*this; } 116 | 117 | UnpackedLow unpack_low() const { return as_posit().unpack_low(); } 118 | UnpackedT unpack() const { return as_posit().unpack(); } 119 | info analyze() const { return as_posit().analyze(); } 120 | 121 | 122 | int8_t v; 123 | 124 | }; 125 | 126 | inline posit8 half(posit8 z) { return z.half(); } 127 | 128 | inline posit8 twice(posit8 z) { return z.twice(); } 129 | 130 | inline posit8 square(posit8 z) { return z.square(); } 131 | 132 | inline posit8 inv(posit8 x) { return x.inv(); } 133 | 134 | inline posit8 neg(posit8 z) { return -z; } 135 | 136 | inline posit8 exp2(posit8 z) { return z.exp2(); } 137 | 138 | inline bool is_negative(posit8 a) 139 | { 140 | return a.v < 0; 141 | } 142 | 143 | 144 | namespace std { 145 | template<> class numeric_limits { 146 | public: 147 | static posit8 max() {return posit8::max(); }; 148 | static posit8 min() {return posit8::min(); }; 149 | static posit8 epsilon() {return posit8::afterone()-posit8::one(); }; 150 | // One can implement other methods if needed 151 | }; 152 | } 153 | 154 | 155 | 156 | namespace std 157 | { 158 | inline posit8 abs(posit8 z) 159 | { 160 | return posit8(posit8::DeepInit(),z.v < 0 ? -z.v : z.v); 161 | } 162 | 163 | inline posit8 min(posit8 a, posit8 b) 164 | { 165 | return a <= b ? a : b; 166 | } 167 | 168 | inline posit8 max(posit8 a, posit8 b) 169 | { 170 | return a >= b ? 
a : b; 171 | } 172 | } 173 | 174 | -------------------------------------------------------------------------------- /include/positeigen.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | * Emanuele Ruffaldi (C) 2017 11 | * Templated C++ Posit 12 | */ 13 | #pragma once 14 | #include "posit.h" 15 | #include 16 | 17 | namespace Eigen { 18 | 19 | template 20 | struct NumTraits > 21 | : GenericNumTraits > // permits to get the epsilon, dummy_precision, lowest, highest functions 22 | { 23 | typedef Posit P; 24 | 25 | typedef P Real; 26 | typedef P NonInteger; 27 | typedef P Nested; 28 | 29 | //static inline Real epsilon() { return 0; } 30 | //static inline Real dummy_precision() { return 0; } 31 | //static inline Real digits10() { return 0; } 32 | // highest() and lowest() functions returning the highest and lowest possible values respectively. 33 | // An epsilon() function which, unlike std::numeric_limits::epsilon(), it returns a Real instead of a T. 34 | // digits10() function returning the number of decimal digits that can be represented without change. This is the analogue of std::numeric_limits::digits10 which is used as the default implementation if specialized. 
35 | 36 | 37 | enum { 38 | IsComplex = 0, 39 | IsInteger = 0, 40 | IsSigned = 1, 41 | RequireInitialization = 1, 42 | ReadCost = 1, 43 | AddCost = 3, 44 | MulCost = 3 45 | }; 46 | }; 47 | 48 | 49 | 50 | template 51 | struct NumTraits > 52 | : GenericNumTraits > // permits to get the epsilon, dummy_precision, lowest, highest functions 53 | { 54 | typedef Unpacked P; 55 | 56 | typedef P Real; 57 | typedef P NonInteger; 58 | typedef P Nested; 59 | 60 | //static inline Real epsilon() { return 0; } 61 | //static inline Real dummy_precision() { return 0; } 62 | //static inline Real digits10() { return 0; } 63 | // highest() and lowest() functions returning the highest and lowest possible values respectively. 64 | // An epsilon() function which, unlike std::numeric_limits::epsilon(), it returns a Real instead of a T. 65 | // digits10() function returning the number of decimal digits that can be represented without change. This is the analogue of std::numeric_limits::digits10 which is used as the default implementation if specialized. 
66 | 67 | 68 | enum { 69 | IsComplex = 0, 70 | IsInteger = 0, 71 | IsSigned = 1, 72 | RequireInitialization = 1, 73 | ReadCost = 1, 74 | AddCost = 3, 75 | MulCost = 3 76 | }; 77 | }; 78 | 79 | namespace internal { 80 | 81 | template 82 | inline typename Posit::UnpackedT cast(const Posit& x) 83 | { return x.unpack(); } 84 | } 85 | 86 | #if 0 87 | namespace internal { 88 | 89 | template 90 | inline double cast,double>(const Posit& x) 91 | { return x.todouble(); } 92 | 93 | template 94 | inline float cast,float>(const Posit& x) 95 | { return x.tofloat(); } 96 | 97 | template 98 | inline Posit cast >(const float & x) 99 | { return Posit::convert(x); } 100 | 101 | template 102 | inline Posit cast >(const double & x) 103 | { return Posit::convert(x); } 104 | 105 | } 106 | #endif 107 | 108 | } -------------------------------------------------------------------------------- /include/simd/asimd_all.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | * Requires: AVX2, allows for AVX512F 11 | * 12 | - missing: 64bit integer, unsigned integers except uint8 13 | - AVX2: double 4, float/int32 8, int16 16, int8 32 14 | 15 | */ 16 | #pragma once 17 | #include "asimd_base.h" 18 | #include "asimd_32.h" 19 | #include "asimd_8.h" 20 | #include "asimd_16.h" 21 | #include "asimd_f.h" 22 | #include "asimd_d.h" 23 | 24 | 25 | -------------------------------------------------------------------------------- /include/simd/asimd_f.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 
4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | * Requires: AVX2, allows for AVX512F 11 | * 12 | - missing: 64bit integer, unsigned integers except uint8 13 | - AVX2: double 4, float/int32 8, int16 16, int8 32 14 | 15 | * 128bit for f: 4 => int32 16 | * 256bit for f: 8 => int32 17 | * 512bit for f: 16 => int32 18 | */ 19 | #pragma once 20 | #include "asimd_32.h" 21 | 22 | // DUE TO _mm_i32gather_epi32 23 | #ifdef __AVX2__ 24 | 25 | class simd_f_4 26 | { 27 | public: 28 | typedef float type; 29 | typedef __m128 simdtype; 30 | typedef simd_i32_4 indextype; // for indices 31 | typedef WithGather gathermode; 32 | typedef simd_f_4 self; 33 | typedef self cmpresult; 34 | enum { csize = 4 }; 35 | 36 | inline simd_f_4() {} 37 | inline simd_f_4(type v) : x(_mm_set1_ps(v)) {} 38 | inline simd_f_4(simdtype y): x(y) {} 39 | inline simd_f_4(type v1, type v2, type v3, type v4) : x(_mm_setr_ps(v1,v2,v3,v4)) {} 40 | inline void load(const type * ptr) { x = _mm_loadu_ps((const type*)ptr); } 41 | inline void store(type * ptr) const { _mm_storeu_ps((type*)ptr,x); } 42 | 43 | inline self max(self & y) const { return self(_mm_max_ps(x,y.x)); } 44 | inline cmpresult cmplt(self & y) const { return cmpresult(_mm_cmplt_ps(x,y.x)); } 45 | inline void gather(const type * ptr, indextype idx) { x = _mm_i32gather_ps(ptr, idx.x, 1); } 46 | 47 | inline unsigned int size() const { return csize; } 48 | 49 | /* 50 | inline type operator[] (unsigned int idx) const 51 | { 52 | type temp[csize]; 53 | store(temp); 54 | return temp[idx]; 55 | } 56 | */ 57 | 58 | void initincrement(type x) 59 | { 60 | type a[csize]; 61 | for(int i = 0; i < csize; i++) 62 | a[i] = i*x; 63 | load(a); 64 | } 65 | 66 | inline void blend(self & other, self mask) 67 | { 68 | x = _mm_blendv_ps(x,other.x,mask.x); 69 | } 70 | 71 | static inline void blendindex(indextype & oindex, indextype other, self mask); 72 | 73 | simdtype x; 74 | }; 75 | 76 | 77 | inline void simd_f_4::blendindex(indextype & 
oindex, indextype other, self mask) 78 | { 79 | oindex.x = _mm_blendv_ps(oindex.x,other.x,mask.x); 80 | } 81 | #endif 82 | 83 | #ifdef __AVX2__ 84 | 85 | class simd_f_8 86 | { 87 | public: 88 | typedef float type; 89 | typedef __m256 simdtype; 90 | typedef simd_i32_8 indextype; 91 | typedef WithGather gathermode; 92 | typedef simd_f_8 self; 93 | typedef self cmpresult; 94 | enum { csize = 8 }; 95 | 96 | inline simd_f_8() {} 97 | inline simd_f_8(type v) : x(_mm256_set1_ps(v)) {} 98 | inline simd_f_8(simdtype y): x(y) {} 99 | //inline simd_f_8(type v1, type v2, type v3, type v4) : x(_mm256_setr_ps(v1,v2,v3,v4)) {} 100 | inline void load(const type * ptr) { x = _mm256_loadu_ps((const type*)ptr); } 101 | inline void store(type * ptr) const { _mm256_storeu_ps((type*)ptr,x); } 102 | 103 | inline self max(simd_f_8 & y) const { return self(_mm256_max_ps(x,y.x)); } 104 | inline cmpresult cmplt(self & y) const { return cmpresult(_mm256_cmp_ps(x,y.x, 1 /*_CMP_LT_OS*/)); } 105 | inline void gather(const type * ptr, indextype idx) { x = _mm256_i32gather_ps(ptr, idx.x, 1); } 106 | 107 | inline unsigned int size() const { return csize; } 108 | inline type operator[] (unsigned int idx) const 109 | { 110 | type temp[csize]; 111 | store(temp); 112 | return temp[idx]; 113 | } 114 | 115 | void initincrement(type x) 116 | { 117 | type a[csize]; 118 | for(int i = 0; i < csize; i++) 119 | a[i] = i*x; 120 | load(a); 121 | } 122 | 123 | inline void blend(self & other, self mask) 124 | { 125 | x = _mm256_blendv_ps(x,other.x,mask.x); 126 | } 127 | 128 | static inline void blendindex(indextype & oindex, indextype other, self mask); 129 | 130 | simdtype x; 131 | }; 132 | 133 | 134 | inline void simd_f_8::blendindex(indextype & oindex, indextype other, self mask) 135 | { 136 | oindex.x = _mm256_blendv_ps(oindex.x,other.x,mask.x); 137 | } 138 | 139 | 140 | #endif 141 | 142 | #ifdef WITH_AVX512F 143 | class simd_f_16 144 | { 145 | public: 146 | typedef float type; 147 | typedef __m512 simdtype; 
148 | typedef simd_i32_16 indextype; 149 | typedef WithGather gathermode; 150 | typedef simd_f_16 self; 151 | typedef __mmask16 cmpresult; 152 | enum { csize = 16 }; 153 | 154 | 155 | inline simd_f_16() {} 156 | inline simd_f_16(type v) : x(_mm512_set1_ps(v)) {} 157 | inline simd_f_16(simdtype y): x(y) {} 158 | //inline simd_f_8(type v1, type v2, type v3, type v4) : x(_mm256_setr_ps(v1,v2,v3,v4)) {} 159 | inline void load(const type * ptr) { x = _mm512_loadu_ps((const type*)ptr); } 160 | inline void store(type * ptr) const { _mm512_storeu_ps((type*)ptr,x); } 161 | 162 | inline self max(self & y) const { return self(_mm512_max_ps(x,y.x)); } 163 | inline cmpresult cmplt(self & y) const { return (_mm512_cmp_ps_mask(x,y.x, 1 /*_CMP_LT_OS*/)); } 164 | inline void gather(const type * ptr, indextype idx) { x = _mm512_i32gather_ps(idx.x,ptr, 1); } 165 | 166 | inline unsigned int size() const { return csize; } 167 | inline type operator[] (unsigned int idx) const 168 | { 169 | type temp[csize]; 170 | store(temp); 171 | return temp[idx]; 172 | } 173 | 174 | void initincrement(type x) 175 | { 176 | type a[csize]; 177 | for(int i = 0; i < csize; i++) 178 | a[i] = i*x; 179 | load(a); 180 | } 181 | 182 | inline void blend(self & other, cmpresult mask) 183 | { 184 | x = _mm512_mask_blend_ps(mask,x,other.x); 185 | } 186 | 187 | static inline void blendindex(indextype & oindex, indextype other, cmpresult mask) 188 | { 189 | oindex.x = _mm512_mask_blend_epi32(mask, oindex.x,other.x); 190 | } 191 | 192 | simdtype x; 193 | }; 194 | #endif 195 | 196 | #ifdef __AVX2__ 197 | DECLAREOSTREAM(simd_f_4,"f_4") 198 | DECLAREOSTREAM(simd_f_8,"f_8") 199 | #endif 200 | 201 | #ifdef WITH_AVX512F 202 | DECLAREOSTREAM(simd_f_16,"f_16") 203 | #endif 204 | 205 | 206 | #ifdef WITH_AVX512F 207 | 208 | template 209 | struct simdgenn 210 | { 211 | using type = typename std::conditional::type >::type; 212 | typedef SimdMode simdmarker; 213 | }; 214 | 215 | 216 | template <> 217 | struct simdgen 218 | { 219 
| typedef simd_f_16 type; 220 | typedef SimdMode simdmarker; 221 | }; 222 | #else 223 | #ifdef __AVX2__ 224 | template 225 | struct simdgenn 226 | { 227 | using type = typename std::conditional::type; 228 | typedef SimdMode simdmarker; 229 | }; 230 | 231 | template <> 232 | struct simdgen 233 | { 234 | typedef simd_f_8 type; 235 | typedef SimdMode simdmarker; 236 | }; 237 | #endif 238 | #endif 239 | 240 | 241 | 242 | -------------------------------------------------------------------------------- /include/simdposit8.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | * ZPosit8n library 11 | * 12 | * Emanuele Ruffaldi 2017 13 | */ 14 | #pragma once 15 | #include 16 | #include 17 | #include 18 | 19 | #ifndef POSIT8_SPEC 20 | #define POSIT8_SPEC 21 | #define POSIT8_ES 2 22 | #define POSIT8_NAN PositSpec::WithNan 23 | #endif 24 | 25 | #include "posit.h" 26 | 27 | namespace posit8ns 28 | { 29 | extern uint8_t opadd[]; 30 | extern uint8_t opmul[]; 31 | extern uint8_t opdiv[]; 32 | extern uint8_t opinv[]; 33 | extern uint8_t opexp2[]; 34 | extern uint8_t opsquare[]; 35 | extern uint32_t op2float[]; 36 | } 37 | 38 | /* 39 | * Idea: optimize the extraction of 40 | 41 | 42 | */ 43 | 44 | class simd_posit8_32 45 | { 46 | public: 47 | 48 | static __m256i lookup1(__m256i a, const uint8_t * tab) 49 | { 50 | alignas(__m256i) uint8_t idata[32]; 51 | alignas(__m256i) uint8_t odata[32]; 52 | 53 | _mm256_store_si256((__m256i*)idata,a); 54 | for(int i = 0; i < 32; i++) 55 | odata[i] = tab[idata[i]]; 56 | return _mm256_load_si256((__m256i*)odata); 57 | } 58 | 59 | static __m256i lookup2(__m256i a, __m256i b, const uint8_t * tab) 60 | { 61 | alignas(__m256i) uint8_t idata1[32]; 62 | alignas(__m256i) uint8_t idata2[32]; 63 | alignas(__m256i) uint8_t odata[32]; 
64 | 65 | _mm256_store_si256((__m256i*)idata1,a); 66 | _mm256_store_si256((__m256i*)idata2,b); 67 | for(int i = 0; i < 32; i++) 68 | odata[i] = tab[idata1[i]*256+idata2[i]]; 69 | return _mm256_load_si256((__m256i*)odata); 70 | } 71 | 72 | using this_t=simd_posit8_32; 73 | using cmpresult=__m256i; 74 | using FPT=Posit; 75 | using PT=typename FPT::PT; 76 | using FT=uint16_t; 77 | using base_t=FPT; 78 | 79 | enum { csize = 32 }; 80 | struct Init{}; 81 | 82 | explicit simd_posit8_32(float a); 83 | explicit simd_posit8_32(int a); 84 | explicit simd_posit8_32(Init , __m256i x) : v(x) {} 85 | explicit simd_posit8_32() {} 86 | explicit simd_posit8_32(__m256i x) : v(x) {} 87 | 88 | cmpresult is_negative() const { return (_mm256_cmpgt_epi8(_mm256_set1_epi8(0),v)); } 89 | cmpresult has_neg_exponent() const { return (_mm256_cmpeq_epi8(_mm256_set1_epi8(0),_mm256_and_si256 (_mm256_set1_epi8(0x40),v))); } // (v & 0x40) == 0; } 90 | cmpresult is_nan() const { return (_mm256_cmpeq_epi8(_mm256_set1_epi8(-128),v)); } 91 | cmpresult is_infinity() const { return PT::withnan ? 
(_mm256_or_si256( _mm256_cmpeq_epi8(_mm256_set1_epi8(-127),v), _mm256_cmpeq_epi8(_mm256_set1_epi8(127),v))) : (_mm256_cmpeq_epi8(_mm256_set1_epi8(-128),v));} 92 | 93 | this_t inv() const { return this_t(lookup1(v,posit8ns::opinv)); } 94 | this_t half() const { return *this / simd_posit8_32(2); } 95 | this_t twice() const { return *this * simd_posit8_32(2); } 96 | 97 | this_t exp2() const { return this_t(lookup1(v,posit8ns::opexp2)); } 98 | this_t square() const { return this_t(lookup1(v,posit8ns::opsquare)); } 99 | this_t operator-() const { return this_t(_mm256_sign_epi8 (v,v)); } 100 | 101 | 102 | friend this_t operator+(this_t a, this_t b) { return this_t(lookup2(a.uu(),b.uu(),posit8ns::opadd)); } 103 | friend this_t operator*(this_t a, this_t b) { return this_t(lookup2(a.uu(),b.uu(),posit8ns::opmul)); } 104 | friend this_t operator-(this_t a, this_t b) { return a+(-b); } 105 | friend this_t operator/(this_t a, this_t b) { return this_t(lookup2(a.uu(),b.uu(),posit8ns::opdiv)); } 106 | friend std::ostream & operator << (std::ostream & ons, const this_t & p); 107 | 108 | 109 | __m256i uu() const { return v;} 110 | 111 | operator std::array () const ; 112 | 113 | private: 114 | __m256i v; 115 | 116 | }; 117 | 118 | inline std::ostream & operator << (std::ostream & ons, const std::array & a) 119 | { 120 | ons << "a32(" ; 121 | for(int i = 0; i < 32; i++) 122 | ons << " " << a[i]; 123 | ons << ")"; 124 | return ons; 125 | } 126 | 127 | inline std::ostream & operator << (std::ostream & ons, const __m256i & p) 128 | { 129 | alignas(__m256i) uint8_t idata1[32]; 130 | _mm256_store_si256((__m256i*)idata1,p); 131 | ons << "256i(" << std::hex ; 132 | for(int i = 0; i < 32; i++) 133 | ons << " " << (int)idata1[i]; 134 | ons << std::dec << ")"; 135 | return ons; 136 | } 137 | 138 | inline simd_posit8_32 half(simd_posit8_32 z) { return z.half(); } 139 | 140 | inline simd_posit8_32 twice(simd_posit8_32 z) { return z.twice(); } 141 | 142 | inline simd_posit8_32 
square(simd_posit8_32 z) { return z.square(); } 143 | 144 | inline simd_posit8_32 inv(simd_posit8_32 x) { return x.inv(); } 145 | 146 | inline simd_posit8_32 neg(simd_posit8_32 z) { return -z; } 147 | 148 | inline simd_posit8_32 exp2(simd_posit8_32 z) { return z.exp2(); } 149 | 150 | template 151 | T gauss2(T x, T mu, T sigma) 152 | { 153 | return exp2(-half(square((x-mu)/sigma))); 154 | } -------------------------------------------------------------------------------- /include/tposit.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | * TPosit is a generalized tabled posit 11 | * 12 | * Emanuele Ruffaldi 2017 13 | */ 14 | #pragma once 15 | #include 16 | #include 17 | 18 | #ifndef CONSTEXPR14 19 | #define CONSTEXPR14 20 | #endif 21 | 22 | #include "posit.h" 23 | 24 | 25 | template 26 | class tposit 27 | { 28 | public: 29 | // template 30 | 31 | 32 | using value_t=T; 33 | using fraction_t=FT; 34 | static constexpr int vtotalbits = totalbits; 35 | static constexpr int vesbits = esbits; 36 | using FPT=Posit; 37 | using PT=typename FPT::PT; 38 | using exponenttype=typename PT::exponenttype; 39 | using FT=fraction_t; 40 | using PTU=typename PT::POSIT_UTYPE; 41 | using UnpackedLow=typename FPT::UnpackedLow; 42 | using UnpackedT=typename FPT::UnpackedT; 43 | using info=typename FPT::info; 44 | 45 | private: 46 | static T opinv[]; 47 | static T ophalf[]; 48 | static T opexp2[]; 49 | static T optwice[]; 50 | static T opsquare[]; 51 | static T opmul[]; 52 | static T opadd[]; 53 | static T opdiv[]; 54 | static float op2float[]; 55 | 56 | public: 57 | struct DeepInit{}; 58 | 59 | explicit tposit(double a) 60 | explicit tposit(float a); 61 | explicit tposit(int a); 62 | explicit tposit(DeepInit , int8_t a) : v(a) {} 63 | explicit tposit() {} 64 | 
explicit tposit(FPT x) : v(x.v) {} 65 | explicit tposit(UnpackedLow u) : v(FPT(u).v) {} 66 | explicit tposit(UnpackedT u) : v(FPT(u).v) {} 67 | 68 | bool is_negative() const { return v < 0; } 69 | bool has_neg_exponent() const { return (v & PT::POSIT_INVERTBIT) == 0; } 70 | bool is_nan() const { return PT::withnan && v == PT::POSIT_NAN; } 71 | bool is_infinity() const { return v==PT::POSIT_NINF || v==PT::POSIT_PINF; } 72 | static tposit max() { return tposit(DeepInit(),PT::POSIT_MAX); } 73 | static tposit min() { return tposit(DeepInit(),PT::POSIT_MIN); } // <------ 74 | static tposit one() { return tposit(DeepInit(),PT::POSIT_ONE); } 75 | 76 | //static tposit two() { return tposit(DeepInit(),72); } // <------ 77 | //static tposit onehalf() { return tposit(DeepInit(),0x20); } // <------ 78 | 79 | static tposit pinfinity() { return tposit(DeepInit(),PT::POSIT_PINF); 80 | static tposit ninfinity() { return tposit(DeepInit(),PT::POSIT_NINF); 81 | static tposit nan() { return tposit(DeepInit(),PT::POSIT_NAN); } 82 | 83 | tposit inv() const { return tposit(DeepInit(),opinv[uu()]); } 84 | tposit half() const { return tposit(DeepInit(),ophalf[uu()]);} 85 | tposit twice() const { return tposit(DeepInit(),optwice[uu()]); } 86 | tposit exp2() const { return tposit(DeepInit(),opexp2[uu()]); } 87 | tposit square() const { return tposit(DeepInit(),opsquare[uu()]); } 88 | tposit operator-() const { return tposit(DeepInit(),-v); } 89 | 90 | friend bool operator<(tposit a, tposit b) { return a.v < b.v; } 91 | friend bool operator>(tposit a, tposit b) { return a.v > b.v; } 92 | friend bool operator<=(tposit a, tposit b) { return a.v <= b.v;} 93 | friend bool operator>=(tposit a, tposit b) { return a.v >= b.v; } 94 | friend bool operator==(tposit a, tposit b) { return a.v == b.v; } 95 | friend bool operator!=(tposit a, tposit b) { return a.v !=b.v; } 96 | 97 | friend tposit operator+(tposit a, tposit b) { return tposit(DeepInit(),opadd[a.uu()*256+b.uu()]); } // <------ 98 | friend 
tposit operator*(tposit a, tposit b) { return tposit(DeepInit(),opmul[a.uu()*256+b.uu()]); } // <------ 99 | friend tposit operator-(tposit a, tposit b) { return a+(-b); } 100 | friend tposit operator/(tposit a, tposit b) { return tposit(DeepInit(),opdiv[a.uu()*256+b.uu()]); } // <------ 101 | 102 | tposit& operator+=(const tposit &a) { tposit r = *this+a; v = r.v; return *this; } 103 | tposit& operator*=(const tposit &a) { tposit r = *this*a; v = r.v; return *this; } 104 | 105 | PTU uu() const { return (PTU)v;} 106 | 107 | FPT as_posit() const { return FPT(typename FPT::DeepInit(), v); } 108 | operator FPT() const { return as_posit(); } 109 | operator float() const { return uint32_to_float(op2float[uu()]); } 110 | operator double() const { return (float)*this; } 111 | operator int() const { return (float)*this; } 112 | 113 | UnpackedLow unpack_low() const { return as_posit().unpack_low(); } 114 | UnpackedT unpack() const { return as_posit().unpack(); } 115 | info analyze() const { return as_posit().analyze(); } 116 | 117 | 118 | friend std::ostream & operator << (std::ostream & ons, const tposit & p) 119 | { 120 | ons << "posit(" << p.uu() << ")" ; 121 | return ons; 122 | } 123 | 124 | 125 | T v; 126 | 127 | }; 128 | 129 | template 130 | inline tposit half(tposit z) { return z.half(); } 131 | 132 | template 133 | inline tposit twice(tposit z) { return z.twice(); } 134 | 135 | template 136 | inline tposit square(tposit z) { return z.square(); } 137 | 138 | template 139 | inline tposit inv(tposit x) { return x.inv(); } 140 | 141 | template 142 | CONSTEXPR14 inline tposit neg(tposit z) { return -z; } 143 | 144 | template 145 | inline tposit exp2(tposit z) { return z.exp2(); } 146 | 147 | template 148 | inline bool is_negative(tposit a) 149 | { 150 | return a.v < 0; 151 | } 152 | 153 | template 154 | tposit::tposit(int a) 155 | { 156 | if(a == 0) 157 | v = 0; 158 | else if(a == 1) 159 | v = PT::POSIT_ONE; 160 | else 161 | v = FPT(a).v; 162 | 163 | } 164 | 165 | 
template 166 | tposit::tposit(float a) 167 | { 168 | if(a == 0) 169 | v = 0; 170 | else if(a == 1) 171 | v = PT::POSIT_ONE; 172 | else 173 | v = FPT(a).v; 174 | 175 | } 176 | 177 | template 178 | tposit::tposit(double a) 179 | { 180 | if(a == 0) 181 | v = 0; 182 | else if(a == 1) 183 | v = PT::POSIT_ONE; 184 | else 185 | v = FPT(a).v; 186 | } 187 | 188 | 189 | 190 | 191 | 192 | namespace std { 193 | template class numeric_limits > { 194 | public: 195 | static tposit max() {return tposit::max(); }; 196 | static tposit min() {return tposit::min(); }; 197 | // One can implement other methods if needed 198 | }; 199 | } 200 | 201 | 202 | 203 | namespace std 204 | { 205 | template 206 | inline CONSTEXPR14 tposit abs(tposit z) 207 | { 208 | return tposit(tposit::DeepInit(),z.v < 0 ? -z.v : z.v); 209 | } 210 | 211 | template 212 | inline CONSTEXPR14 tposit min(tposit a, tposit b) 213 | { 214 | return a <= b ? a : b; 215 | } 216 | 217 | template 218 | inline CONSTEXPR14 tposit max(tposit a, tposit b) 219 | { 220 | return a >= b ? a : b; 221 | } 222 | } 223 | 224 | -------------------------------------------------------------------------------- /include/tvalids.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 
4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | // tvalids.hpp 10 | // 11 | // Emanuele Ruffaldi 2016 12 | #pragma once 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #ifndef CONSTEXPR14 19 | #define CONSTEXPR14 20 | #endif 21 | 22 | #include "posit.h" 23 | /** 24 | // T = support type (enough for 1< 32 | class tvalid 33 | { 34 | public: 35 | static_assert(std::is_signed::value,"required signed T"); 36 | static_assert(N >= 2,"minimum is 2 for the case 0,1,2"); 37 | static_assert(sizeof(T)*8 >= N*4,"required enough storage T for provided bits SREF"); 38 | 39 | using value_t=T; 40 | static constexpr int TN = N*4; 41 | using PT=PositTrait; 42 | static constexpr int vesbits = 0; 43 | using PTU=typename PT::POSIT_UTYPE; 44 | using tposit=value_t; 45 | 46 | private: 47 | static T exacts[TN]; 48 | static T opexp2[TN*TN]; 49 | static T optwice[TN]; 50 | static T ophalf[TN]; 51 | static T opsquare[TN]; 52 | static T opmul[TN*TN]; 53 | static T opadd[TN*TN]; 54 | static float op2float[TN]; 55 | 56 | public: 57 | struct DeepInit{}; 58 | 59 | explicit tvalid(double a); 60 | explicit tvalid(float a); 61 | explicit tvalid(int a); 62 | explicit tvalid(DeepInit , int16_t a) : v(a) {} 63 | explicit tvalid() {} 64 | tvalid(const tvalid & a): v(a.v) {} 65 | 66 | bool is_negative() const { return v < 0; } 67 | bool has_neg_exponent() const { return (v & PT::POSIT_INVERTBIT) == 0; } 68 | bool is_nan() const { return PT::withnan && v == PT::POSIT_NAN; } 69 | bool is_infinity() const { return v==PT::POSIT_NINF || v==PT::POSIT_PINF; } 70 | static tposit max() { return tposit(DeepInit(),PT::POSIT_MAX); } 71 | static tposit min() { return tposit(DeepInit(),PT::POSIT_MIN); } 72 | static tposit smallestpos() { return tposit(DeepInit(),PT::POSIT_SMALLPOS); } 73 | static tposit one() { return tposit(DeepInit(),PT::POSIT_ONE); } 74 | static tposit two() { return tposit(DeepInit(),vtwo); } 75 | static tposit onehalf() { return inv(two()); } 76 | static tposit 
pinfinity() { return tposit(DeepInit(),PT::POSIT_PINF); 77 | static tposit ninfinity() { return tposit(DeepInit(),PT::POSIT_NINF); 78 | static tposit nan() { return tposit(DeepInit(),PT::POSIT_NAN); } 79 | 80 | friend bool operator<(const tvalid &a, const tvalid & b) { return a.v < b.v; } 81 | friend bool operator>(const tvalid &a, const tvalid & b) { return a.v > b.v; } 82 | friend bool operator<=(const tvalid &a, const tvalid & b) { return a.v <= b.v;} 83 | friend bool operator>=(const tvalid &a, const tvalid & b) { return a.v >= b.v; } 84 | friend bool operator==(const tvalid &a, const tvalid & b) { return a.v == b.v; } 85 | friend bool operator!=(const tvalid &a, const tvalid & b) { return a.v !=b.v; } 86 | 87 | tvalid inv() const { return tvalid(DeepInit(),-(v+TN)); } 88 | tvalid half() const { return (*this * inv(two())); } 89 | tvalid twice() const{ return tvalid(DeepInit(),optwice[uu()]); } 90 | tvalid exp2() const { return tvalid(DeepInit(),opexp2[uu()]); } 91 | tvalid square() const { return tvalid(DeepInit(),opsquare[uu()]); } 92 | tvalid operator-() const { return tvalid(DeepInit(),-v); } 93 | 94 | friend tvalid operator+(const tvalid &a, const tvalid & b) { return tvalid(DeepInit(),opadd[a.uu()*TN+b.uu()]); } 95 | friend tvalid operator*(const tvalid &a, const tvalid & b) { return tvalid(DeepInit(),opmul[a.uu()*TN+b.uu()]); } 96 | friend tvalid operator-(const tvalid &a, const tvalid & b) { return a+(-b); } 97 | friend tvalid operator/(const tvalid &a, const tvalid & b) { return tvalid(DeepInit(),opmul[a.uu()*TN+(inv(b).uu())]); } 98 | 99 | friend std::ostream & operator << (std::ostream & ons, const tvalid & p); 100 | 101 | tvalid& operator+=(const tvalid &a) { tvalid r = *this+a; v = r.v; return *this; } 102 | tvalid& operator*=(const tvalid &a) { tvalid r = *this*a; v = r.v; return *this; } 103 | 104 | PTU uu() const { return (PTU)v;} 105 | 106 | operator float() const { return uint32_to_float(op2float[uu()]); } 107 | operator double() const { 
return (float)*this; } 108 | operator int() const { return (float)*this; } 109 | 110 | value_t v; 111 | 112 | }; 113 | 114 | template 115 | inline tvalid half(tvalid z) { return z.half(); } 116 | 117 | template 118 | inline tvalid twice(tvalid z) { return z.twice(); } 119 | 120 | template 121 | inline tvalid square(tvalid z) { return z.square(); } 122 | 123 | template 124 | inline tvalid inv(tvalid x) { return x.inv(); } 125 | 126 | template 127 | CONSTEXPR14 inline tvalid neg(tvalid z) { return -z; } 128 | 129 | template 130 | inline tvalid exp2(tvalid z) { return z.exp2(); } 131 | 132 | template 133 | inline bool is_negative(tvalid a) 134 | { 135 | return a.v < 0; 136 | } 137 | 138 | namespace std { 139 | template class numeric_limits > { 140 | public: 141 | static tvalid max() {return tvalid ::max(); }; 142 | static tvalid min() {return tvalid ::min(); }; 143 | // One can implement other methods if needed 144 | }; 145 | } 146 | 147 | namespace std 148 | { 149 | template 150 | inline CONSTEXPR14 tvalid abs(tvalid z) 151 | { 152 | return tvalid(tvalid::DeepInit(),z.v < 0 ? -z.v : z.v); 153 | } 154 | 155 | template 156 | inline CONSTEXPR14 tvalid min(const tvalid &a, const tvalid & b) 157 | { 158 | return a <= b ? a : b; 159 | } 160 | 161 | template 162 | inline CONSTEXPR14 tvalid max(const tvalid &a, const tvalid & b) 163 | { 164 | return a >= b ? 
a : b; 165 | } 166 | } 167 | 168 | -------------------------------------------------------------------------------- /jupyter/.ipynb_checkpoints/first-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Welcome to initial Jupyter testing" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/plain": [] 18 | }, 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "output_type": "execute_result" 22 | } 23 | ], 24 | "source": [ 25 | "#include \"../include/posit.h\"" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "data": { 35 | "text/plain": [ 36 | "(unsigned long) 2\n" 37 | ] 38 | }, 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "output_type": "execute_result" 42 | } 43 | ], 44 | "source": [ 45 | "sizeof(int_least_bits<10>::type)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/plain": [] 56 | }, 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "using P=Posit;" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 5, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [] 74 | }, 75 | "execution_count": 5, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "P a(10.0f);\n", 82 | "P b(20.0f);\n" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "(float) 28.00000f\n" 94 | ] 95 | }, 96 | "execution_count": 6, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | 
"source": [ 102 | "(float)(a+b)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 7, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "text/plain": [ 113 | "(float) -10.00000f\n" 114 | ] 115 | }, 116 | "execution_count": 7, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "(float)(a-b)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 8, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "(float) 0.09375f\n" 134 | ] 135 | }, 136 | "execution_count": 8, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "(float)inv(a)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 9, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "(float) 192.00000f\n" 154 | ] 155 | }, 156 | "execution_count": 9, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "(float)(a*b)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 10, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "(bool) true\n" 174 | ] 175 | }, 176 | "execution_count": 10, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "a < b" 183 | ] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "C++14", 189 | "language": "", 190 | "name": "cling-cpp14" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": "c++", 194 | "file_extension": ".c++", 195 | "mimetype": "text/x-c++src", 196 | "name": "c++" 197 | } 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 2 201 | } 202 | -------------------------------------------------------------------------------- /jupyter/first.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Welcome to initial Jupyter testing" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "#include \"../include/posit.h\"" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/plain": [ 27 | "2" 28 | ] 29 | }, 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "output_type": "execute_result" 33 | } 34 | ], 35 | "source": [ 36 | "sizeof(int_least_bits<10>::type)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "using P=Posit;" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "P a(10.0f);\n", 55 | "P b(20.0f);\n" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 5, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "28f" 67 | ] 68 | }, 69 | "execution_count": 5, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "(float)(a+b)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 6, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "-10f" 87 | ] 88 | }, 89 | "execution_count": 6, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "(float)(a-b)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 7, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "0.09375f" 107 | ] 108 | }, 109 | "execution_count": 7, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 
| "source": [ 115 | "(float)inv(a)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 8, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/plain": [ 126 | "192f" 127 | ] 128 | }, 129 | "execution_count": 8, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "(float)(a*b)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 9, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "true" 147 | ] 148 | }, 149 | "execution_count": 9, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "a < b" 156 | ] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "C++14", 162 | "language": "C++14", 163 | "name": "xeus-cling-cpp14" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": "text/x-c++src", 167 | "file_extension": ".cpp", 168 | "mimetype": "text/x-c++src", 169 | "name": "c++", 170 | "version": "-std=c++14" 171 | } 172 | }, 173 | "nbformat": 4, 174 | "nbformat_minor": 2 175 | } 176 | -------------------------------------------------------------------------------- /makeone.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat <(echo -n "// cppPosit ") <(git rev-parse HEAD) <(echo "") include/bithippop.hpp include/fixedtraits.hpp include/floattraits.hpp include/typehelpers.hpp include/unpacked.h include/posit.h | grep -vEh '#include "[^\"]+"' | grep -vh "#pragma once" - > posit_one.h 3 | cat posit_one.h | pbcopy 4 | echo "copied in Clipboard if macOS" -------------------------------------------------------------------------------- /matlab/Lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eruffaldi/cppPosit/3c497041063dc87de2f9615a52a55b0ed5222b12/matlab/Lenna.png 
-------------------------------------------------------------------------------- /matlab/cellindex.m: -------------------------------------------------------------------------------- 1 | function r =cellindex(a) 2 | r=a{:}; -------------------------------------------------------------------------------- /matlab/cellvcat.m: -------------------------------------------------------------------------------- 1 | function r = cellvcat(x) 2 | 3 | if isempty(x) 4 | r=[]; 5 | else 6 | r =x{1}; 7 | for I=2:length(x) 8 | r=[r;x{I}]; 9 | end 10 | 11 | end -------------------------------------------------------------------------------- /matlab/compareposits.m: -------------------------------------------------------------------------------- 1 | addpath halfprecision/ 2 | n8 = arrayfun(@(x) {8,x,sprintf('positint8_t,8,%d,uint16_t,false.bin',x)},0:3,'UniformOutput',false); 3 | p8 = cellvcat(cellfun(@(x) loadpositdump(['../build/' x{3}],x{1},x{2}),n8,'UniformOutput',false)); 4 | 5 | %% 6 | n10 = arrayfun(@(x) {10,x,sprintf('positint16_t,10,%d,uint16_t,false.bin',x)},0:3,'UniformOutput',false); 7 | p10 = cellvcat(cellfun(@(x) loadpositdump(['../build/' x{3}],x{1},x{2}),n10,'UniformOutput',false)); 8 | 9 | %% 10 | n12 = arrayfun(@(x) {12,x,sprintf('positint16_t,12,%d,uint16_t,false.bin',x)},0:4,'UniformOutput',false); 11 | p12 = cellvcat(cellfun(@(x) loadpositdump(['../build/' x{3}],x{1},x{2}),n12,'UniformOutput',false)); 12 | 13 | %% 14 | [u,h] = halfinrange(-10,10); 15 | tf16=dump2table([-length(u)/2:(length(u)/2-1);cast(u,'double');h]'); 16 | tf16.bits=16; 17 | tf16.es=0; 18 | tf16.what=categorical({'float'},{'posit','float','valid'}) 19 | f16=tf16; 20 | %% 21 | p12e=[f16;p12]; 22 | p10e=[f16;p10]; 23 | 24 | %% 25 | qss={p8,p10,p12,f16}; 26 | %% 27 | colors =[[0 0.4470 0.7410];[0.8500 0.3250 0.0980];[0.9290 0.6940 0.1250];[0.4940 0.1840 0.5560];[0,1,1]]; 28 | 29 | %% 30 | for K=1:length(qss) 31 | qs=qss{K}; 32 | figure; 33 | s=[]; 34 | h=[]; 35 | for I=1:length(qs) 36 | pd=qs(I,:); 
37 | p=double(pd.data{1}); 38 | usenan=false; 39 | h(I)=plot(p(:,1),p(:,3),'Color',colors(I,:)); 40 | hold on 41 | plot(p(:,1),p(:,3),['*'],'Color',colors(I,:)); 42 | if pd.what=='float' 43 | s{I} =sprintf('float16'); 44 | else 45 | s{end+1} =sprintf('posit%d es=%d',pd.bits,pd.es); 46 | end 47 | ylim([-10,10]); 48 | end 49 | yl =ylim; 50 | title(sprintf('Posits %d bits with Y limit %f %f',bits,yl(1),yl(2))); 51 | 52 | hold off 53 | legend(h,s); 54 | end 55 | 56 | %% 57 | 58 | % 59 | % size(u) 60 | % plot(h) 61 | % hold on 62 | % plot(h,'*'); 63 | % yl =ylim; 64 | % title(sprintf('float16 in range %f %f',yl(1),yl(2))); 65 | 66 | %% 67 | qss={p10e}; 68 | %% 69 | for K=1:length(qss) 70 | qs=qss{K}; 71 | figure; 72 | 73 | s={}; 74 | h=[]; 75 | for I=1:height(qs) 76 | bits =qs.bits(I); 77 | isfloatx = qs.what(I) == 'float'; 78 | es=qs.es(I); 79 | usenan=false; 80 | h(I)=plot(p.float{I},p.res{I},'Color',colors(I,:)); 81 | hold on 82 | plot(p(:,3),p(:,4),['.'],'Color',colors(I,:)); 83 | if isfloatx 84 | s{I} =sprintf('float%d',bits); 85 | else 86 | s{I} = sprintf('posit%d es %d',bits,es); 87 | end 88 | end 89 | xlabel('Value'); 90 | ylabel('Resolution'); 91 | legend(h,s); 92 | hold off 93 | xlim([-10,10]); 94 | end 95 | -------------------------------------------------------------------------------- /matlab/compareposits2.m: -------------------------------------------------------------------------------- 1 | addpath halfprecision/ 2 | n8 = arrayfun(@(x) {8,x,sprintf('positint8_t,8,%d,uint16_t,false.bin',x)},0:3,'UniformOutput',false); 3 | p8 = cellvcat(cellfun(@(x) loadpositdump(['../build/' x{3}],x{1},x{2}),n8,'UniformOutput',false)); 4 | 5 | %% 6 | n10 = arrayfun(@(x) {10,x,sprintf('positint16_t,10,%d,uint16_t,false.bin',x)},0:3,'UniformOutput',false); 7 | p10 = cellvcat(cellfun(@(x) loadpositdump(['../build/' x{3}],x{1},x{2}),n10,'UniformOutput',false)); 8 | 9 | %% 10 | n12 = arrayfun(@(x) 
{12,x,sprintf('positint16_t,12,%d,uint16_t,false.bin',x)},0:4,'UniformOutput',false); 11 | p12 = cellvcat(cellfun(@(x) loadpositdump(['../build/' x{3}],x{1},x{2}),n12,'UniformOutput',false)); 12 | 13 | %% 14 | [u,h] = halfinrange(-10,10); 15 | tf16=dump2table([-length(u)/2:(length(u)/2-1);cast(u,'double');h]'); 16 | tf16.bits=16; 17 | tf16.es=0; 18 | tf16.what=categorical({'float'},{'posit','float','valid'}) 19 | f16=tf16; 20 | %% 21 | p12e=[f16;p12]; 22 | p10e=[f16;p10]; 23 | 24 | %% 25 | qss1={[p8;p10;p12;f16]}; 26 | qss1=limittab(qss1,-2,2,'exclusive'); 27 | qss={p8,p10,p12,f16}; 28 | qss=limittab(qss,-2,2,'exclusive'); 29 | %% 30 | p81=qss{1}; 31 | p8z =p81(1,:); 32 | %% 33 | colors =[[0 0.4470 0.7410];[0.8500 0.3250 0.0980];[0.9290 0.6940 0.1250];[0.4940 0.1840 0.5560];[0,1,1]]; 34 | 35 | %% 36 | for K=1:length(qss) 37 | qs=qss{K}; 38 | figure; 39 | s=[]; 40 | h=[]; 41 | for I=1:height(qs) 42 | h(I)=plot(qs.signed{I},qs.float{I},'Color',colors(mod(I,length(colors))+1,:)); 43 | hold on 44 | plot(qs.signed{I},qs.float{I},['*'],'Color',colors(mod(I,length(colors))+1,:)); 45 | if qs.what(I)=='float' 46 | s{I} =sprintf('float16'); 47 | else 48 | s{end+1} =sprintf('posit%d es=%d',qs.bits(I),qs.es(I)); 49 | end 50 | ylim([-1,1]); 51 | end 52 | yl =ylim; 53 | 54 | hold off 55 | legend(h,s); 56 | end 57 | 58 | %% 59 | 60 | % 61 | % size(u) 62 | % plot(h) 63 | % hold on 64 | % plot(h,'*'); 65 | % yl =ylim; 66 | % title(sprintf('float16 in range %f %f',yl(1),yl(2))); 67 | 68 | %% 69 | qss={p10e,f16}; 70 | qss=limittab(qss,-1,1,'exclusive'); 71 | 72 | %% 73 | for K=1:length(qss) 74 | qs=qss{K}; 75 | figure; 76 | 77 | s={}; 78 | h=[]; 79 | for I=1:height(qs) 80 | bits =qs.bits(I); 81 | isfloatx = qs.what(I) == 'float'; 82 | es=qs.es(I); 83 | % if es >0 84 | % continue 85 | % end 86 | usenan=false; 87 | h(I)=plot(qs.float{I},qs.res{I},'Color',colors(I,:)); 88 | hold on 89 | plot(qs.float{I},qs.res{I},['.'],'Color',colors(I,:)); 90 | if qs.what(I) =='float' 91 | s{I} 
=sprintf('float%d',bits); 92 | else 93 | s{I} = sprintf('posit%d es %d',bits,es); 94 | end 95 | end 96 | xlabel('Value'); 97 | ylabel('Resolution'); 98 | legend(h,s); 99 | hold off 100 | xlim([-1,1]); 101 | end 102 | -------------------------------------------------------------------------------- /matlab/describetab.m: -------------------------------------------------------------------------------- 1 | function t =describetab(t) 2 | 3 | for I=1:height(t) 4 | v=t.float{I}; 5 | W=v(~(isnan(v)|isinf(v))); 6 | v=t.res{I}; 7 | Q=v(~(isnan(v)|isinf(v))); 8 | t.max(I) = max(W); 9 | t.min(I) = min(W); 10 | t.smallest(I) = min(abs(W(W ~= 0))); 11 | t.minres(I) = min(Q); 12 | t.maxres(I) = max(Q); 13 | end 14 | -------------------------------------------------------------------------------- /matlab/dump2table.m: -------------------------------------------------------------------------------- 1 | function q =dump2table(r) 2 | 3 | % TODO 4 | r(:,4) =meandiff(r(:,3)); 5 | 6 | q = table(); 7 | q.signed = {r(:,1)}; 8 | q.unsigned = {r(:,2)}; 9 | v=r(:,3); 10 | q.float ={v}; 11 | q.res = {r(:,4)}; 12 | q = describetab(q); 13 | 14 | 15 | function r = meandiff(x) 16 | x=x(:); 17 | 18 | r =nan(size(x)); 19 | 20 | % ((next-cur)+(cur-prev))/2 == (next-prev)/2 21 | r(2:end-1) = (x(3:end)-x(1:end-2))/2; 22 | -------------------------------------------------------------------------------- /matlab/fromindex.m: -------------------------------------------------------------------------------- 1 | 2 | function a = fromindex(zp,mode,ii) 3 | 4 | if isempty(ii) 5 | a=ii; 6 | return; 7 | end 8 | switch mode 9 | case 'index' 10 | a=ii; 11 | case 'signed' 12 | a = (zp.signed{1}(ii)); 13 | case 'unsigned' 14 | a = (zp.unsigned{1}(ii)); 15 | case 'float' 16 | a = zp.float{1}(ii); 17 | otherwise 18 | a = 0; 19 | end -------------------------------------------------------------------------------- /matlab/halfinrange.m: -------------------------------------------------------------------------------- 1 | 
function [u,h] = halfinrange(mi,ma) 2 | 3 | z =uint16(0:65535); 4 | y =halfprecision(z,'double'); 5 | 6 | ii = find(y >= mi & y <=ma); 7 | u =z(ii); 8 | h = y(ii); 9 | 10 | [h,hi]= sort(h); 11 | u =u(hi); -------------------------------------------------------------------------------- /matlab/halfprecision/halfprecision.m: -------------------------------------------------------------------------------- 1 | % halfprecision converts IEEE 754 floating point to half precision IEEE 754r 2 | %****************************************************************************** 3 | % 4 | % MATLAB (R) is a trademark of The Mathworks (R) Corporation 5 | % 6 | % Function: halfprecision 7 | % Filename: halfprecision.c 8 | % Programmer: James Tursa 9 | % Version: 1.0 10 | % Date: March 3, 2009 11 | % Copyright: (c) 2009 by James Tursa, All Rights Reserved 12 | % 13 | % This code uses the BSD License: 14 | % 15 | % Redistribution and use in source and binary forms, with or without 16 | % modification, are permitted provided that the following conditions are 17 | % met: 18 | % 19 | % * Redistributions of source code must retain the above copyright 20 | % notice, this list of conditions and the following disclaimer. 21 | % * Redistributions in binary form must reproduce the above copyright 22 | % notice, this list of conditions and the following disclaimer in 23 | % the documentation and/or other materials provided with the distribution 24 | % 25 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 26 | % AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 | % IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 | % ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 29 | % LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 | % CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 | % SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 | % INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 | % CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 | % ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 | % POSSIBILITY OF SUCH DAMAGE. 36 | % 37 | % halfprecision converts the input argument to/from a half precision floating 38 | % point bit pattern corresponding to IEEE 754r. The bit pattern is stored in a 39 | % uint16 class variable. Please note that halfprecision is *not* a class. That 40 | % is, you cannot do any arithmetic with the half precision bit patterns. 41 | % halfprecision is simply a function that converts the IEEE 754r half precision 42 | % bit pattern to/from other numeric MATLAB variables. You can, however, take 43 | % the half precision bit patterns, convert them to single or double, do the 44 | % operation, and then convert the result back manually. 45 | % 46 | % 1 bit sign bit 47 | % 5 bits exponent, biased by 15 48 | % 10 bits mantissa, hidden leading bit, normalized to 1.0 49 | % 50 | % Special floating point bit patterns recognized and supported: 51 | % 52 | % All exponent bits zero: 53 | % - If all mantissa bits are zero, then number is zero (possibly signed) 54 | % - Otherwise, number is a denormalized bit pattern 55 | % 56 | % All exponent bits set to 1: 57 | % - If all mantissa bits are zero, then number is +Infinity or -Infinity 58 | % - Otherwise, number is NaN (Not a Number) 59 | % 60 | % Building: 61 | % 62 | % halfprecision requires that a mex routine be built (one time only). 
This 63 | % process is typically self-building the first time you call the function 64 | % as long as you have the files halfprecision.m and halfprecision.c in the 65 | % same directory somewhere on the MATLAB path. If you need to manually build 66 | % the mex function, here are the commands: 67 | % 68 | % >> mex -setup 69 | % (then follow instructions to select a C / C++ compiler of your choice) 70 | % >> mex halfprecision.c 71 | % 72 | % If you have an older version of MATLAB, you may need to use this command: 73 | % 74 | % >> mex -DDEFINEMWSIZE halfprecision.c 75 | % 76 | % Syntax 77 | % 78 | % B = halfprecision(A) 79 | % C = halfprecision(B,S) 80 | % halfprecision(B,'disp') 81 | % 82 | % Description 83 | % 84 | % A = a MATLAB numeric array, char array, or logical array. 85 | % 86 | % B = the variable A converted into half precision floating point bit pattern. 87 | % The bit pattern will be returned as a uint16 class variable. The values 88 | % displayed are simply the bit pattern interpreted as if it were an unsigned 89 | % 16-bit integer. To see the halfprecision values, use the 'disp' option, which 90 | % simply converts the bit patterns into a single class and then displays them. 91 | % 92 | % C = the half precision floating point bit pattern in B converted into class S. 93 | % B must be a uint16 or int16 class variable. 94 | % 95 | % S = char string naming the desired class (e.g., 'single', 'int32', etc.) 96 | % If S = 'disp', then the floating point bit values are simply displayed. 
97 | % 98 | % Examples 99 | % 100 | % >> a = [-inf -1e30 -1.2 NaN 1.2 1e30 inf] 101 | % a = 102 | % 1.0e+030 * 103 | % -Inf -1.0000 -0.0000 NaN 0.0000 1.0000 Inf 104 | % 105 | % >> b = halfprecision(a) 106 | % b = 107 | % 64512 64512 48333 65024 15565 31744 31744 108 | % 109 | % >> halfprecision(b,'disp') 110 | % -Inf -Inf -1.2002 NaN 1.2002 Inf Inf 111 | % 112 | % >> halfprecision(b,'double') 113 | % ans = 114 | % -Inf -Inf -1.2002 NaN 1.2002 Inf Inf 115 | % 116 | % >> 2^(-24) 117 | % ans = 118 | % 5.9605e-008 119 | % 120 | % >> halfprecision(ans) 121 | % ans = 122 | % 1 123 | % 124 | % >> halfprecision(ans,'disp') 125 | % 5.9605e-008 126 | % 127 | % >> 2^(-25) 128 | % ans = 129 | % 2.9802e-008 130 | % 131 | % >> halfprecision(ans) 132 | % ans = 133 | % 1 134 | % 135 | % >> halfprecision(ans,'disp') 136 | % 5.9605e-008 137 | % 138 | % >> 2^(-26) 139 | % ans = 140 | % 1.4901e-008 141 | % 142 | % >> halfprecision(ans) 143 | % ans = 144 | % 0 145 | % 146 | % >> halfprecision(ans,'disp') 147 | % 0 148 | % 149 | % Note that the special cases of -Inf, +Inf, and NaN are handled correctly. 150 | % Also, note that the -1e30 and 1e30 values overflow the half precision format 151 | % and are converted into half precision -Inf and +Inf values, and stay that 152 | % way when they are converted back into doubles. 153 | % 154 | % For the denormalized cases, note that 2^(-24) is the smallest number that can 155 | % be represented in half precision exactly. 2^(-25) will convert to 2^(-24) 156 | % because of the rounding algorithm used, and 2^(-26) is too small and underflows 157 | % to zero. 
158 | % 159 | %************************************************************************** 160 | 161 | function varargout = halfprecision(varargin) 162 | disp(' '); 163 | disp('You must build the mex routine before you can use halfprecision.'); 164 | disp('Attempting to do so now ...'); 165 | disp(' '); 166 | mname = mfilename('fullpath'); 167 | cname = [mname '.c']; 168 | if( isempty(dir(cname)) ) 169 | disp('Cannot find the file halfprecision.c in the same directory as the'); 170 | disp('file halfprecision.m. Please ensure that they are in the same'); 171 | disp('directory and try again. The following file was not found:'); 172 | disp(' '); 173 | disp(cname); 174 | disp(' '); 175 | error('Unable to compile halprecision.c'); 176 | else 177 | disp(['Found file halfprecision.c in ' cname]); 178 | disp(' '); 179 | disp('Now attempting to compile ...'); 180 | disp('(If prompted, please press the Enter key and then select any C/C++'); 181 | disp('compiler that is available, such as lcc.)'); 182 | disp(' '); 183 | disp(['mex(''' cname ''')']); 184 | disp(' '); 185 | try 186 | mex(cname); 187 | disp('mex halfprecision.c build completed ... you may now use halfprecision.'); 188 | disp(' '); 189 | catch 190 | disp(' '); 191 | disp('Well, *that* didn''t work ... now trying it with mwSize defined ...'); 192 | disp(' '); 193 | try 194 | disp(' '); 195 | disp(['mex(''-DDEFINEMWSIZE'',''' cname ''')']); 196 | disp(' '); 197 | mex('-DDEFINEMWSIZE',cname); 198 | disp('mex halfprecision.c build completed ... you may now use halfprecision.'); 199 | disp(' '); 200 | catch 201 | disp('Hmmm ... That didn''t work either.'); 202 | disp(' '); 203 | disp('The mex command failed. This may be because you have already run'); 204 | disp('mex -setup and selected a non-C compiler, such as Fortran. 
If this'); 205 | disp('is the case, then rerun mex -setup and select a C/C++ compiler.'); 206 | disp(' '); 207 | error('Unable to compile halprecision.c'); 208 | end 209 | end 210 | end 211 | if false 212 | varargout = varargin; % Get rid of the lint message 213 | end 214 | end 215 | -------------------------------------------------------------------------------- /matlab/halfprecision/halfprecisionmax.m: -------------------------------------------------------------------------------- 1 | % halfprecisionmax returns IEEE 754r bit pattern of max half precision value 2 | %****************************************************************************** 3 | % 4 | % MATLAB (R) is a trademark of The Mathworks (R) Corporation 5 | % 6 | % Function: halfprecisionmax 7 | % Filename: halfprecisionmax.m 8 | % Programmer: James Tursa 9 | % Version: 1.0 10 | % Date: March 3, 2009 11 | % Copyright: (c) 2009 by James Tursa, All Rights Reserved 12 | % 13 | % This code uses the BSD License: 14 | % 15 | % Redistribution and use in source and binary forms, with or without 16 | % modification, are permitted provided that the following conditions are 17 | % met: 18 | % 19 | % * Redistributions of source code must retain the above copyright 20 | % notice, this list of conditions and the following disclaimer. 21 | % * Redistributions in binary form must reproduce the above copyright 22 | % notice, this list of conditions and the following disclaimer in 23 | % the documentation and/or other materials provided with the distribution 24 | % 25 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 26 | % AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 | % IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 | % ARE DISCLAIMED. 
function h = halfprecisionmax
% Returns, as a uint16, the bit pattern of the max half precision value
% (hexadecimal 7BFF). Takes no inputs and produces at most one output.

% Evaluate both argument-count conditions up front, then report them in the
% same order as before: inputs first, outputs second.
hasInputs = (nargin ~= 0);
tooManyOutputs = (nargout > 1);
if hasInputs
    error(nargchk(0, 0, nargin));
end
if tooManyOutputs
    error(nargoutchk(0, 1, nargout));
end

pattern = hex2dec('7BFF');
h = uint16(pattern);
end
function h = halfprecisionmin
% Returns, as a uint16, the bit pattern of the min half precision value
% (hexadecimal 0001). Takes no inputs and produces at most one output.

% Evaluate both argument-count conditions up front, then report them in the
% same order as before: inputs first, outputs second.
hasInputs = (nargin ~= 0);
tooManyOutputs = (nargout > 1);
if hasInputs
    error(nargchk(0, 0, nargin));
end
if tooManyOutputs
    error(nargoutchk(0, 1, nargout));
end

pattern = hex2dec('0001');
h = uint16(pattern);
end
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in 12 | the documentation and/or other materials provided with the distribution 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 18 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | POSSIBILITY OF SUCH DAMAGE. 
function q = loadpositdump(name,bits,es)
% LOADPOSITDUMP  Load a binary dump of doubles and wrap it in a table.
%
%   q = loadpositdump(name)          bits and es default to 0
%   q = loadpositdump(name,bits)     es defaults to 0
%   q = loadpositdump(name,bits,es)
%
% The file NAME is read as a flat stream of doubles, regrouped into rows of
% three values and handed to dump2table. The result is tagged with the given
% bits/es and with what = 'posit'.
% An error is raised if the file cannot be opened.

% Default each optional argument independently; previously a two-argument
% call left es undefined and failed at the assignment below.
if nargin < 2
    bits = 0;
end
if nargin < 3
    es = 0;
end

% iIv  (original note; presumably the dump column layout -- TODO confirm)
[fid,msg] = fopen(name,'rb');
if fid < 0
    error('loadpositdump:fopen','Cannot open %s: %s',name,msg);
end
closer = onCleanup(@() fclose(fid));   % release the handle even if fread/reshape fail

r = fread(fid,Inf,'double');
r = reshape(r,3,[])';                  % one record (3 doubles) per row
q = dump2table(r);
q.bits = bits;
q.es = es;
q.what = categorical({'posit'},{'posit','float','valid'});
FLAGS='--std=c++14 -O3 --march=native -Werror -Wall -Wno-long-long -pedantic'; 2 | mex('floatTop8.cpp',['COMPFLAGS="$COMPFLAGS ' FLAGS]); 3 | mex('p8Tofloat.cpp',['COMPFLAGS="$COMPFLAGS ' FLAGS]); 4 | mex('p8binop.cpp',['COMPFLAGS="$COMPFLAGS ' FLAGS]); 5 | mex('p8unop.cpp',['COMPFLAGS="$COMPFLAGS ' FLAGS]); 6 | -------------------------------------------------------------------------------- /matlab/morton.m: -------------------------------------------------------------------------------- 1 | function ind=morton(n); 2 | % MORTON(N) return the morton permutation order for array of size 2^N 3 | % e.g. 4 | % matrix size is 2^n 5 | % n=2; 6 | % ind=morton(n); 7 | % d=fix(rand(2^n,2^n)*10); 8 | % disp(d) 9 | % disp(d(ind)) 10 | linind4=(1:4^n)-1; %start index count for array at zero 11 | ind4str=dec2base(linind4,4); %convert indices to base-4 12 | b1=dec2bin(str2num(ind4str(:,1))); %split each base-4 into two base-2numbers 13 | b2=dec2bin(str2num(ind4str(:,2))); 14 | rb=[b1(:,1) b2(:,1)]; %the rows are given by the first bits of b1 and 15 | b2 16 | cb=[b1(:,2) b2(:,2)]; %the columns are given by the second bits of b1 and b2 17 | r=bin2dec(rb)+1; %convert the row from bit to decimal 18 | c=bin2dec(cb)+1; %convert column 19 | ind=[2^n*(c-1)+r]'; %make a linear row index into array for easyaddressing 20 | %morton.m ends -------------------------------------------------------------------------------- /matlab/normr.m: -------------------------------------------------------------------------------- 1 | function X = normr(Y) 2 | 3 | X = Y./repmat(sqrt(sum(Y.*Y, 2)),1, size(Y, 2)); -------------------------------------------------------------------------------- /matlab/p8Tofloat.cpp: -------------------------------------------------------------------------------- 1 | #include "mex.h" 2 | //#include "zposit8.hpp" 3 | #include "posit.h" 4 | 5 | 6 | using zposit_type = Posit; 7 | 8 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 9 | { 10 | if(nrhs != 
2 && nlhs != 1) 11 | { 12 | mexPrintf("zp8Tofloat(input,output template) -> output. Use (unsigned) int 16 for half precision. \n"); 13 | return; 14 | } 15 | if(mxGetClassID(prhs[0]) != mxINT8_CLASS) 16 | { 17 | mexPrintf("zp8Tofloat(input,output template) -> output. Use (unsigned) int 16 for half precision. \n"); 18 | return; 19 | } 20 | switch(mxGetClassID(prhs[1])) 21 | { 22 | case mxDOUBLE_CLASS: 23 | case mxSINGLE_CLASS: 24 | case mxINT16_CLASS: 25 | case mxUINT16_CLASS: 26 | break; 27 | default: 28 | mexPrintf("zp8Tofloat unsupported output type: float double half(as int16)\n"); 29 | return; 30 | } 31 | bool complex = mxIsComplex(prhs[0]); 32 | const auto ndims = mxGetNumberOfDimensions(prhs[0]); 33 | const mwSize * dimi = mxGetDimensions(prhs[0]); 34 | plhs[0] = mxCreateUninitNumericArray(ndims,(mwSize*)dimi,mxGetClassID(prhs[1]),!complex ? mxREAL: mxCOMPLEX); 35 | zposit_type * src = (zposit_type*)mxGetData(prhs[0]); 36 | int n = mxGetNumberOfElements(prhs[0])*(complex?2:1); 37 | switch(mxGetClassID(prhs[1])) 38 | { 39 | case mxDOUBLE_CLASS: 40 | { 41 | double * dst = (double*)mxGetData(plhs[0]); 42 | for(int i = 0;i < n; i++) 43 | { 44 | dst[i] = (double)src[i]; 45 | } 46 | } 47 | // double precision 48 | break; 49 | case mxSINGLE_CLASS: 50 | { 51 | float * dst = (float*)mxGetData(plhs[0]); 52 | for(int i = 0;i < n; i++) 53 | { 54 | dst[i] = (float)src[i]; 55 | } 56 | } 57 | break; 58 | case mxINT16_CLASS: 59 | case mxUINT16_CLASS: 60 | { 61 | halffloat * dst = (halffloat*)mxGetData(plhs[0]); 62 | for(int i = 0;i < n; i++) 63 | { 64 | dst[i] = (halffloat)src[i].unpack(); 65 | } 66 | } 67 | break; 68 | default: 69 | break; 70 | } 71 | 72 | } -------------------------------------------------------------------------------- /matlab/p8binop.cpp: -------------------------------------------------------------------------------- 1 | #include "mex.h" 2 | //#include "zposit8.hpp" 3 | #include "posit.h" 4 | #include 5 | using zposit_type = Posit; 6 | 7 | 8 | void 
mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 9 | { 10 | if(nrhs != 3 && nlhs != 1) 11 | { 12 | mexPrintf("out = zp8binop(a,b,op);\n"); 13 | return; 14 | } 15 | if(mxGetClassID(prhs[0]) != mxGetClassID(prhs[1]) || mxGetClassID(prhs[0]) != mxINT8_CLASS) 16 | { 17 | mexPrintf("expected signed int 8bit\n"); 18 | return; 19 | } 20 | if(mxGetClassID(prhs[2]) != mxCHAR_CLASS) 21 | { 22 | mexPrintf("expected string op\n"); 23 | return; 24 | } 25 | const auto ndims = mxGetNumberOfDimensions(prhs[0]); 26 | const mwSize * dimi = mxGetDimensions(prhs[0]); 27 | bool complex1 = mxIsComplex(prhs[0]); 28 | int n1 = mxGetNumberOfElements(prhs[0])*(complex1?2:1); 29 | 30 | 31 | 32 | bool complex2 = mxIsComplex(prhs[1]); 33 | int n2 = mxGetNumberOfElements(prhs[1])*(complex2?2:1); 34 | if(n1 != n2) 35 | { 36 | mexPrintf("mismatch of items count\n"); 37 | return; 38 | } 39 | char * opa = mxArrayToString(prhs[2]); 40 | int n = n1; 41 | if(opa[1] !=0 ) 42 | { 43 | mexPrintf("unknown operation %s\n",opa); 44 | mxFree((void*)opa); 45 | return; 46 | } 47 | 48 | plhs[0] = mxCreateUninitNumericArray(ndims,(mwSize*)dimi,mxINT8_CLASS,!complex2 ? 
mxREAL: mxCOMPLEX); 49 | const zposit_type * a = (const zposit_type*)mxGetData(prhs[0]); 50 | const zposit_type * b = (const zposit_type*)mxGetData(prhs[1]); 51 | zposit_type * dst = (zposit_type*)mxGetData(plhs[0]); 52 | 53 | char op = opa[0]; 54 | mxFree((void*)opa); 55 | switch(op) 56 | { 57 | case '+': 58 | for(int i= 0; i < n; i++) 59 | { 60 | dst[i] = a[i] + b[i]; 61 | } 62 | break; 63 | case '-': 64 | for(int i= 0; i < n; i++) 65 | { 66 | dst[i] = a[i] - b[i]; 67 | } 68 | break; 69 | case '*': 70 | for(int i= 0; i < n; i++) 71 | { 72 | dst[i] = a[i] * b[i]; 73 | } 74 | break; 75 | case '/': 76 | for(int i= 0; i < n; i++) 77 | { 78 | dst[i] = a[i] / b[i]; 79 | } 80 | break; 81 | default: 82 | mexPrintf("unknown operation %c\n",op); 83 | break; 84 | } 85 | 86 | } -------------------------------------------------------------------------------- /matlab/p8unop.cpp: -------------------------------------------------------------------------------- 1 | #include "mex.h" 2 | //#include "zposit8.hpp" 3 | #include "posit.h" 4 | #include 5 | using zposit_type = Posit; 6 | 7 | 8 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 9 | { 10 | if(nrhs != 2 && nlhs != 1) 11 | { 12 | mexPrintf("out = zp8unop(a,op);\n"); 13 | return; 14 | } 15 | if(mxGetClassID(prhs[0]) != mxINT8_CLASS) 16 | { 17 | mexPrintf("expected signed int 8bit\n"); 18 | return; 19 | } 20 | if(mxGetClassID(prhs[1]) != mxCHAR_CLASS) 21 | { 22 | mexPrintf("expected string op\n"); 23 | return; 24 | } 25 | const auto ndims = mxGetNumberOfDimensions(prhs[0]); 26 | const mwSize * dimi = mxGetDimensions(prhs[0]); 27 | bool complex1 = mxIsComplex(prhs[0]); 28 | int n1 = mxGetNumberOfElements(prhs[0])*(complex1?2:1); 29 | char * opa = mxArrayToString(prhs[1]); 30 | int n = n1; 31 | 32 | plhs[0] = mxCreateUninitNumericArray(ndims,(mwSize*)dimi,mxINT8_CLASS,!complex1 ? 
mxREAL: mxCOMPLEX); 33 | const zposit_type * a = (const zposit_type*)mxGetData(prhs[0]); 34 | zposit_type * dst = (zposit_type*)mxGetData(plhs[0]); 35 | 36 | if(strcmp(opa,"inv") == 0) 37 | { 38 | for(int i= 0; i < n; i++) 39 | { 40 | dst[i] = inv(a[i]); 41 | } 42 | } 43 | else 44 | { 45 | mexPrintf("unknown operation <%s>\n",opa); 46 | } 47 | mxFree((void*)opa); 48 | 49 | 50 | } -------------------------------------------------------------------------------- /matlab/test_p8conv.m: -------------------------------------------------------------------------------- 1 | x_i = cast(-128:127,'int8'); 2 | x_if = p8Tofloat(x_i,double(0)); 3 | x_ifi = floatTop8(x_if); 4 | assert(all(x_i==x_ifi),'conversion'); 5 | %% 6 | scatter(x_i,x_if) 7 | xlabel('Posit (uint8)'); 8 | ylabel('float'); 9 | %% 10 | d_i = p8binop(x_i,x_i,'+'); 11 | ds_i = p8binop(x_i,x_i,'*'); 12 | di_i = p8unop(x_i,'inv'); 13 | d_if = p8Tofloat(d_i,double(0)); 14 | di_if = p8Tofloat(di_i,double(0)); 15 | ds_if = p8Tofloat(ds_i,double(0)); 16 | scatter(x_i,x_if,'r') 17 | hold on 18 | scatter(x_i,d_if,'b') 19 | scatter(x_i,ds_if,'g') 20 | scatter(x_i,di_if,'m') 21 | hold off 22 | xlabel('Posit (uint8)'); 23 | ylabel('float'); 24 | legend({'x','2x','x**2','inv(x)'}); 25 | -------------------------------------------------------------------------------- /matlab/testfloat11.m: -------------------------------------------------------------------------------- 1 | function r = testfloatt11(zp,mode,a,b,omode) 2 | r = []; 3 | 4 | ia = toindex(zp,mode,a); 5 | if isempty(ia) 6 | return; 7 | end 8 | r.a=struct('index',ia,'signed',zp.signed{1}(ia),'unsigned',zp.unsigned{1}(ia),'float',zp.float{1}(ia)); 9 | 10 | ib = toindex(zp,mode,b); 11 | if isempty(ib) 12 | return; 13 | end 14 | r.b=struct('index',ib,'signed',zp.signed{1}(ib),'unsigned',zp.unsigned{1}(ib),'float',zp.float{1}(ib)); 15 | r.s =fromindex(zp,'float',ia)+fromindex(zp,'float',ib); 16 | iy = toindex(zp,'float',r.s); 17 | if isempty(iy)==0 18 | 
function ii = toindex(zp,mode,a)
% TOINDEX  Translate the value A into an index into the first entry of ZP.
%
%   mode 'index'    : A is returned unchanged.
%   mode 'signed'   : first position where zp.signed{1} equals A.
%   mode 'unsigned' : first position where zp.unsigned{1} equals A.
%   mode 'float'    : position of the element of zp.float{1} closest to A
%                     (minimum squared difference).
%   any other mode  : 0.
%
% An empty A is returned unchanged whatever the mode.

ii = a;                 % covers both the empty input and mode 'index'
if isempty(a)
    return;
end

switch mode
    case 'index'
        % nothing to do, ii already holds a
    case {'signed','unsigned'}
        column = zp.(mode);
        ii = find(column{1} == a, 1, 'first');
    case 'float'
        % exact match was replaced by nearest neighbour:
        %ii = find(zp.float{1}==a,1,'first');
        sqdist = (zp.float{1} - a).^2;
        [~,ii] = min(sqdist);
    otherwise
        ii = 0;
end
"""Print the hex bit patterns of -inf, inf, nan, 1 and 2 for NumPy float types.

Each value's native bytes are byte-swapped before being hex-encoded, so on a
little-endian host the most significant byte is printed first.
"""
import binascii

import numpy as np

# np.float128 is not provided by every NumPy build/platform, so a type is only
# listed when the attribute actually exists.
types = tuple(
    getattr(np, name)
    for name in ("float16", "float32", "float64", "float128")
    if hasattr(np, name)
)


def hex_pattern(x):
    """Return the byte-swapped raw bytes of NumPy value ``x`` as a hex string."""
    # hexlify returns bytes on Python 3; decode so it can be joined with str.
    return binascii.hexlify(x.byteswap().tobytes()).decode("ascii")


def main():
    """Print one line per (type, special value) pair."""
    labels = ("-inf", "inf", "nan", "one", "two")
    for t in types:
        inf = np.array(np.inf, dtype=t)
        nan = np.array(np.nan, dtype=t)
        one = np.ones(1, dtype=t)
        samples = (-inf, inf, nan, one, 2 * one)
        for x, y in zip(samples, labels):
            # A single formatted string prints identically under the Python 2
            # print statement and the Python 3 print function.
            print("%30s %10s 0x%s" % (t, y, hex_pattern(x)))


if __name__ == "__main__":
    main()
-DHEADONLY 11 | $SCRIPT_HOME/listpositany.sh int8_t,8,3,uint16_t,true -DHEADONLY 12 | $SCRIPT_HOME/listpositany.sh int16_t,12,0,uint16_t,false -DHEADONLY 13 | $SCRIPT_HOME/listpositany.sh int16_t,12,0,uint16_t,true -DHEADONLY 14 | $SCRIPT_HOME/listpositany.sh int16_t,10,0,uint16_t,false -DHEADONLY 15 | $SCRIPT_HOME/listpositany.sh int16_t,10,0,uint16_t,true -DHEADONLY 16 | $SCRIPT_HOME/listpositany.sh int16_t,10,2,uint16_t,false -DHEADONLY 17 | $SCRIPT_HOME/listpositany.sh int16_t,10,2,uint16_t,true -DHEADONLY 18 | $SCRIPT_HOME/listpositany.sh int16_t,16,0,uint16_t,false -DHEADONLY 19 | $SCRIPT_HOME/listpositany.sh int16_t,16,0,uint16_t,true -DHEADONLY -------------------------------------------------------------------------------- /scripts/listposits8_16bin.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | SCRIPT_HOME=`dirname $0 | while read a; do cd $a && pwd && break; done` 3 | XX="-DLISTFORMAT " 4 | listme() { 5 | $SCRIPT_HOME/listpositany.sh $1 -DLISTFORMAT=Biov 6 | mv tmp "posit$1.bin" 7 | } 8 | 9 | listme int8_t,8,0,uint16_t,false 10 | listme int8_t,8,0,uint16_t,true 11 | listme int8_t,8,1,uint16_t,false 12 | listme int8_t,8,1,uint16_t,true 13 | listme int8_t,8,2,uint16_t,false 14 | listme int8_t,8,2,uint16_t,true 15 | listme int8_t,8,3,uint16_t,false 16 | listme int8_t,8,3,uint16_t,true 17 | listme int16_t,12,0,uint16_t,false 18 | listme int16_t,12,0,uint16_t,true 19 | listme int16_t,12,1,uint16_t,false 20 | listme int16_t,12,1,uint16_t,true 21 | listme int16_t,12,2,uint16_t,false 22 | listme int16_t,12,2,uint16_t,true 23 | listme int16_t,12,3,uint16_t,false 24 | listme int16_t,12,3,uint16_t,true 25 | listme int16_t,12,4,uint16_t,false 26 | listme int16_t,12,4,uint16_t,true 27 | listme int16_t,10,0,uint16_t,false 28 | listme int16_t,10,0,uint16_t,true 29 | listme int16_t,10,2,uint16_t,false 30 | listme int16_t,10,1,uint16_t,true 31 | listme int16_t,10,1,uint16_t,false 32 | listme 
def main():
    """Build the 'Table to C code' argument parser.

    Only the parser is constructed: the command line is not parsed and
    nothing is returned.
    """
    cli = argparse.ArgumentParser(description='Table to C code')
    option_specs = (
        ('--id', dict(default=1, help="tvalid identifier")),
        ('--tvalid', dict(help="emits structure for tvalid class")),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
def main():
    """Generate operation tables for a punum alphabet.

    The alphabet is built either from ``--points`` (1 is added when missing)
    or from one of the preset flags (``--p3`` ... ``--p16a``). For every exact
    value the selected unary operations (``--op1``) are tabulated for
    x, -x, 1/x and -1/x, and the selected binary operations (``--op2``) are
    tabulated for the sign/inverse combinations of every pair (a, b), a <= b.

    Output, in order of precedence:
      * ``--pickle FILE``     : the whole ``outputs`` dict is pickled to FILE;
      * ``--sparsetab PREFIX``: one text file ``PREFIX<name>`` per table;
      * otherwise             : every table is printed with tabulate.

    ``outputs`` maps a table name to ``(mode, rows)`` where mode is
    2 = binary op, 1 = unary op, -1 = float conversion, 0 = list of exacts,
    -2 = the constants [two, 1/two] (pickle output only).
    """
    # Local import: this module does not import pickle at file level.
    import pickle

    t1 = ["half", "twice", "square", "exp2", "float"]
    t2 = ["times", "plus"]
    presets = ("p3", "p4", "p5", "p8", "p8a", "p16a")

    parser = argparse.ArgumentParser(description='Table generator')
    parser.add_argument('--points', nargs="+", type=int, help="lattice points")
    parser.add_argument('--op1', choices=t1, nargs="+", default=t1)
    parser.add_argument('--op2', choices=t2, nargs="+", default=t2)
    for preset in presets:
        parser.add_argument('--' + preset, action="store_true")
    # NOTE(review): --verbose is accepted but not consulted anywhere below.
    parser.add_argument('--verbose', action="store_true")
    parser.add_argument('--sparsetab')
    parser.add_argument('--pickle')
    args = parser.parse_args()

    if args.points is not None:
        # Make sure 1 is part of the lattice. The old check only looked at the
        # first element before sorting, so "--points 2 1" produced a duplicate.
        points = list(args.points)
        if 1 not in points:
            points.append(1)
        points.sort()
        alpha = punum.Alphabet(points)
    else:
        # First preset flag set wins, in the order listed in `presets`.
        chosen = next((p for p in presets if getattr(args, p)), None)
        if chosen is None:
            # Previously this fell through and failed later on an unbound name.
            parser.error("specify --points or one of: " +
                         ", ".join("--" + p for p in presets))
        alpha = getattr(punum.Alphabet, chosen)()

    print("with", len(alpha.eexacts), "obtain", alpha.n, " and ", alpha.n2)
    print(alpha.eexacts)

    # from arguments to operation groups
    ops1 = []
    ops2 = []
    asfloat = False
    for aop in args.op2:
        if aop == "plus":
            op = operator.add
        elif aop == "times":
            op = operator.mul
        else:
            print("skipping op", aop)
            continue
        ops2.append((aop, op))

    two = alpha.convert(2)
    for aop in args.op1:
        if aop == "half":
            op = lambda x: x / two
        elif aop == "twice":
            op = lambda x: x * two
        elif aop == "square":
            op = lambda x: x * x
        elif aop == "exp2":
            # The original called math.exp2 without importing math, and its
            # float result has no .v/.exactvalue() as required below.
            # NOTE(review): assumes exactvalue() yields a finite real number
            # and that alpha.convert accepts the float result -- confirm.
            op = lambda x: alpha.convert(2.0 ** x.exactvalue())
        elif aop == "float":
            asfloat = True
            continue
        else:
            print("unknown op", aop)
            continue
        ops1.append((aop, op))

    # prepare the outputs with flag and array
    outputs = {}
    for aop, _ in ops2:
        outputs[aop] = (2, [])
    for aop, _ in ops1:
        outputs[aop] = (1, [])
    if asfloat:
        outputs["float"] = (-1, [])
    # also listing
    outputs["exacts"] = (0, alpha.eexacts)

    # enumerate all the exacts
    for i, ae in enumerate(alpha.eexacts):
        a = alpha.fromexactsindex(i)
        ia = ~a  # ~ is the reciprocal (1/x) in this alphabet

        # direct conversion table
        if asfloat:
            outputs["float"][1].append(dict(a=a, ai=a.v, y=a.exactvalue()))
            outputs["float"][1].append(dict(a=ia, ai=ia.v, y=ia.exactvalue()))

        # unaries for the 4 cases: x, -x, 1/x, -1/x
        for aop, op in ops1:
            rows = outputs[aop][1]
            for operand in (a, -a, ia, -ia):
                result = op(operand)
                rows.append(dict(a=operand, ai=operand.v,
                                 yi=result.v, y=result.exactvalue()))

        for j, be in enumerate(alpha.eexacts):
            if j < i:
                continue
            b = alpha.fromexactsindex(j)
            ib = ~b
            if i == 0:
                if j == 0:
                    # exactly 1+1 or 1-1
                    # keep first v < second
                    whats = [(a, b), (-a, b)]
                else:
                    # a is 1, j > 1
                    # keep first v < second
                    whats = [(a, b), (a, -b), (-a, -ib), (a, -ib)]
            elif i == j:
                whats = [(a, a), (a, -a), (ia, a), (ia, ia), (a, -ia)]
            else:
                # both > 1: 8 cases
                whats = [(a, b), (a, -b), (ia, b), (-a, -ib),
                         (-ia, -ib), (ia, -ib), (ia, -b), (a, -ib)]
            for k, (xa, xb) in enumerate(whats):
                print(xa.v, xb.v, ae, be, k, len(whats))
                x1 = xa.exactvalue()
                x2 = xb.exactvalue()
                for aop, op in ops2:
                    y = op(x1, x2)
                    uy = alpha.convert(y)
                    outputs[aop][1].append(
                        dict(ai=xa.v, bi=xb.v, a=x1, b=x2, y=y, yi=uy.v))

    if args.pickle:
        # `inv` was never defined; the reciprocal is spelled ~x elsewhere here.
        outputs["two"] = (-2, [two.v, (~two).v])
        with open(args.pickle, "wb") as sink:
            pickle.dump(outputs, sink, protocol=pickle.HIGHEST_PROTOCOL)
    elif args.sparsetab:
        for name, (mode, content) in outputs.items():
            # `with` closes each table file; they used to be left open.
            with open(args.sparsetab + name, "w", encoding="utf8") as o:
                if mode == 2:
                    # binop
                    for row in content:
                        o.write("%d %d %d\n" % (row["ai"], row["bi"], row["yi"]))
                elif mode == 1:
                    # unaryop
                    for row in content:
                        o.write("%d %d\n" % (row["ai"], row["yi"]))
                elif mode == -1:
                    # float
                    for row in content:
                        o.write("%d %f\n" % (row["ai"], row["y"]))
                elif mode == 0:
                    # exact list
                    # NOTE(review): content is alpha.eexacts; indexing its
                    # elements with ["ai"] only works if they are mappings --
                    # confirm the element type in punum.
                    for row in content:
                        o.write("%d\n" % (row["ai"]))
    else:
        for name, (mode, content) in outputs.items():
            print("\n", name, mode, "\n")
            print(tabulate.tabulate(content))
/**
 * Generates the lookup tables for the 8-bit type X and writes them out as a
 * C++ source file.
 *
 * For each of the 256 bit patterns the unary tables (inv, square, exp2, half)
 * and the raw float bits (op2float) are computed; for each pair of patterns
 * the binary tables (add, mul, div) are computed by converting both operands
 * to float, operating in float and converting the result back to X.
 *
 * Output file: "binary8_tbl.cpp" when run without arguments, else argv[1].
 * Always returns 0.
 *
 * NOTE(review): in this view the template arguments of anyfloat_emu and the
 * header name inside the emitted "#include" string are missing; they look
 * lost in extraction -- check against the repository copy.
 */
int main(int argc, char const *argv[])
{
	// template 
	// match the posit8 from posit8.hpp
	using X=anyfloat_emu;
	// Gives access to the raw 32-bit pattern of a float.
	union Q {
		float f;
		uint32_t i;
	} ;
	// Binary tables are indexed by i1*256+i2, unary tables by i, where each
	// index is the low 8 bits of the signed pattern value.
	int8_t opadd[256*256],opmul[256*256],opdiv[256*256],opinv[256],opsquare[256],opexp2[256],ophalf[256];
	uint32_t op2float[256];

	// Pass 1: unary tables and the pattern -> float-bits table.
	for(int16_t s = -128 ; s < 128; s++)
	{
		X x;
		x.v = s;
		int32_t i = ((unsigned int)(uint16_t)s) & 0xFF; // table index: low byte of s
		float fx(x);
		Q fxq;
		fxq.f = fx;

		opinv[i] = x.inv().v;        // inverse computed by X itself
		opsquare[i] = X(fx*fx).v;    // remaining unary ops go through float
		op2float[i] = fxq.i;
		ophalf[i] = X(fx/2).v;
		opexp2[i] = X(exp2(fx)).v;
		//std::cout << "twice " << (X)(fx*2) << " half " << (X)(fx/2) << std::endl;
	}

	// Pass 2: binary tables, reusing the float values stored in op2float.
	for(int16_t s1 = -128 ; s1 < 128; s1++)
	{
		X x1;
		x1.v = s1;
		int32_t i1 = ((unsigned int)(uint8_t)s1) & 0xFF;
		float f1 = uint32_to_float(op2float[i1]);

		for(int16_t s2 = -128 ; s2 < 128; s2++)
		{
			X x2;
			x2.v = s2;
			int32_t i2 = ((unsigned int)(uint8_t)s2) & 0xFF;
			float f2 = uint32_to_float(op2float[i2]);

			opadd[i1*256+i2] = X(f1+f2).v;
			opmul[i1*256+i2] = X(f1*f2).v;
			opdiv[i1*256+i2] = X(f1/f2).v;
			//opadd[i*256+j] = (x*y).v;
		}
	}

	// Emit every table as a comma-separated array inside namespace binary8ns.
	// The stream is not checked for having opened successfully.
	std::ofstream onf(argc == 1 ? "binary8_tbl.cpp" : argv[1]);
	onf << "#include \nnamespace binary8ns {\n";
	onf << "// type is " << typeid(X).name() << std::endl;
	onf << "int8_t opadd[] = {\n";
	for(int i = 0; i < 256*256; i++)
		onf << (int)(opadd[i]) << ","; // cast so the value is printed as a number, not a char
	onf << "}; " << std::endl;
	// emit the numbers as C file
	onf << "int8_t opmul[] = {\n";
	for(int i = 0; i < 256*256; i++)
		onf << (int)(opmul[i]) << ",";
	onf << "}; " << std::endl;
	onf << "int8_t opdiv[] = {\n";
	for(int i = 0; i < 256*256; i++)
		onf << (int)(opdiv[i]) << ",";
	onf << "}; " << std::endl;
	onf << "int8_t opinv[] = {\n";
	for(int i = 0; i < 256; i++)
		onf << (int)(opinv[i]) << ",";
	onf << "}; " << std::endl;
	onf << "int8_t opsquare[] = {\n";
	for(int i = 0; i < 256; i++)
		onf << (int)(opsquare[i]) << ",";
	onf << "}; " << std::endl;
	onf << "int8_t opexp2[] = {\n";
	for(int i = 0; i < 256; i++)
		onf << (int)(opexp2[i]) << ",";
	onf << "}; " << std::endl;

	onf << "int8_t ophalf[] = {\n";
	for(int i = 0; i < 256; i++)
		onf << (int)(ophalf[i]) << ",";
	onf << "}; " << std::endl;

	// Last table also closes the namespace.
	onf << "uint32_t op2float[] = {\n";
	for(int i = 0; i < 256; i++)
		onf << (op2float[i]) << ",";
	onf << "};} " << std::endl;
	return 0;
}
mxGetDimensions(prhs[0]); 18 | plhs[0] = mxCreateUninitNumericArray(ndims,(mwSize*)dimi,mxINT8_CLASS,!complex ? mxREAL: mxCOMPLEX); 19 | zposit_type * dst = (zposit_type*)mxGetData(plhs[0]); 20 | int n = mxGetNumberOfElements(prhs[0])*(complex?2:1); 21 | switch(mxGetClassID(prhs[0])) 22 | { 23 | case mxDOUBLE_CLASS: 24 | // double precision 25 | { 26 | double * src = (double*)mxGetData(prhs[0]); 27 | for(int i = 0;i < n; i++) 28 | { 29 | dst[i] = zposit_type(src[i]); 30 | } 31 | } 32 | break; 33 | case mxSINGLE_CLASS: 34 | { 35 | float * src = (float*)mxGetData(prhs[0]); 36 | for(int i = 0;i < n; i++) 37 | { 38 | dst[i] = zposit_type(src[i]); 39 | } 40 | } 41 | break; 42 | case mxINT16_CLASS: 43 | case mxUINT16_CLASS: 44 | { 45 | halffloat * src = (halffloat*)mxGetData(prhs[0]); 46 | for(int i = 0;i < n; i++) 47 | { 48 | dst[i] = zposit_type(typename zposit_type::UnpackedT(halffloat(src[i]))); // wrap cast to unpacked then to posit 49 | } 50 | } 51 | break; 52 | default: 53 | mexErrMsgTxt("Unsupported type of output: only double, single or 16-bit has half float\n"); 54 | break; 55 | } 56 | 57 | } -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Emanuele Ruffaldi (C) 2017 3 | * Templated C++ Posit 4 | */ 5 | #include "posit.h" 6 | 7 | using X=Posit ; // es, total 8 | #include 9 | 10 | template 11 | void checkfloat(T f) 12 | { 13 | union { 14 | T f; 15 | W i; 16 | } tmp; 17 | tmp.f = f; 18 | W x[1] = {tmp.i}; 19 | std::cout << "--\n"; 20 | X::UnpackedT fu(f); 21 | std::cout << "f:float " << f << std::endl; 22 | std::cout << "f:float hex " << std::hex << x[0] << std::dec << std::endl; 23 | std::cout << "fu:unpacked" << fu << std::endl; 24 | T fuf = (float)fu; 25 | tmp.f = fu; 26 | x[0] = {tmp.i}; 27 | std::cout << "fuf: float " << fuf << std::endl; 28 | std::cout << "fuf:float hex " << std::hex << x[0] << std::dec << std::endl; 
29 | auto fup = X(fu); 30 | std::cout << "fup is " << std::hex << fup << std::endl; 31 | //X q; 32 | //q.setBits(0x10); 33 | //up = q.v; 34 | X::UnpackedT fupu(fup.unpack()); 35 | std::cout << "fupu " << fupu << std::endl; 36 | T fupuf = fupu.pack_float(); 37 | std::cout << "fupuf " << fupuf << std::endl; 38 | 39 | if(f != fuf) 40 | { 41 | std::cout << "ERROR fuf vs f\n"; 42 | } 43 | 44 | if(fupu != fu) 45 | { 46 | std::cout << "ERROR fupu != fu\n"; 47 | } 48 | else if(fupuf != f) 49 | { 50 | std::cout << "ERROR fupuf vs fup\n"; 51 | } 52 | } 53 | 54 | template 55 | void checkup(X::UnpackedT u) 56 | { 57 | W x[1]; 58 | std::cout << "--\n"; 59 | std::cout << "u:unpacked" << u << std::endl; 60 | T uf = u.pack_float(); 61 | union { 62 | T f; 63 | W i; 64 | } tmp; 65 | tmp.f = uf; 66 | x[0] = {tmp.i}; 67 | std::cout << "uf: float " << uf << std::endl; 68 | std::cout << "uf:float hex " << std::hex << x[0] << std::dec << std::endl; 69 | X::UnpackedT ufu(uf); 70 | std::cout << "ufu " << ufu << std::endl; 71 | 72 | auto up = X(u); 73 | std::cout << "fup is " << std::hex << up << std::endl; 74 | //X q; 75 | //q.setBits(0x10); 76 | //up = q.v; 77 | X::UnpackedT upu(up.unpack()); 78 | std::cout << "fupu " << upu << std::endl; 79 | T upuf = upu.pack_float(); 80 | std::cout << "fupuf " << upuf << std::endl; 81 | 82 | if(ufu != u) 83 | { 84 | std::cout << "ERROR ufu != u\n"; 85 | } 86 | 87 | } 88 | 89 | int main(int argc, char const *argv[]) { 90 | std::cout << "N " << X::PT::POSIT_SIZE << " " << X::PT::POSIT_ESP_SIZE << std::endl; 91 | std::cout << "Configuration " << std::hex << " MSB = " << X::PT::POSIT_MSB << " Mask=" << X::PT::POSIT_MASK 92 | << " Sign=" << X::PT::POSIT_SIGNBIT << " one=" << X::PT::POSIT_ONE << " inf=" << X::PT::POSIT_PINF 93 | << " half=" << (double)X(X::PT::POSIT_HALF) << " twice=" << (double)X(X::PT::POSIT_TWO) 94 | << " min=" << (double)X(X::PT::POSIT_MINNEG) << " max=" << X(X::PT::POSIT_MAXPOS) 95 | << " maxexponent " << X::PT::maxexponent() << 
std::dec << std::endl; 96 | if(false) 97 | { 98 | float values[] = {1, 2, INFINITY, NAN, 0, 4.0, 10.0, -1.0, -2.0, -4.0, -10.0, 1 / 2.0, 1 / 4.0}; 99 | 100 | using TQ = double; 101 | using TI = uint64_t; 102 | // TODO: bugs with fractional 1/2 and 1/4 103 | for (unsigned int i = 0; i < sizeof(values) / sizeof(values[0]); i++) { 104 | checkfloat(values[i]); 105 | } 106 | std::cout << "Zero is " << X::zero().v << " unpacked " << X::zero().unpack() << std::endl; 107 | X x(2.0); 108 | std::cout << "Inverse " << x << " " << x.inv() << " " << X(0.5) << std::endl; 109 | //static_assert(X(2.0).inv() == X(0.5),"test"); // PROBLEM DUE to UNION 110 | std::cout << "---------\n"; 111 | std::cout << "TESTING UNPACKED\n"; 112 | // now check the unpack 113 | checkup(X::UnpackedT(40,0,true)); 114 | checkup(X::UnpackedT(X::PT::maxexponent(),0,true)); 115 | checkup(X::UnpackedT(-X::PT::maxexponent(),0,true)); 116 | checkup(X::UnpackedT(X::PT::maxexponent(),0,false)); 117 | checkup(X::UnpackedT(-X::PT::maxexponent(),0,false)); 118 | } 119 | 120 | X::UnpackedT yu(0.3f); 121 | std::cout << "YUff " << (float)yu << std::endl; 122 | std::cout << "YUfd " << (double)yu << std::endl; 123 | 124 | X::UnpackedT yud(0.3); 125 | std::cout << "YUdf " << (float)yud << std::endl; 126 | std::cout << "YUdd " << (double)yud << std::endl; 127 | X y(0.3); 128 | std::cout << "One " << (float)X::one() << std::endl; 129 | std::cout << "Is in normalized range: " << (float)X::one() << " " << X::one().isUnitRange() << std::endl; 130 | std::cout << "Is in normalized range: " << (float)y << " " << y.isUnitRange() << std::endl; 131 | std::cout << "Is !negative " << y.isnegative() << std::endl; 132 | std::cout << "One Minus " << (float)y << "(hex " << std::hex << y.v << ") " << (float)(X::one()-y) << "(hex " << (X::one()-y).v << ") and fast " << (float)(y.urOneMinus()) << " (hex " << (y.urOneMinus()).v << ")"<< std::endl; 133 | 134 | /* 135 | int bits[3]; 136 | int rs,es; 137 | uint64_t fs; 138 | std::cout << "y 
unpacked is " << y.unpack() << std::endl; 139 | y.analy(bits[0],bits[1],bits[2],rs,es,fs); 140 | std::cout << "analyzing " << std::hex << y.v << " bits:" << bits[0] << " " << bits[1] << " " << bits[2] << " fields (rs,es,fs) " << rs << " " << es << " " << fs << std::endl; 141 | */ 142 | return 0; 143 | } 144 | -------------------------------------------------------------------------------- /src/maineigen.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Emanuele Ruffaldi (C) 2017 3 | * Templated C++ Posit 4 | */ 5 | #include "positeigen.h" 6 | 7 | // https://github.com/libigl/eigen/blob/master/unsupported/Eigen/MPRealSupport 8 | int main(int argc, char const *argv[]) 9 | { 10 | // TODO VERIFY MORE 11 | using P=Posit; 12 | using UT=typename P::UnpackedT; 13 | using MatrixP=Eigen::Matrix ; 14 | using VectorP=Eigen::Matrix ; 15 | 16 | MatrixP A = Eigen::MatrixXf::Random(10,10).cast

(); 17 | MatrixP B = MatrixP::Ones(1,1); 18 | MatrixP C = MatrixP::Ones(1,1); 19 | 20 | // std::cout << "Resulting A is " << (B.cwiseProduct(A)).cast() << std::endl; 21 | //std::cout << "Resulting B+C is " << (B+C).cast() << std::endl; 22 | //P A = MatrixP::Reandom(10,10); 23 | Eigen::VectorXd af(4),bf(4); 24 | af<< 3.2e8, 1, -1, 8.0e7; 25 | bf << 4.0e7, 1, -1, -1.6e8; 26 | //af<< 3.2e8, 8.0e7,1,-1; 27 | //bf << 4.0e7, -1.6e8,1,-1; 28 | VectorP ap,bp; 29 | ap= af.cast

(); 30 | bp= bf.cast

(); 31 | std::cout << "3.2e8 ==> unpacked " << UT(af(0)) << std::endl; 32 | std::cout << "3.2e8 ==> unpacked ==> double " << (double)P(UT(af(0))) << std::endl; 33 | std::cout << "af0 is: " << ap(0).v << " " << ap(0) << std::endl; 34 | std::cout << "af1 is: " << ap(1).v << " " << ap(1) << std::endl; 35 | VectorP abp = ap.cwiseProduct(bp); 36 | 37 | std::cout << "af " << af.transpose() << std::endl; 38 | //std::cout << "ap " << ap.transpose() << std::endl; 39 | std::cout << "ap " << ap.cast().transpose() << std::endl; 40 | std::cout << "----"<< std::endl; 41 | std::cout << "bf " << bf.transpose() << std::endl; 42 | //std::cout << "bp " << bp.transpose() << std::endl; 43 | std::cout << "bp " << bp.cast().transpose() << std::endl; 44 | 45 | std::cout << "----"<< std::endl; 46 | std::cout << "af prod bf " << af.cwiseProduct(bf).transpose() << std::endl; 47 | std::cout << "af dot bf: " << af.dot(bf) << std::endl; 48 | std::cout << "----"<< std::endl; 49 | 50 | std::cout << "ap prod bp (double)" << abp.cast().transpose() << std::endl; 51 | std::cout << "one is " << std::hex << P::PT::POSIT_ONE << std::endl; 52 | std::cout << "ap prod bp (posit)" << posit_formatter

(abp[0]) << " "<< posit_formatter

(abp[1]) << " " << posit_formatter

(abp[2]) << " " << posit_formatter

(abp[3]) << std::endl; 53 | //std::cout << "ap prod bp " << abp.cast().transpose() << std::endl; 54 | std::cout << "sum(ap .* bp) " << abp.sum() << " " << abp.sum().unpack() << " " << (double)abp.sum() << std::endl; 55 | std::cout << "ap dot bp: " << ap.dot(bp).unpack() << " " << (double)(ap.dot(bp)) << std::endl; // SHOULD PRODUCE 2 and not ZERO 56 | 57 | return 0; 58 | } -------------------------------------------------------------------------------- /src/posit10.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * ZPosit8 library 3 | * 4 | * Emanuele Ruffaldi 2017 5 | */ 6 | #include "posit10.hpp" 7 | 8 | 9 | std::ostream & operator << (std::ostream & ons, const posit10 & p) 10 | { 11 | ons << "posit(" << p.uu() << ")" ; 12 | return ons; 13 | } 14 | posit10::posit10(int a) 15 | { 16 | if(a == 0) 17 | v = 0; 18 | else if(a == 1) 19 | v = 0x100; 20 | else 21 | v = FPT(a).v; 22 | } 23 | 24 | posit10::posit10(float a) 25 | { 26 | if(a == 0) 27 | v = 0; 28 | else if(a == 1) 29 | v = 0x100; 30 | else 31 | v = FPT(a).v; 32 | } 33 | 34 | posit10::posit10(double a) 35 | { 36 | if(a == 0) 37 | v = 0; 38 | else if(a == 1) 39 | v = 0x100; 40 | else 41 | v = FPT(a).v; 42 | 43 | } 44 | 45 | /** 46 | Positive NUmber families 47 | 48 | 00000000 49 | .. 50 | 00001100 = .1875 51 | .. 52 | 00010000 = .25 53 | .. 54 | 00011000 = 0.375 55 | .. 56 | 00100001 = 0.5 57 | .. 58 | 00110000 = 0.75 59 | .. 60 | 01000000 = 1 61 | .. 62 | 01010000 = 1.5 63 | .. 64 | 01100000 = 2 65 | ... 66 | 01110000 = 4 67 | ... 68 | 01111111 = max 69 | 10000000 = infinity 70 | */ 71 | 72 | /* 73 | posit10 posit10::half() const 74 | { 75 | int8_t aa = v < 0 ? -v : v; 76 | if(has_neg_exponent()) // [0..1) 77 | aa = aa >> 1; // down to 0 78 | else if aa < 2 // [1..2) 79 | aa = ((aa & 0x3F)|0x20); 80 | else 81 | aa = ((aa << 1) & 0x7F) | 0x40; // down to 01000000 == 0x4 82 | return v < 0 ? 
-aa: aa; 83 | */ 84 | 85 | 86 | posit10 posit10::twice() const 87 | { 88 | /* 89 | int8_t aa = v < 0 ? -v : v; 90 | if(has_neg_exponent()) // [0..1) 91 | aa = (aa << 1) & 0x3F; // up to [1..] 92 | else // [2...] 93 | aa = (aa >> 1) | 0x40; // up to 011111111 without overflow 94 | return v < 0 ? -aa: aa; 95 | */ 96 | return (posit10)(as_posit()*(FPT)2); 97 | } 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /src/posit10_gen.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Emanuele Ruffaldi (C) 2017 3 | * Templated C++ Posit 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include "posit.h" 9 | #include "posit10.hpp" 10 | 11 | #define SIGNEX(v, sb) ((v) | (((v) & (1 << (sb))) ? ~((1 << (sb))-1) : 0)) 12 | 13 | int16_t opinv[1024],opsquare[1024],opexp2[1024],ophalf[1024]; 14 | uint32_t op2float[1024]; 15 | int16_t opadd[1024*1024],opmul[1024*1024],opdiv[1024*1024]; 16 | 17 | 18 | int main(int argc, char const *argv[]) 19 | { 20 | // match the posit16 from posit16.hpp 21 | using X=Posit; 22 | union Q { 23 | float f; 24 | uint32_t i; 25 | } ; 26 | for(int16_t s = -512 ; s < 512; s++) 27 | { 28 | X x; 29 | x.v = s; 30 | int32_t i = ((unsigned int)(uint16_t)s) & 0x03FF; 31 | float fx(x); 32 | Q fxq; 33 | fxq.f = fx; 34 | opinv[i] = x.inv().v; 35 | opsquare[i] = X(fx*fx).v; 36 | op2float[i] = fxq.i; 37 | ophalf[i] = X(fx/2).v; 38 | opexp2[i] = X(exp2(fx)).v; 39 | } 40 | 41 | for(int16_t s1 = -512 ; s1 < 512; s1++) 42 | { 43 | X x1; 44 | x1.v = s1; 45 | int32_t i1 = ((unsigned int)(uint16_t)s1) & 0x03FF; 46 | float f1 = uint32_to_float(op2float[i1]); 47 | 48 | for(int16_t s2 = -512 ; s2 < 512; s2++) 49 | { 50 | X x2; 51 | x2.v = s2; 52 | int32_t i2 = ((unsigned int)(uint16_t)s2) & 0x03FF; 53 | float f2 = uint32_to_float(op2float[i2]); 54 | 55 | opadd[i1*1024+i2] = X(f1+f2).v; 56 | opmul[i1*1024+i2] = X(f1*f2).v; 57 | opdiv[i1*1024+i2] = X(f1/f2).v; 58 | 
//opadd[i*1024+j] = (x*y).v; 59 | } 60 | 61 | } 62 | 63 | std::ofstream onf(argc == 1 ? "posit10_tbl.cpp": argv[1]); 64 | onf << "#include \nnamespace posit10ns {\n"; 65 | onf << "// type is " << typeid(X).name() << std::endl; 66 | onf << "int16_t opmul[] = {\n"; 67 | for(int i = 0; i < 1024*1024; i++) 68 | onf << (int)(opmul[i]) << ","; 69 | onf << "}; " << std::endl; 70 | onf << "int16_t opadd[] = {\n"; 71 | for(int i = 0; i < 1024*1024; i++) 72 | onf << (int)(opadd[i]) << ","; 73 | onf << "}; " << std::endl; 74 | onf << "int16_t opdiv[] = {\n"; 75 | for(int i = 0; i < 1024*1024; i++) 76 | onf << (int)(opdiv[i]) << ","; 77 | onf << "}; " << std::endl; 78 | onf << "int16_t opinv[] = {\n"; 79 | for(int i = 0; i < 1024; i++) 80 | onf << (int)(opinv[i]) << ","; 81 | onf << "}; " << std::endl; 82 | 83 | onf << "int16_t ophalf[] = {\n"; 84 | for(int i = 0; i < 1024; i++) 85 | onf << (int)(ophalf[i]) << ","; 86 | onf << "}; " << std::endl; 87 | 88 | onf << "int16_t opsquare[] = {\n"; 89 | for(int i = 0; i < 1024; i++) 90 | onf << (int)(opsquare[i]) << ","; 91 | onf << "}; " << std::endl; 92 | onf << "int16_t opexp2[] = {\n"; 93 | for(int i = 0; i < 1024; i++) 94 | onf << (int)(opexp2[i]) << ","; 95 | onf << "}; " << std::endl; 96 | onf << "uint32_t op2float[] = {\n"; 97 | for(int i = 0; i < 1024; i++) 98 | onf << (op2float[i]) << ","; 99 | onf << "};} " << std::endl; 100 | return 0; 101 | } 102 | -------------------------------------------------------------------------------- /src/posit12.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * ZPosit8 library 3 | * 4 | * Emanuele Ruffaldi 2017 5 | */ 6 | #include "posit12.hpp" 7 | 8 | 9 | std::ostream & operator << (std::ostream & ons, const posit12 & p) 10 | { 11 | ons << "posit(" << p.uu() << ")" ; 12 | return ons; 13 | } 14 | posit12::posit12(int a): v((FPT(a)).v) 15 | { 16 | } 17 | 18 | posit12::posit12(float a): v((FPT(a)).v) 19 | { 20 | } 21 | 22 | 
posit12::posit12(double a) : v((FPT(a)).v) 23 | { 24 | } 25 | 26 | /** 27 | Positive number families (written as 8-bit patterns) 28 | 29 | 00000000 30 | .. 31 | 00001100 = .1875 32 | .. 33 | 00010000 = .25 34 | .. 35 | 00011000 = 0.375 36 | .. 37 | 00100000 = 0.5 38 | .. 39 | 00110000 = 0.75 40 | .. 41 | 01000000 = 1 42 | .. 43 | 01010000 = 1.5 44 | .. 45 | 01100000 = 2 46 | ... 47 | 01110000 = 4 48 | ... 49 | 01111111 = max 50 | 10000000 = infinity 51 | */ 52 | /* half(): returns this value divided by two, computed as as_posit()/(FPT)2 and cast back to posit12. The block comment inside the body is disabled bit-manipulation code kept for reference. */ 53 | posit12 posit12::half() const 54 | { 55 | /* 56 | int8_t aa = v < 0 ? -v : v; 57 | if(has_neg_exponent()) // [0..1) 58 | aa = aa >> 1; // down to 0 59 | else if aa < 2 // [1..2) 60 | aa = ((aa & 0x3F)|0x20); 61 | else 62 | aa = ((aa << 1) & 0x7F) | 0x40; // down to 01000000 == 0x40 63 | return v < 0 ? -aa: aa; 64 | */ 65 | return (posit12)(as_posit()/(FPT)2); 66 | } 67 | 68 | /* twice(): returns this value multiplied by two, computed as as_posit()*(FPT)2 and cast back to posit12. The block comment inside the body is disabled bit-manipulation code kept for reference. */ 69 | posit12 posit12::twice() const 70 | { 71 | /* 72 | int8_t aa = v < 0 ? -v : v; 73 | if(has_neg_exponent()) // [0..1) 74 | aa = (aa << 1) & 0x3F; // up to [1..] 75 | else // [2...] 76 | aa = (aa >> 1) | 0x40; // up to 011111111 without overflow 77 | return v < 0 ? -aa: aa; 78 | */ 79 | return (posit12)(as_posit()*(FPT)2); 80 | } 81 | -------------------------------------------------------------------------------- /src/posit12_gen.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Emanuele Ruffaldi (C) 2017 3 | * Templated C++ Posit 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include "posit.h" 9 | #include "posit12.hpp" 10 | 11 | #define SIGNEX(v, sb) ((v) | (((v) & (1 << (sb))) ? 
~((1 << (sb))-1) : 0)) 12 | 13 | int16_t opinv[4096],opsquare[4096],opexp2[4096]; 14 | uint32_t op2float[4096]; 15 | 16 | int main(int argc, char const *argv[]) 17 | { 18 | // match the posit16 from posit16.hpp 19 | using X=Posit; 20 | union Q { 21 | float f; 22 | uint32_t i; 23 | } ; 24 | for(int16_t s = -2048 ; s < 2048; s++) 25 | { 26 | X x; 27 | x.v = s; 28 | int32_t i = ((unsigned int)(uint16_t)s) & 0x0FFF; 29 | float fx = (float)x; 30 | //std::cout << s << " " << i << " " << fx << std::endl; 31 | Q fxq; 32 | fxq.f = fx; 33 | opinv[i] = x.inv().v; 34 | opsquare[i] = X(fx*fx).v; 35 | op2float[i] = fxq.i; 36 | opexp2[i] = X(exp2(fx)).v; 37 | //std::cout << "twice " << (X)(fx*2) << " half " << (X)(fx/2) << std::endl; 38 | /* 39 | for(int j = 0; j < 4096; j++) 40 | { 41 | X y(SIGNEX(j,X::vtotalbits-1)); 42 | float fy = (float)y; 43 | opadd[i*4096+j] = X(fx+fy).v; 44 | opmul[i*4096+j] = X(fx*fy).v; 45 | opdiv[i*4096+j] = X(fx/fy).v; 46 | //opadd[i*4096+j] = (x*y).v; 47 | } 48 | */ 49 | } 50 | 51 | std::ofstream onf(argc == 1 ? 
"posit12_tbl.cpp": argv[1]); 52 | onf << "#include \nnamespace posit12ns {\n"; 53 | onf << "// type is " << typeid(X).name() << std::endl; 54 | onf << "int16_t opinv[] = {\n"; 55 | for(int i = 0; i < 4096; i++) 56 | onf << (int)(opinv[i]) << ","; 57 | onf << "}; " << std::endl; 58 | onf << "int16_t opsquare[] = {\n"; 59 | for(int i = 0; i < 4096; i++) 60 | onf << (int)(opsquare[i]) << ","; 61 | onf << "}; " << std::endl; 62 | onf << "int16_t opexp2[] = {\n"; 63 | for(int i = 0; i < 4096; i++) 64 | onf << (int)(opexp2[i]) << ","; 65 | onf << "}; " << std::endl; 66 | onf << "uint32_t op2float[] = {\n"; 67 | for(int i = 0; i < 4096; i++) 68 | onf << (op2float[i]) << ","; 69 | onf << "};} " << std::endl; 70 | return 0; 71 | } 72 | -------------------------------------------------------------------------------- /src/posit8.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * ZPosit8 library 3 | * 4 | * Emanuele Ruffaldi 2017 5 | */ 6 | #include "posit8.hpp" 7 | 8 | 9 | std::ostream & operator << (std::ostream & ons, const posit8 & p) 10 | { 11 | ons << "posit(" << p.uu() << ")" ; 12 | return ons; 13 | } 14 | posit8::posit8(int a) 15 | { 16 | if(a == 0) 17 | v = 0; 18 | else if(a == 1) 19 | v = 64; 20 | else 21 | v = FPT(a).v; 22 | 23 | } 24 | 25 | posit8::posit8(float a) 26 | { 27 | if(a == 0) 28 | v = 0; 29 | else if(a == 1) 30 | v = 64; 31 | else 32 | v = FPT(a).v; 33 | 34 | } 35 | 36 | posit8::posit8(double a) 37 | { 38 | if(a == 0) 39 | v = 0; 40 | else if(a == 1) 41 | v = 64; 42 | else 43 | v = FPT(a).v; 44 | 45 | } 46 | 47 | 48 | /** 49 | Positive NUmber families 50 | 51 | 00000000 52 | .. 53 | 00001100 = .1875 54 | .. 55 | 00010000 = .25 56 | .. 57 | 00011000 = 0.375 58 | .. 59 | 00100001 = 0.5 60 | .. 61 | 00110000 = 0.75 62 | .. 63 | 01000000 = 1 64 | .. 65 | 01010000 = 1.5 66 | .. 67 | 01100000 = 2 68 | ... 69 | 01110000 = 4 70 | ... 
71 | 01111111 = max 72 | 10000000 = infinity 73 | */ 74 | /* 75 | int8_t aa = v < 0 ? -v : v; 76 | if(has_neg_exponent()) // [0..1) 77 | aa = aa >> 1; // down to 0 78 | else if aa < 2 // [1..2) 79 | aa = ((aa & 0x3F)|0x20); 80 | else 81 | aa = ((aa << 1) & 0x7F) | 0x40; // down to 01000000 == 0x4 82 | return v < 0 ? -aa: aa; 83 | */ 84 | 85 | 86 | 87 | posit8 posit8::twice() const 88 | { 89 | /* 90 | int8_t aa = v < 0 ? -v : v; 91 | if(has_neg_exponent()) // [0..1) 92 | aa = (aa << 1) & 0x3F; // up to [1..] 93 | else // [2...] 94 | aa = (aa >> 1) | 0x40; // up to 011111111 without overflow 95 | return v < 0 ? -aa: aa; 96 | */ 97 | return (posit8)(as_posit()*(FPT)2); 98 | } 99 | 100 | 101 | -------------------------------------------------------------------------------- /src/posit8_gen.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Emanuele Ruffaldi (C) 2017 3 | * Templated C++ Posit 4 | */ 5 | #include 6 | #include 7 | #include 8 | #include "posit.h" 9 | #include "posit8.hpp" 10 | 11 | #define SIGNEX(v, sb) ((v) | (((v) & (1 << (sb))) ? 
~((1 << (sb))-1) : 0)) 12 | 13 | int main(int argc, char const *argv[]) 14 | { 15 | // match the posit8 from posit8.hpp 16 | using X=Posit; 17 | union Q { 18 | float f; 19 | uint32_t i; 20 | } ; 21 | int8_t opadd[256*256],opmul[256*256],opdiv[256*256],opinv[256],opsquare[256],opexp2[256],ophalf[256]; 22 | uint32_t op2float[256]; 23 | 24 | for(int16_t s = -128 ; s < 128; s++) 25 | { 26 | X x; 27 | x.v = s; 28 | int32_t i = ((unsigned int)(uint16_t)s) & 0xFF; 29 | float fx(x); 30 | Q fxq; 31 | fxq.f = fx; 32 | 33 | opinv[i] = x.inv().v; 34 | opsquare[i] = X(fx*fx).v; 35 | op2float[i] = fxq.i; 36 | ophalf[i] = X(fx/2).v; 37 | opexp2[i] = X(exp2(fx)).v; 38 | //std::cout << "twice " << (X)(fx*2) << " half " << (X)(fx/2) << std::endl; 39 | } 40 | 41 | for(int16_t s1 = -128 ; s1 < 128; s1++) 42 | { 43 | X x1; 44 | x1.v = s1; 45 | int32_t i1 = ((unsigned int)(uint8_t)s1) & 0xFF; 46 | float f1 = uint32_to_float(op2float[i1]); 47 | 48 | for(int16_t s2 = -128 ; s2 < 128; s2++) 49 | { 50 | X x2; 51 | x2.v = s2; 52 | int32_t i2 = ((unsigned int)(uint8_t)s2) & 0xFF; 53 | float f2 = uint32_to_float(op2float[i2]); 54 | 55 | opadd[i1*256+i2] = X(f1+f2).v; 56 | opmul[i1*256+i2] = X(f1*f2).v; 57 | opdiv[i1*256+i2] = X(f1/f2).v; 58 | //opadd[i*256+j] = (x*y).v; 59 | } 60 | } 61 | 62 | std::ofstream onf(argc == 1 ? 
"posit8_tbl.cpp" : argv[1]); 63 | onf << "#include \nnamespace posit8ns {\n"; 64 | onf << "// type is " << typeid(X).name() << std::endl; 65 | onf << "int8_t opadd[] = {\n"; 66 | for(int i = 0; i < 256*256; i++) 67 | onf << (int)(opadd[i]) << ","; 68 | onf << "}; " << std::endl; 69 | // emit the numbers as C file 70 | onf << "int8_t opmul[] = {\n"; 71 | for(int i = 0; i < 256*256; i++) 72 | onf << (int)(opmul[i]) << ","; 73 | onf << "}; " << std::endl; 74 | onf << "int8_t opdiv[] = {\n"; 75 | for(int i = 0; i < 256*256; i++) 76 | onf << (int)(opdiv[i]) << ","; 77 | onf << "}; " << std::endl; 78 | onf << "int8_t opinv[] = {\n"; 79 | for(int i = 0; i < 256; i++) 80 | onf << (int)(opinv[i]) << ","; 81 | onf << "}; " << std::endl; 82 | onf << "int8_t opsquare[] = {\n"; 83 | for(int i = 0; i < 256; i++) 84 | onf << (int)(opsquare[i]) << ","; 85 | onf << "}; " << std::endl; 86 | onf << "int8_t opexp2[] = {\n"; 87 | for(int i = 0; i < 256; i++) 88 | onf << (int)(opexp2[i]) << ","; 89 | onf << "}; " << std::endl; 90 | 91 | onf << "int8_t ophalf[] = {\n"; 92 | for(int i = 0; i < 256; i++) 93 | onf << (int)(ophalf[i]) << ","; 94 | onf << "}; " << std::endl; 95 | 96 | onf << "uint32_t op2float[] = {\n"; 97 | for(int i = 0; i < 256; i++) 98 | onf << (op2float[i]) << ","; 99 | onf << "};} " << std::endl; 100 | return 0; 101 | } 102 | -------------------------------------------------------------------------------- /src/simdposit8.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * ZPosit8 library 3 | * 4 | * Emanuele Ruffaldi 2017 5 | */ 6 | #include "simdposit8.hpp" 7 | 8 | 9 | 10 | 11 | std::ostream & operator << (std::ostream & ons, const simd_posit8_32 & p) 12 | { 13 | ons << (std::array)p; 14 | return ons; 15 | } 16 | simd_posit8_32::simd_posit8_32(int a) 17 | { 18 | auto q = FPT(a).v; 19 | std::cout << "initing from integer " << a << " back as " << (float)(FPT(FPT::DeepInit(),q)) << " as dec " << std::dec << (int)q << 
std::dec <() const 57 | { 58 | std::array r; 59 | #if 0 60 | union Q { 61 | float f; 62 | uint32_t i; 63 | }; 64 | alignas(__m256i) int8_t idata[32]; // uint8 if using table 65 | _mm256_store_si256((__m256i*)idata,v); 66 | uint32_t * pp = (uint32_t*)&r[0]; 67 | for(int i = 0; i < 32; i++) 68 | { 69 | Q xyz; 70 | xyz.f = (float)FPT(FPT::DeepInit(),idata[i]); 71 | pp[i] = xyz.i; 72 | } 73 | #else 74 | alignas(__m256i) uint8_t idata[32]; // uint8 if using table 75 | _mm256_store_si256((__m256i*)idata,v); 76 | uint32_t * pp = (uint32_t*)&r[0]; 77 | for(int i = 0; i < 32; i++) 78 | { 79 | pp[i] = posit8ns::op2float[idata[i]]; 80 | } 81 | #endif 82 | return r; 83 | } 84 | 85 | -------------------------------------------------------------------------------- /src/softfloat_gen.cpp: -------------------------------------------------------------------------------- 1 | #include "softfloat.hpp" 2 | #include 3 | #include 4 | 5 | int main(int argc, char const *argv[]) 6 | { 7 | { 8 | uint16_t opexp2[65536]; 9 | for(int i = 0; i < 65536; i++) 10 | { 11 | float16_t x; 12 | x.v = i; 13 | softfloat16 u(x); 14 | float uf = (float)u; 15 | float ef = exp2(uf); 16 | softfloat16 efu(ef); 17 | opexp2[i] = ((float16_t)efu).v; 18 | } 19 | std::ofstream onf(argc == 1 ? 
"float16_tbl.cpp":argv[1]); 20 | onf << "#include \nuint16_t float16_opexp2[] = {\n"; 21 | for(int i = 0; i < 256*256; i++) 22 | onf << (int)(opexp2[i]) << ","; 23 | onf << "}; " << std::endl; 24 | } 25 | } -------------------------------------------------------------------------------- /tests/itest_anyfloat.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include "anyfloat.hpp" 4 | 5 | #ifndef XFT 6 | #define XFT binary8_emu 7 | #warning "Using XFT type " XFT 8 | #endif 9 | void itest_anyfloat() { 10 | XFT a(3.52); 11 | XFT b(-1.0); 12 | XFT c; 13 | XFT ra = a + b; 14 | XFT rm = a * b; 15 | XFT rs = a - b; 16 | XFT rd = a / b; 17 | std::cout << "c=" << c << std::endl; 18 | std::cout << "a=" << a << std::endl; 19 | std::cout << "b=" << b << std::endl; 20 | std::cout << "1/a=" << a.inv() << std::endl; 21 | std::cout << "1/b=" << b.inv() << std::endl; 22 | std::cout << "1/1/a=" << a.inv().inv() << std::endl; 23 | std::cout << "1/1/b=" << b.inv().inv() << std::endl; 24 | std::cout << "a=b =" << (a >= b) << std::endl; 26 | std::cout << "|a| " << a.abs() << " -a " << a.neg() << std::endl; 27 | std::cout << "|b| " << b.abs() << " -b " << b.neg() << std::endl; 28 | std::cout << "a+b " << ra << std::endl; 29 | std::cout << "a*b " << rm << std::endl; 30 | std::cout << "a-b " << rs << std::endl; 31 | std::cout << "a/b " << rd << std::endl; 32 | std::cout << "inf " << XFT::pinf() << " to " << XFT::ninf() << std::endl; 33 | std::cout << "nan " << XFT::nan() << std::endl; 34 | std::cout << "max-min " << XFT::max() << " to " << XFT::min() << std::endl; 35 | if(sizeof(XFT)==1) 36 | { 37 | //for(int i = -128; i < 127; i++) 38 | // std::cout << i << ": " << XFT(XFT::DeepInit(),*(int*)(unsigned *)&i) << std::endl; 39 | for(int i = 0 ; i < 256; i++) 40 | { 41 | XFT x(XFT::DeepInit(),i); 42 | double d = x; 43 | double dd = XFT(d); 44 | unsigned int rawexp = (i >> XFT::vfractionbits) & 0x1F; //bitmask(XFT::vexpbits); 45 | 46 | 
/* 47 | TODO fix conversion from that double to binary8:: unpack to denormalized 48 | 1: 1.52588e-05 -> 0 0 49 | 2: 3.05176e-05 -> 0 0 50 | */ 51 | if(d != dd) 52 | std::cout << i << ": " << d << " -> " << dd << " " << (d==dd) << " exp " << rawexp << " hex " << std::hex << i << std::dec << std::endl; 53 | else 54 | std::cout << i << ": " << d << " exp " << rawexp << std::endl; 55 | 56 | } 57 | } 58 | } -------------------------------------------------------------------------------- /tests/itest_anyvsreallimit.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "floattraits.hpp" 3 | #include 4 | 5 | /* 6 | template 7 | struct any_floattrait 8 | 9 | template <> 10 | struct float2trait 11 | { 12 | using type = double; 13 | using trait = double_trait; 14 | }; 15 | */ 16 | #ifndef FT 17 | #error "Needs macro FT" 18 | #endif 19 | void itest_anyvsreallimit() 20 | { 21 | using Trait=float2trait::trait; 22 | using AT=any_floattrait; 23 | static_assert(AT::one_h == Trait::one_h,"same one"); 24 | static_assert(AT::two_h == Trait::two_h,"same two_h"); 25 | //static_assert(AT::afterone_h == Trait::afterone_h,"same afterone_h"); 26 | static_assert(AT::min_h == Trait::min_h,"same min_h"); 27 | static_assert(AT::max_h == Trait::max_h,"same max_h"); 28 | static_assert(AT::ninfinity_h == Trait::ninfinity_h,"same ninfinity_h"); 29 | static_assert(AT::pinfinity_h == Trait::pinfinity_h,"same pinfinity_h"); 30 | static_assert(AT::nan_h == Trait::nan_h,"same nan_h"); 31 | } -------------------------------------------------------------------------------- /tests/itest_constexpr.cpp: -------------------------------------------------------------------------------- 1 | #include "posit.h" 2 | 3 | template 4 | void pp(P po) 5 | { 6 | std::cout << std::hex << po.v << " " << po << " == " << (float)po << std::endl; 7 | } 8 | 9 | int main() 10 | { 11 | using X1=Posit; 12 | static_assert(X1::one() == X1::one()*X1::one(),"same 1==1*1"); 13 | 
static_assert(X1::zero() == X1::one()-X1::one(),"same 0==1-1"); 14 | static_assert(X1::one() == X1::one()/X1::one(),"same 1==1/1"); 15 | static_assert(X1::one() == X1::one()+X1::zero(),"same 1==1+0"); 16 | 17 | static_assert(X1::two() == X1::one()+X1::one(),"same 2==1+1"); 18 | static_assert(X1::one() == X1::two().half(),"same 1==half(2)"); 19 | static_assert(X1::onehalf() == X1::one()/X1::two(),"same 0.5==1/2"); 20 | static_assert(X1::one().twice() == X1::two(),"same 2==1*1+1 using fma"); 21 | static_assert(X1::two() == X1::one()*X1::one()+X1::one(),"same 2==1*1+1 using fma"); 22 | 23 | static_assert(X1::PT::minexponent() < X1::PT::maxexponent() ,"exponent ordering"); 24 | 25 | static_assert(X1(1) == X1::one(),"same 1==1"); 26 | 27 | pp(X1(2)); 28 | pp(X1::two()); 29 | pp(X1::one()+X1::one()); 30 | return 1; 31 | } 32 | /* Wrapper named after the file (presumably the entry point the cling runs look for - confirm). It runs the checks in main(); the int result is dropped because this function returns void. */ 33 | void itest_constexpr() 34 | { 35 | main(); 36 | } -------------------------------------------------------------------------------- /tests/itest_float16native32.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "float16native32.hpp" 3 | #include 4 | /* FT names the type under test; it is used as a type name in the declarations below, so the macro must not end in ';'. */ 5 | #define FT float16n32 6 | 7 | #ifdef __AVX__ 8 | #warning Using HW acceleration for F16 conversion 9 | #else 10 | #warning NOT Using HW acceleration for F16 conversion 11 | #endif 12 | 13 | void itest_anyfloat() 14 | { 15 | #ifdef __AVX__ 16 | std::cout <<"Using HW acceleration for F16 conversion\n"; 17 | #else 18 | std::cout <<"NOT Using HW acceleration for F16 conversion\n"; 19 | #endif 20 | 21 | FT a(1.0); 22 | FT b(1.0); 23 | FT r = a+b; 24 | 25 | std::cout << r << " " << (double)r << std::endl; 26 | } -------------------------------------------------------------------------------- /tests/itest_lowlimits.cpp: -------------------------------------------------------------------------------- 1 | // cling -I../include itest_lowlimits.cpp -DQQ=5,10,void,uint16_t 2 | // cling -I../include itest_lowlimits.cpp -DQQ=8,7,void,uint16_t 3 | // 
cling -I../include itest_lowlimits.cpp -DQQ=8,23,float,uint32_t 4 | // cling -I../include itest_lowlimits.cpp -DQQ=11,52,double,uint64_t 5 | // cout issue: cling -I../include itest_lowlimits.cpp -DQQ=15,112,void,__uint128_t 6 | #include "floattraits.hpp" 7 | #include 8 | 9 | void itest_lowlimits() 10 | { 11 | std::cout << "one: " << std::hex << (uint64_t)any_floattrait::one_h << std::endl; 12 | std::cout << "two: " << std::hex << (uint64_t)any_floattrait::two_h << std::endl; 13 | std::cout << "max: " << std::hex << (uint64_t)any_floattrait::max_h << std::endl; 14 | std::cout << "min: " << std::hex << (uint64_t)any_floattrait::min_h << std::endl; 15 | std::cout << "nan: " << std::hex << (uint64_t)any_floattrait::nan_h << std::endl; 16 | std::cout << "pinfinity: " << std::hex <<(uint64_t) any_floattrait::pinfinity_h << std::endl; 17 | } -------------------------------------------------------------------------------- /tests/itest_positf.cpp: -------------------------------------------------------------------------------- 1 | #include "positf.h" 2 | 3 | 4 | int main() 5 | { 6 | using XF=Posit; 7 | using X=XF::PPT; 8 | 9 | XF af0(XF::DeepInit(),50); 10 | X ai0(X::DeepInit(),50); 11 | for(int32_t i = -128; i < 127; i++) 12 | { 13 | XF af(XF::DeepInit(),i); 14 | X ai(X::DeepInit(),i); 15 | auto rf = af+af0; 16 | auto ri = ai+ai0; 17 | if(rf.v != ri.v) 18 | { 19 | std::cout << "!" 
<< rf << " != " << ri << " from " << af << " " << ai << " + " << (double)ai0 << std::endl; 20 | } 21 | else 22 | std::cout << (double)af << "+" << (double)af0 << " == " << (double)rf << std::endl; 23 | } 24 | } 25 | 26 | void itest_positf() 27 | { 28 | main(); 29 | } 30 | -------------------------------------------------------------------------------- /tests/itest_unpackfixed.cpp: -------------------------------------------------------------------------------- 1 | #include "unpacked.h" 2 | #include 3 | 4 | void itest_unpackfixed() 5 | { 6 | static_assert(Unpacked<>(1) == Unpacked<>::one(),"one int is one"); 7 | std::cout << (float)Unpacked<>(0) << std::endl; 8 | std::cout << Unpacked<>(1) << " == " << Unpacked<>::one()<< std::endl; 9 | std::cout << (float)Unpacked<>(121) << " " << Unpacked<>(121) << std::endl; 10 | std::cout << (float)Unpacked<>::make_fixed > (121*2+1) << std::endl; 11 | std::cout << (float)Unpacked<>(1.0) << std::endl; 12 | std::cout << (float)Unpacked<>(1.0f) << std::endl; 13 | std::cout << (int)Unpacked<>(10.5) << std::endl; 14 | std::cout << Unpacked<>(10.75).pack_xfixed> () << std::endl; 15 | std::cout << Unpacked<>(10.75).pack_xfixed> () << std::endl; 16 | std::cout << Unpacked<>(10.75).pack_xfixed> () << std::endl; 17 | } -------------------------------------------------------------------------------- /tests/itest_unpackfloat.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "unpacked.h" 3 | #include 4 | 5 | void itest_anyfloat() 6 | { 7 | 8 | double x = 3.0; 9 | { 10 | Unpacked u; 11 | std::cout << "study " << x << std::endl; 12 | u.unpack_xfloat((double)x); 13 | std::cout << "unpacking as double:" << u << std::endl; 14 | std::cout << "repacking as double:" << u.pack_xfloat() << std::endl; 15 | u.unpack_xfloat((float)x); 16 | std::cout << "unpacking as float:" << u << std::endl; 17 | std::cout << "repacking as float:" << u.pack_xfloat() << std::endl; 18 | std::cout << "repacking as 
double:" << u.pack_xfloat() << std::endl; 19 | } 20 | { 21 | Unpacked u; 22 | std::cout << "study " << x << std::endl; 23 | u.unpack_xfloat((double)x); 24 | std::cout << "unpacking as double:" << u << std::endl; 25 | std::cout << "repacking as double:" << u.pack_xfloat() << std::endl; 26 | u.unpack_xfloat((float)x); 27 | std::cout << "unpacking as float:" << u << std::endl; 28 | std::cout << "repacking as float:" << u.pack_xfloat() << std::endl; 29 | std::cout << "repacking as double:" << u.pack_xfloat() << std::endl; 30 | } 31 | } -------------------------------------------------------------------------------- /tests/makelimits.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export cling='/Applications/cling_2017-10-26_mac1012/bin/cling --std=c++14 -march=native' 3 | $cling "-DXBT=Posit" --std=c++14 -I../include test_numeric_limits.cpp 4 | $cling "-DXBT=Posit" --std=c++14 -I../include test_numeric_limits.cpp 5 | $cling "-DXBT=Posit" --std=c++14 -I../include test_numeric_limits.cpp 6 | $cling "-DXBT=Posit" --std=c++14 -I../include test_numeric_limits.cpp 7 | $cling "-DXBT=Posit" --std=c++14 -I../include test_numeric_limits.cpp 8 | $cling "-DXBT=Posit" --std=c++14 -I../include test_numeric_limits.cpp 9 | $cling -DXBT=float --std=c++14 -I../include test_numeric_limits.cpp 10 | $cling -DXBT=double --std=c++14 -I../include test_numeric_limits.cpp 11 | #$cling -DXBT=halffloat --std=c++14 -I../include test_numeric_limits.cpp 12 | #$cling -DXBT=halffloatalt --std=c++14 -I../include test_numeric_limits.cpp 13 | -------------------------------------------------------------------------------- /tests/minimal.cpp: -------------------------------------------------------------------------------- 1 | #include "include/posit.h" 2 | #include 3 | #include 4 | 5 | using P=Posit; 6 | P a(10.0f); 7 | P b(20.0f); 8 | std::cout << std::make_tuple((float)a.inv(),(float)(a-b),(float)a*b,(float)a.abs()) << std::endl; 9 | 
-------------------------------------------------------------------------------- /tests/test_dot.cpp: -------------------------------------------------------------------------------- 1 | #include "positf.h" // std::cout 2 | #include // std::cout 3 | #include // std::numeric_limits 4 | 5 | #define xstr(s) str(s) 6 | #define str(s) #s 7 | #ifndef XBT 8 | #define XBT Posit 9 | #endif 10 | 11 | int main() 12 | { 13 | using T=XBT; 14 | XBT a[3] = {3,4,5}; 15 | XBT b[3] = {1,2,3}; 16 | std::cout << XBT::dot(a,b,3) << std::endl; 17 | return 0; 18 | } -------------------------------------------------------------------------------- /tests/test_numeric_limits.cpp: -------------------------------------------------------------------------------- 1 | // cling -DXBT=float --std=c++14 -I../include test_numeric_limits.cpp 2 | // cling "-DXBT=PositF --std=c++14 -I../include test_numeric_limits.cpp 3 | #include "positf.h" // std::cout 4 | #include // std::cout 5 | #include // std::numeric_limits 6 | 7 | #define xstr(s) str(s) 8 | #define str(s) #s 9 | #ifndef XBT 10 | #define XBT Posit 11 | #endif 12 | 13 | void test_numeric_limits () { 14 | using T=XBT; 15 | std::cout << std::boolalpha; 16 | std::cout << "\n\nMacro: " << xstr((XBT)) << std::endl; 17 | std::cout << "Signature: " << typeid(T).name() << std::endl; 18 | std::cout << "Minimum value for T: " << std::numeric_limits::min() <<" "<< (double) std::numeric_limits::min() << '\n'; 19 | std::cout << "Maximum value for T: " << std::numeric_limits::max() <<" "<< (double) std::numeric_limits::max()<< '\n'; 20 | std::cout << "Lowest value for T: " << std::numeric_limits::lowest() << " "<<(double) std::numeric_limits::lowest()<< '\n'; 21 | std::cout << "Epsilon for T: " << std::numeric_limits::epsilon() << " "<<(double) std::numeric_limits::epsilon()<< '\n'; 22 | 23 | std::cout << "T is signed: " << std::numeric_limits::is_signed << '\n'; 24 | //std::cout << std::numeric_limits::epsilon().v << std::endl; 25 | //std::cout << "then " << 
(T::one()+std::numeric_limits::epsilon() == T::one().next()) << std::endl; 26 | //std::cout << (double)(T::one().next())-(double)T::one() << std::endl; 27 | //std::cout << "Non-sign bits in T: " << std::numeric_limits::digits << '\n'; 28 | std::cout << "T has infinity: " << std::numeric_limits::has_infinity << '\n'; 29 | std::cout << "Infinity value for T: " << std::numeric_limits::infinity() << " "<< (double)std::numeric_limits::infinity() <<'\n'; 30 | std::cout << "T has nan: " << std::numeric_limits::has_quiet_NaN << '\n'; 31 | std::cout << "NaN " << std::numeric_limits::quiet_NaN() << " " << (double) std::numeric_limits::quiet_NaN() << '\n'; 32 | std::cout << "Exponent Range " << std::numeric_limits::min_exponent << " to " << std::numeric_limits::max_exponent << '\n'; 33 | return ; 34 | } -------------------------------------------------------------------------------- /tests/testbit.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Emanuele Ruffaldi (C) 2017 3 | * Templated C++ Posit 4 | */ 5 | #include 6 | #include 7 | #include "bithippop.hpp" 8 | 9 | int main(int argc, char const *argv[]) 10 | { 11 | enum X1 { w1 = bitset_get((uint32_t)0x0A,2,3) }; 12 | enum X2 { w2 = bitset_getT((uint32_t)0x0A,2,3) }; 13 | std::cout << "constant " << w1 << " " << w2 << std::endl; 14 | 15 | std::cout << "mask " << bitset_all(bitmask(5)) << std::endl; 16 | uint16_t a = 0x0110; 17 | std::cout << bitset_msb(a) << std::endl; 18 | std::cout << bitset_lsb(a) << std::endl; 19 | std::cout << "input: " << bitset_all(a) << std::endl; 20 | auto c = bitset_part(a,0xFF,2,5); 21 | std::cout << bitset_msb(c) << std::endl; 22 | std::cout << bitset_lsb(c) << std::endl; 23 | std::cout << "afterpart " << bitset_all(bitset_get(c,2,5)) << std::endl; 24 | std::cout << "beforpart " << bitset_all(bitset_get(a,2,5)) << std::endl; 25 | std::cout << "afterparthw " << bitset_all(bitset_gethw(c,2,5)) << std::endl; 26 | 27 | std::cout << "after: " << 
bitset_all(c) << std::endl; 28 | 29 | auto b = bitset_part(a,0xFF); 30 | std::cout << bitset_msb(b) << std::endl; 31 | std::cout << bitset_lsb(b) << std::endl; 32 | std::cout << "after: " << bitset_all(b) << std::endl; 33 | std::cout << "afterpart " << bitset_all(bitset_get(c)) << std::endl; 34 | std::cout << "beforpart " << bitset_all(bitset_get(a)) << std::endl; 35 | 36 | 37 | return 0; 38 | } -------------------------------------------------------------------------------- /tests/testeps.cpp: -------------------------------------------------------------------------------- 1 | .I../include 2 | #include "posit.h" 3 | #include "posit12.hpp" 4 | #include "posit10.hpp" 5 | #include 6 | #include 7 | 8 | #include "posit12_tbl.cpp" 9 | #include "posit12.cpp" 10 | #include "posit10_tbl.cpp" 11 | #include "posit10.cpp" 12 | 13 | using PT=posit10; 14 | auto a = PT(0.00001); 15 | auto a1 = PT(1.0); 16 | auto adiff=a1-a; 17 | auto adiff_f = PT(1.0-0.0001); 18 | auto asum = a1+a; 19 | auto asum_f = PT(1.0+0.0001); 20 | (float)a 21 | (float)a1 22 | (float)adiff 23 | (float)adiffe 24 | (float)asum 25 | (float)asume -------------------------------------------------------------------------------- /tests/testfind.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Emanuele Ruffaldi (C) 2017 3 | * Templated C++ Posit 4 | */ 5 | #include "bithippop.hpp" 6 | #include 7 | 8 | int main(int argc, char * argv[]) 9 | { 10 | //enum X { w = findbitleftmostC((uint64_t)0x0A00000000000000) }; 11 | enum X2 { w2 = findbitleftmostC((uint32_t)0x0A000000) }; 12 | enum X3 { w3 = findbitleftmostC((uint16_t)0x0A00) }; 13 | enum X4 { w4 = findbitleftmostC((uint8_t)0xA) }; 14 | 15 | std::cout << "Constants: " << " " << w2 << " " << w3 << " " << w4 << std::endl; 16 | 17 | std::cout << findbitleftmost((uint64_t)0x0A00000000000000) << std::endl; 18 | std::cout << findbitleftmost((uint32_t)0x0A000000) << std::endl; 19 | std::cout << 
findbitleftmost((uint16_t)0x0A00) << std::endl; 20 | std::cout << findbitleftmost((uint8_t)0x0A) << std::endl; 21 | std::cout << findbitleftmost((uint16_t)0) << std::endl; 22 | return 0; 23 | } -------------------------------------------------------------------------------- /tests/testnextprev.cpp: -------------------------------------------------------------------------------- 1 | #include // std::cout 2 | #include "posit.h" 3 | 4 | int main () { 5 | using X1=Posit; 6 | -------------------------------------------------------------------------------- /tests/testposit.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Emanuele Ruffaldi (C) 2017 3 | * Templated C++ Posit 4 | */ 5 | #define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do this in one cpp file 6 | #include "catch.hpp" 7 | #include "posit.h" 8 | #include 9 | 10 | #define STR_EXPAND(tok) #tok 11 | #define STR(tok) STR_EXPAND(tok) 12 | 13 | 14 | using X1=Posit; 15 | #define X X1 16 | #define XA "T28_E5 " 17 | #define SPOSIT "Posit" 18 | #include "testsposit.hpp" 19 | #undef X 20 | #undef XA 21 | #undef SPOSIT 22 | 23 | /*using X2=Posit; 24 | #define X X2 25 | #define XA "T16_E0 " 26 | #define SPOSIT "Posit" 27 | #include "testsposit.hpp" 28 | #undef X 29 | #undef SPOSIT 30 | #undef XA 31 | */ 32 | 33 | // af<< 3.2e8, 1, -1, 8.0e7; 34 | 35 | TEST_CASE ("Special Conversion", "") 36 | { 37 | // IEEE32 All integers with 6 or fewer significant decimal digits can be converted to an IEEE 754 floating-point value without loss of precision 38 | //using Y1=Posit; 39 | using Y=Posit; 40 | 41 | double values1[] = { 3.2e8, 100, 10000, 20000, 1,-1,8.0e7, 4.0e7, 1, -1, -1.6e8}; 42 | double values[] = { -1.6e8}; 43 | for(unsigned int i = 0; i < sizeof(values)/sizeof(values[0]); i++) 44 | { 45 | char w[128]; 46 | sprintf(w,"Special SECTION %f",values[i]); 47 | SECTION(w) 48 | { 49 | double d = values[i]; 50 | Y::UnpackedT du(d); 51 | Y dp(values[i]); 52 | 
/**
 * Stream adaptor that prints a value in three forms at once: the result of
 * its uu() member, its v member converted to int, and the value converted to
 * float.
 *
 * T must provide uu(), a member v convertible to int, and a conversion to
 * float.
 */
template <class T>
struct pof_
{
public:
    pof_(T p): posit(p) {}

    // Output format: "( unsigned <uu> signed <v> float <value>)".
    friend std::ostream & operator << (std::ostream & ons, const pof_ & x)
    {
        ons << "(";
        ons << " unsigned " << x.posit.uu();
        ons << " signed " << (int)x.posit.v;
        ons << " float " << (float)x.posit;
        return ons << ")";
    }

    T posit;
};

/// Wraps x in a pof_<T> so the adaptor's type is deduced from the argument.
template <class T>
pof_<T> pof(T x)
{
    pof_<T> wrapped(x);
    return wrapped;
}
12 | ons << "(" << " unsigned " << x.posit.uu() << " signed " << (int)x.posit.v << " float " << (float)x.posit<<")"; 13 | return ons; 14 | } 15 | 16 | T posit; 17 | }; 18 | 19 | template 20 | pof_ pof(T x) 21 | { 22 | return pof_(x); 23 | } 24 | 25 | int main(int argc, char const *argv[]) 26 | { 27 | posit12 a(20.0); 28 | posit12 b(-10.0); 29 | std::cout << "a: " << pof(a) << "\nb: " << pof(b) << std::endl; 30 | std::cout << "a+b: " << pof(a+b) << "\na-b: " << pof(a-b) << "\na*b: " << pof(a*b) << "\na/b: " << pof(a/b) << "\ninv a:" << pof(a.inv()) << "\ninv b:" << pof(b.inv()) << std::endl; 31 | return 0; 32 | } -------------------------------------------------------------------------------- /tests/testposit8.cpp: -------------------------------------------------------------------------------- 1 | #include "posit8.hpp" 2 | #include 3 | 4 | template 5 | struct pof_ 6 | { 7 | public: 8 | pof_(T p): posit(p) {} 9 | 10 | friend std::ostream & operator << (std::ostream & ons, const pof_ & x) 11 | { 12 | ons << "(" << " unsigned " << x.posit.uu() << " signed " << (int)x.posit.v << " float " << (float)x.posit<<")"; 13 | return ons; 14 | } 15 | 16 | T posit; 17 | }; 18 | 19 | template 20 | pof_ pof(T x) 21 | { 22 | return pof_(x); 23 | } 24 | 25 | int main(int argc, char const *argv[]) 26 | { 27 | posit8 a(20.0); 28 | posit8 b(-10.0); 29 | std::cout << "a: " << pof(a) << "\nb: " << pof(b) << std::endl; 30 | std::cout << "a+b: " << pof(a+b) << "\na-b: " << pof(a-b) << "\na*b: " << pof(a*b) << "\na/b: " << pof(a/b) << "\ninv a:" << pof(a.inv()) << "\ninv b:" << pof(b.inv()) << std::endl; 31 | return 0; 32 | } -------------------------------------------------------------------------------- /tests/testregime.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Emanuele Ruffaldi (C) 2017 3 | * Templated C++ Posit 4 | */ 5 | // g++ -march=native -I. 
--std=c++14 testregime.cpp && ./a.out 6 | #include "bithippop.hpp" 7 | #include 8 | #include 9 | #include 10 | #include 11 | using POSIT_UTYPE = uint32_t; 12 | namespace PT 13 | { 14 | enum {POSIT_SIZE = 32, POSIT_INVERTBIT = 1<<(sizeof(POSIT_UTYPE)*8)-1}; 15 | } 16 | 17 | 18 | std::pair extract(int pars) 19 | 20 | 21 | { 22 | const bool x = (pars & PT::POSIT_INVERTBIT) != 0; 23 | int aindex = x ? (~pars == 0 ? PT::POSIT_SIZE : findbitleftmostC((POSIT_UTYPE)~pars)) : (pars == 0 ? PT::POSIT_SIZE : findbitleftmostC((POSIT_UTYPE)pars)); // where is stop 0... 1 beyond 24 | int index = aindex > PT::POSIT_SIZE ? PT::POSIT_SIZE : aindex; 25 | int reg = x ? index-1 : -index; 26 | int rs = index == PT::POSIT_SIZE ? PT::POSIT_SIZE: index+1; 27 | return {reg,rs}; 28 | } 29 | 30 | 31 | 32 | int main(int argc , char * argv[]) 33 | { 34 | if(0) 35 | { 36 | for(int i = 0; i < 1<< PT::POSIT_SIZE; i++) 37 | { 38 | POSIT_UTYPE a = (i <<4 )|0x0F; 39 | auto q = extract(a); 40 | std::cout << (std::bitset(a)) << " reg=" << q.first << " rs=" << q.second << std::endl; 41 | } 42 | } 43 | else 44 | { 45 | uint32_t a = 0xe0000000; 46 | auto q = extract(a); 47 | std::cout << (std::bitset(a)) << " reg=" << q.first << " rs=" << q.second << std::endl; 48 | } 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /tests/testsimdposit8.cpp: -------------------------------------------------------------------------------- 1 | #include "simdposit8.hpp" 2 | #include 3 | 4 | /** 5 | * TODO: 6 | load from array 7 | verification speed test 8 | */ 9 | 10 | int main(int argc, char const *argv[]) 11 | { 12 | simd_posit8_32 a(0.5f); 13 | simd_posit8_32 b(1.0f); 14 | 15 | std::cout << "as integers:\n\t" << a.uu() << "\n\t" << a << "\n\t" << b.uu() << "\n\t" << b << std::endl; 16 | std::cout << "then product:\n\t" << a*b << "\ndifference:\n\t" << (a-b) << "\ndivision:\n\t" << (a/b) << "\ninversion:\n\t" << a.inv() << "\ninversionb:\n\t" << b.inv() << 
std::endl; 17 | return 0; 18 | } -------------------------------------------------------------------------------- /tests/testsoftfloat.cpp: -------------------------------------------------------------------------------- 1 | #include "softfloat.hpp" 2 | #include "exp2t.hpp" 3 | 4 | 5 | extern uint16_t float16_opexp2[]; 6 | 7 | template <> 8 | struct exp2t_ 9 | { 10 | softfloat16 operator() (softfloat16 a) 11 | { 12 | float16_t w; 13 | w.v = float16_opexp2[((float16_t)a).v]; 14 | return softfloat16(w); 15 | } 16 | }; 17 | 18 | int main(int argc, char * argv[]) 19 | { 20 | std::cout << softfloat16(3)+softfloat16(2) << std::endl; 21 | std::cout << ((float)exp2t(softfloat16(5.2))) << " vs " << exp2t(5.2f) << std::endl; 22 | return 0; 23 | } -------------------------------------------------------------------------------- /tests/testsposit.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017-2019 Emanuele Ruffaldi 3 | * Distributed under the terms of the BSD 3-Clause License. 4 | * 5 | * (See accompanying file LICENSE) 6 | * 7 | * -- 8 | */ 9 | /** 10 | * Emanuele Ruffaldi (C) 2017 11 | * Templated C++ Posit 12 | */ 13 | // 14 | // Created by Emanuele Ruffaldi on 20/10/2017. 
15 | // 16 | 17 | #include 18 | 19 | TEST_CASE( XA "Unpacked Tests", SPOSIT ) { 20 | REQUIRE(X::UnpackedT(2.0).inv().inv() == X::UnpackedT(2.0)); 21 | } 22 | 23 | TEST_CASE( XA "Posits Unpacked Tests", SPOSIT ) { 24 | REQUIRE(X(2.0).unpack() == X::UnpackedT(2.0)); 25 | REQUIRE(X((X(2.0).unpack())) == X(2.0)); 26 | } 27 | 28 | TEST_CASE( XA "Other Tests",SPOSIT) { 29 | 30 | std::cout << X(2.0) << "\n\n" << X(2.0).inv() << "\n\n" << X(2.0).inv().inv() << std::endl; 31 | REQUIRE(X(2.0).inv().inv() == X(2.0)); 32 | 33 | } 34 | 35 | TEST_CASE (XA "Conversions", SPOSIT) 36 | { 37 | float values[] = {1, 2, INFINITY, 4.0, 10.0, -1.0, -2.0, -4.0, -10.0, 1 / 2.0, 1 / 4.0}; 38 | 39 | using TQ = double; 40 | //using TI = uint64_t; 41 | // TODO: bugs with fractional 1/2 and 1/4 42 | for (unsigned int i = 0; i < sizeof(values) / sizeof(values[0]); i++) { 43 | 44 | TQ f = values[i]; 45 | X::UnpackedT fu(f); 46 | X fp(f); 47 | TQ fuf = fu.pack_float(); 48 | X fup(fu); 49 | X::UnpackedT fupu(fup.unpack()); 50 | TQ fupuf = fupu.pack_float(); 51 | 52 | std::cout << "step " << values[i] << std::endl; 53 | if(fupu != fu) 54 | { 55 | std::cout << "d is " << f << std::endl; 56 | std::cout << "du is " << fu << std::endl; 57 | std::cout << "dp is " << std::oct << fp << std::endl; 58 | std::cout << "dup is " << std::oct << fup << std::endl; 59 | std::cout << "dupu is " << fupu << std::endl; 60 | // std::cout << "dpu is " << fpu << std::endl; 61 | std::cout << "dpud is " << fupuf << std::endl; 62 | 63 | } 64 | REQUIRE(f == fuf); // unpack bug 65 | REQUIRE(fp == fup); // impossible bug due to using fu to go to posit 66 | REQUIRE(fupu == fu); // bug in unpack posit 67 | REQUIRE(fupuf == f); // impossible after the above 68 | 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /tests/testunpacked.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Emanuele Ruffaldi (C) 2017 3 | * Templated C++ 
Posit 4 | */ 5 | #define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do this in one cpp file 6 | #include "catch.hpp" 7 | #include "posit.h" 8 | #include 9 | #include 10 | 11 | float values[] = {1, 2, INFINITY, 0, 4.0, 10.0, -1.0, -2.0, -4.0, -10.0, 1 / 2.0, 1 / 4.0, 0.3, 1200, 1/1200.0, 1e20}; 12 | 13 | 14 | const double epscheck = 1e-5; 15 | TEST_CASE ("Unpacked Conversions double") 16 | { 17 | using TQ = double; 18 | using TI = uint64_t; 19 | 20 | for (unsigned int i = 0; i < sizeof(values) / sizeof(values[0]); i++) 21 | { 22 | char what[128]; 23 | sprintf(what,"Testing TQ=%s TI=%s value=%f case=%d",typeid(TQ).name(),typeid(TI).name(),(double)values[i],i); 24 | SECTION( what) 25 | { 26 | TQ f = values[i]; 27 | TQ fi = ((TQ)1.0)/f; 28 | Unpacked fu(f); 29 | auto fui = fu.inv(); 30 | auto fuii = fui.inv(); 31 | auto fu_by_fui = fu*fui; 32 | auto fu2 = Unpacked((TQ)2)*fu; 33 | auto fu2_div_fu = fu2/fu; 34 | auto fu_plus_fu = fu+fu; 35 | switch(std::fpclassify(f)) { 36 | case FP_INFINITE: REQUIRE(fu.type == Unpacked::Infinity); break; 37 | case FP_NAN: REQUIRE(fu.type == Unpacked::NaN); break; 38 | case FP_ZERO: 39 | { 40 | REQUIRE(fu.type == Unpacked::Zero); 41 | int i; 42 | //std::cout << " " << f << " becomes " << fu << std::endl; 43 | //REQUIRE(f2 == fu.fraction); 44 | //REQUIRE(i == fu.exponent+1); 45 | REQUIRE(std::signbit(f) == fu.negativeSign); 46 | REQUIRE(f == (double)fu); 47 | break; 48 | } 49 | break; 50 | default: 51 | { 52 | REQUIRE(fu.type == Unpacked::Regular); 53 | int i; 54 | auto f2 = std::frexp(f, &i); 55 | //std::cout << " " << f << " becomes " << fu << std::endl; 56 | //REQUIRE(f2 == fu.fraction); 57 | REQUIRE(i == fu.exponent+1); 58 | REQUIRE(std::signbit(f) == fu.negativeSign); 59 | REQUIRE((double)fu == Approx(f)); 60 | REQUIRE((double)fuii == Approx(f)); 61 | REQUIRE((double)fui == Approx(fi)); 62 | REQUIRE((double)fu_by_fui == Approx((TQ)1.0)); 63 | REQUIRE((double)fu2_div_fu == Approx((TQ)2.0)); 64 | 
REQUIRE(fu_plus_fu == fu2); 65 | REQUIRE((double)fu_plus_fu == Approx((TQ)2*f)); 66 | break; 67 | } 68 | } 69 | } 70 | } 71 | } 72 | 73 | TEST_CASE ("Unpacked Conversions float") 74 | { 75 | using TQ = float; 76 | using TI = uint32_t; 77 | 78 | for (unsigned int i = 0; i < sizeof(values) / sizeof(values[0]); i++) 79 | { 80 | char what[128]; 81 | sprintf(what,"Testing TQ=%s TI=%s value=%f case=%d",typeid(TQ).name(),typeid(TI).name(),(double)values[i],i); 82 | SECTION( what) 83 | { 84 | TQ f = values[i]; 85 | Unpacked fu(f); 86 | auto fui = fu.inv(); 87 | auto fuii = fui.inv(); 88 | TQ fi = ((TQ)1.0)/f; 89 | auto fu_by_fui = fu*fui; 90 | auto fu2 = Unpacked((TQ)2)*fu; 91 | auto fu2_div_fu = fu2/fu; 92 | auto fu_plus_fu = fu+fu; 93 | 94 | switch(std::fpclassify(f)) { 95 | case FP_INFINITE: REQUIRE(fu.type == Unpacked::Infinity); break; 96 | case FP_NAN: REQUIRE(fu.type == Unpacked::NaN); break; 97 | case FP_ZERO: 98 | { 99 | REQUIRE(fu.type == Unpacked::Zero); 100 | int i; 101 | //std::cout << " " << f << " becomes " << fu << std::endl; 102 | //REQUIRE(f2 == fu.fraction); 103 | //REQUIRE(i == fu.exponent+1); 104 | REQUIRE(std::signbit(f) == fu.negativeSign); 105 | REQUIRE(fabs(f - fu.pack_float()) < epscheck); 106 | break; 107 | } 108 | break; 109 | default: 110 | { 111 | REQUIRE(fu.type == Unpacked::Regular); 112 | int i; 113 | auto f2 = std::frexp(f, &i); 114 | //std::cout << " " << f << " becomes " << fu << std::endl; 115 | //REQUIRE(f2 == fu.fraction); 116 | REQUIRE(i == fu.exponent+1); 117 | REQUIRE(std::signbit(f) == fu.negativeSign); 118 | REQUIRE(fu.pack_float() == Approx(f)); 119 | REQUIRE(fuii.pack_float() == Approx(f)); 120 | REQUIRE(fui.pack_float() == Approx(fi)); 121 | REQUIRE(fu_by_fui.pack_float() == Approx((TQ)1.0)); 122 | REQUIRE(fu2_div_fu.pack_float() == Approx((TQ)2.0)); 123 | REQUIRE(fu_plus_fu == fu2); 124 | REQUIRE(fu_plus_fu.pack_float() == Approx((TQ)2*f)); 125 | break; 126 | } 127 | } 128 | } 129 | } 130 | } 131 | 132 | TEST_CASE("Sum") 133 
| { 134 | using TI=double; 135 | using TQ = uint64_t; 136 | double values[] = { 3200.0, 2.0, 1500000.0 }; 137 | for(unsigned int i = 0; i < sizeof(values)/sizeof(values[0]); i++) 138 | { 139 | for(unsigned int j=i; j < sizeof(values)/sizeof(values[0]); j++) 140 | { 141 | char what[128]; 142 | sprintf(what,"Sum TQ=%s TI=%s value1=%f value2=%f",typeid(TQ).name(),typeid(TI).name(),(double)values[i],(double)values[j]); 143 | Unpacked a(values[i]); 144 | Unpacked b(values[j]); 145 | Unpacked ab(values[i]+values[j]); 146 | Unpacked amb(values[i]-values[j]); 147 | Unpacked mab(-values[i]+values[j]); 148 | 149 | sprintf(what,"Sum TQ=%s TI=%s value1=%f value2=%f f(a+b)=%f u(a+b)=%f",typeid(TQ).name(),typeid(TI).name(),(double)values[i],(double)values[j],(double)ab,(double)(a+b)); 150 | SECTION( what) 151 | { 152 | REQUIRE((a+b) == ab); 153 | REQUIRE((a-b) == amb); 154 | REQUIRE((-a+b) == mab); 155 | } 156 | 157 | /* 158 | sprintf(what,"fSum TQ=%s TI=%s value1=%f value2=%f",typeid(TQ).name(),typeid(TI).name(),(double)values[i],(double)values[j]); 159 | SECTION( what) 160 | { 161 | REQUIRE((a+b).pack_float() == Approx((double)(values[i]+values[j]))); 162 | REQUIRE((a-b).pack_float() == Approx((double)(values[i]-values[j]))); 163 | REQUIRE((-a+b).pack_float() == Approx((double)(-values[i]+values[j]))); 164 | } 165 | */ 166 | } 167 | 168 | } 169 | 170 | } 171 | 172 | // https://www.h-schmidt.net/FloatConverter/IEEE754.html 173 | // Matlab Symbolic Toolbox says: sum(sym([3.2e8, 1, -1, 8.0e7]).*sym([4.0e7, 1, -1, -1.6e8])) == 2 174 | // each [ 12800000000000000, 1, 1, -12800000000000000] 175 | // Matlab Says: sum([3.2e8, 1, -1, 8.0e7].*[ 4.0e7, 1, -1, -1.6e8]) == 0 176 | // each 1E16 [ 1.280000000000000 0.000000000000000 0.000000000000000 -1.280000000000000 ] 177 | TEST_CASE ("High Precision Test") 178 | { 179 | using TI = uint64_t; 180 | double a[] = { 3.2e8, 1, -1, 8.0e7}; 181 | double b[] = { 4.0e7, 1, -1, -1.6e8}; 182 | Unpacked r = Unpacked::zero(); // ZERO is NOT DETECTED 
HERE 183 | for(int i = 0; i < 4 ; i++) 184 | { 185 | auto z = (Unpacked(a[i])*Unpacked(b[i])); 186 | r = r + z; 187 | //std::cout << " " << i << " " << z << " " << z << "\n\ttmp " << r << " " << r << " left is " << Unpacked(a[i]) << std::endl; 188 | } 189 | REQUIRE((double)r == 2); 190 | } 191 | --------------------------------------------------------------------------------