├── .gitignore
├── .gitmodules
├── .travis.yml
├── CMakeLists.txt
├── LICENSE
├── README.md
├── bitbucket-pipelines.yml
├── doc
    ├── CMakeLists.txt
    ├── Doxyfile.in
    └── posit_with_nan.png
├── extern
    └── args.hxx
├── include
    ├── FindInt128.cmake
    ├── anyfloat.hpp
    ├── binary8.hpp
    ├── bithippop.hpp
    ├── catch.hpp
    ├── exp2.hpp
    ├── exp2t.hpp
    ├── fixedtraits.hpp
    ├── float12.hpp
    ├── float16native32.hpp
    ├── float2posit.hpp
    ├── floatconst2bits.hpp
    ├── floattraits.hpp
    ├── posit.h
    ├── posit10.hpp
    ├── posit12.hpp
    ├── posit8.hpp
    ├── positeigen.h
    ├── positf.h
    ├── simd
    │   ├── asimd_16.h
    │   ├── asimd_32.h
    │   ├── asimd_8.h
    │   ├── asimd_all.h
    │   ├── asimd_base.h
    │   ├── asimd_d.h
    │   └── asimd_f.h
    ├── simdposit8.hpp
    ├── softfloat.hpp
    ├── tposit.hpp
    ├── tvalids.hpp
    ├── typehelpers.hpp
    └── unpacked.h
├── jupyter
    ├── .ipynb_checkpoints
    │   └── first-checkpoint.ipynb
    └── first.ipynb
├── makeone.sh
├── matlab
    ├── Lenna.png
    ├── cellindex.m
    ├── cellvcat.m
    ├── compareposits.m
    ├── compareposits2.m
    ├── describetab.m
    ├── dump2table.m
    ├── fromindex.m
    ├── halfinrange.m
    ├── halfprecision
    │   ├── halfprecision.c
    │   ├── halfprecision.m
    │   ├── halfprecisionmax.m
    │   ├── halfprecisionmin.m
    │   ├── ieeehalfprecision.c
    │   ├── license.txt
    │   └── origin.txt
    ├── limittab.m
    ├── loadpositdump.m
    ├── meandiff.m
    ├── mex_make.m
    ├── morton.m
    ├── normr.m
    ├── p8Tofloat.cpp
    ├── p8binop.cpp
    ├── p8unop.cpp
    ├── sift_fea.m
    ├── test_p8conv.m
    ├── testfloat11.m
    └── toindex.m
├── preamble.txt
├── scripts
    ├── float2bin.py
    ├── listpositany.sh
    ├── listposits8_16.sh
    ├── listposits8_16bin.sh
    ├── punum.py
    ├── valid2tvalid.py
    └── validsgentable.py
├── src
    ├── binary8.cpp
    ├── binary8_gen.cpp
    ├── floatTop8.cpp
    ├── listposits.cpp
    ├── main.cpp
    ├── maineigen.cpp
    ├── posit10.cpp
    ├── posit10_gen.cpp
    ├── posit10_tbl.cpp
    ├── posit12.cpp
    ├── posit12_gen.cpp
    ├── posit12_tbl.cpp
    ├── posit8.cpp
    ├── posit8_gen.cpp
    ├── posit8_tbl.cpp
    ├── simdposit8.cpp
    └── softfloat_gen.cpp
└── tests
    ├── itest_anyfloat.cpp
    ├── itest_anyvsreallimit.cpp
    ├── itest_constexpr.cpp
    ├── itest_float16native32.cpp
    ├── itest_lowlimits.cpp
    ├── itest_positf.cpp
    ├── itest_unpackfixed.cpp
    ├── itest_unpackfloat.cpp
    ├── makelimits.sh
    ├── minimal.cpp
    ├── test_dot.cpp
    ├── test_numeric_limits.cpp
    ├── testbit.cpp
    ├── testeps.cpp
    ├── testfind.cpp
    ├── testnextprev.cpp
    ├── testposit.cpp
    ├── testposit10.cpp
    ├── testposit12.cpp
    ├── testposit8.cpp
    ├── testregime.cpp
    ├── testsimdposit8.cpp
    ├── testsoftfloat.cpp
    ├── testsposit.hpp
    └── testunpacked.cpp


/.gitignore:
--------------------------------------------------------------------------------
 1 | */posit12_tbl.cpp
 2 | */posit16_tbl.cpp
 3 | */float12_tbl.cpp
 4 | */float16_tbl.cpp
 5 | # Prerequisites
 6 | *.d
 7 | 
 8 | # Compiled Object files
 9 | *.slo
10 | *.lo
11 | *.o
12 | *.obj
13 | 
14 | # Precompiled Headers
15 | *.gch
16 | *.pch
17 | 
18 | # Compiled Dynamic libraries
19 | *.so
20 | *.dylib
21 | *.dll
22 | 
23 | # Fortran module files
24 | *.mod
25 | *.smod
26 | 
27 | # Compiled Static libraries
28 | *.lai
29 | *.la
30 | *.a
31 | *.lib
32 | 
33 | # Executables
34 | *.exe
35 | *.out
36 | *.app
37 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "softfloat"]
2 | 	path = extern/softfloat
3 | 	url = https://github.com/eruffaldi/berkeley-softfloat-3.git
4 | 
5 | [submodule "extern/fixed_point"]
6 | 	path = extern/fixed_point
7 | 	url = https://github.com/johnmcfarlane/fixed_point
8 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: cpp
 2 | # build commands are defined once, in the 'script:' list further down (a second top-level 'script' key would be a duplicate)
 3 | compiler: g++
 4 | dist: xenial
 5 | 
 6 | before_script:
 7 |   - gcc --version
 8 |   - sudo unlink /usr/bin/gcc && sudo ln -s /usr/bin/gcc-5 /usr/bin/gcc
 9 |   - gcc --version
10 |   # create a build folder for the out-of-source build
11 |   - mkdir build
12 |   # switch to build directory
13 |   - cd build
14 |   # run cmake; here we assume that the project's
15 |   # top-level CMakeLists.txt is located at '..'
16 |   - cmake -DSOFTFLOAT=OFF ..
17 | 
18 | script:
19 |   # once CMake has done its job we just build using make as usual
20 |   - make
21 |   # if the project uses ctest we can run the tests like this
22 |   #- make test
23 | 
24 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.0)
  2 | project(cppposit)
  3 | 
  4 | #Issues with clang. Build soft float manually 
  5 | #add_subdirectory(extern/softfloat)
  6 | #link_directories(extern/lib)
  7 | 
  8 | #set(SYSPROC arm)
  9 | if(SYSPROC MATCHES "arm")
 10 | add_definitions (-D__arm__ ) 
 11 | ENDIF()
 12 | 
 13 | if(MSVC)
 14 |   # Force /W4 with warnings-as-errors: rewrite an existing /W<n> flag in place, otherwise append the pair
 15 |   if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
 16 |     string(REGEX REPLACE "/W[0-4]" "/W4 /WX" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 17 |   else()
 18 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /WX")
 19 |   endif()
 20 | else()
 21 |   # Other compilers: all warnings, pedantic, treated as errors; update this list if necessary
 22 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -Wno-long-long -pedantic")
 23 | endif()
 24 | 
 25 | option(USE_CXX14 "C++14" ) # no initial value is given, so the option starts OFF and the C++11 branch is taken
 26 | if(USE_CXX14)
 27 | add_definitions(--std=c++14) # directory-scoped: the raw flag is passed to every target declared below
 28 | else()
 29 | add_definitions(--std=c++11)
 30 | endif()
 31 | # SOFTFLOAT (default ON) defines WITH_SOFTFLOAT for all sources and later enables the softfloat targets
 32 | option(SOFTFLOAT "softfloat" TRUE)
 33 | if(SOFTFLOAT)
 34 | add_definitions(-DWITH_SOFTFLOAT)
 35 | endif()
 36 | add_definitions(-march=native) # unconditional: added for every compiler and every target
 37 | 
 38 | include_directories(include)
 39 | find_package(Eigen3 )
 40 | include_directories(${EIGEN3_INCLUDE_DIRS})
 41 | add_executable(main src/main.cpp)
 42 | #add_executable(maineigen src/maineigen.cpp)
 43 | add_executable(testunpacked tests/testunpacked.cpp)
 44 | add_executable(testposit tests/testposit.cpp)
 45 | 
 46 | # TABULATED posits
 47 | # TODO Generalize to any arbitrary configuration of posit: given a posit configuration then
 48 | # automatically make the generator, the table file and the cmake
 49 | 
 50 | option(POSIT12 "posit12 tabulated" TRUE)
 51 | option(POSIT10 "posit10 tabulated" TRUE)
 52 | option(POSIT8 "posit8 tabulated" TRUE)
 53 | option(BINARY8 "binary8_5 tabulated" TRUE)
 54 | if(POSIT12)
 55 |   add_executable(posit12_gen src/posit12_gen.cpp)
 56 |   set(POSIT12_SRC src/posit12.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/posit12_tbl.cpp)
 57 |   if(NOT CMAKE_GENERATOR STREQUAL Xcode)
 58 |   add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/posit12_tbl.cpp COMMAND $<TARGET_FILE:posit12_gen> ${CMAKE_CURRENT_SOURCE_DIR}/src/posit12_tbl.cpp DEPENDS posit12_gen )
 59 |   endif()
 60 | endif()
 61 | 
 62 | if(POSIT10)
 63 | 	add_executable(posit10_gen src/posit10_gen.cpp)
 64 | 	set(POSIT10_SRC src/posit10.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/posit10_tbl.cpp)
 65 |   if(NOT CMAKE_GENERATOR STREQUAL Xcode)
 66 |   	add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/posit10_tbl.cpp COMMAND $<TARGET_FILE:posit10_gen> ${CMAKE_CURRENT_SOURCE_DIR}/src/posit10_tbl.cpp DEPENDS posit10_gen )
 67 |   endif()
 68 | endif()
 69 | 
 70 | if(POSIT8)
 71 |   add_executable(posit8_gen src/posit8_gen.cpp)
 72 |   set(POSIT8_SRC src/posit8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/posit8_tbl.cpp)
 73 |   if(NOT CMAKE_GENERATOR STREQUAL Xcode)
 74 |     add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/posit8_tbl.cpp COMMAND $<TARGET_FILE:posit8_gen>   ${CMAKE_CURRENT_SOURCE_DIR}/src/posit8_tbl.cpp DEPENDS posit8_gen )
 75 |   endif()
 76 | endif()
 77 | 
 78 | if(BINARY8)
 79 |   add_executable(binary8_gen src/binary8_gen.cpp)
 80 |   set(BINARY8_SRC src/binary8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/binary8_tbl.cpp)
 81 |   if(NOT CMAKE_GENERATOR STREQUAL Xcode)
 82 |     add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/binary8_tbl.cpp COMMAND $<TARGET_FILE:binary8_gen>  ${CMAKE_CURRENT_SOURCE_DIR}/src/binary8_tbl.cpp DEPENDS binary8_gen )
 83 |   endif()
 84 | endif()
 85 | 
 86 | if(BINARY8 OR POSIT8 OR POSIT10 OR POSIT12)
 87 | set(POSITTABTARGET posittab)
 88 | add_library(posittab STATIC ${POSIT8_SRC} ${BINARY8_SRC} ${POSIT10_SRC} ${POSIT12_SRC})
 89 | set_property(TARGET posittab PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/include)
 90 | endif()
 91 | 
 92 | #if(BINARY8)
 93 | #add_executable(testbinary8 tests/testbinary8.cpp)
 94 | #target_link_libraries(testbinary8 posittab)
 95 | #endif()
 96 | 
 97 | 
 98 | if(POSIT8)
 99 | add_executable(testposit8 tests/testposit8.cpp)
100 | target_link_libraries(testposit8 posittab)
101 | endif()
102 | 
103 | if(POSIT10)
104 | add_executable(testposit10 tests/testposit10.cpp)
105 | target_link_libraries(testposit10 posittab)
106 | endif()
107 | 
108 | if(POSIT12)
109 | add_executable(testposit12 tests/testposit12.cpp)
110 | target_link_libraries(testposit12 posittab)
111 | endif()
112 | 
113 | #if(SYSPROC MATCHES "arm")
114 | #else()
115 | #include_directories(include/simd)
116 | #add_executable(testsimdposit8 tests/testsimdposit8.cpp src/simdposit8.cpp)
117 | #target_link_libraries(testsimdposit8 ${POSITTABTARGET})
118 | #endif()
119 | add_library(posit INTERFACE)
120 | set_property(TARGET posit PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/include)
121 | 
122 | if(TARGET posittab) # test the target itself: if(${POSITTABTARGET}) dereferences a variable named 'posittab' and is always false
123 | add_executable(listposits src/listposits.cpp)
124 | target_link_libraries(listposits posittab)
125 | endif()
126 | 
127 | if(SOFTFLOAT)
128 | link_directories(${CMAKE_CURRENT_SOURCE_DIR}/extern)
129 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/extern)
130 | add_library(p::softfloat INTERFACE IMPORTED)
131 | set_property(TARGET p::softfloat PROPERTY INTERFACE_LINK_LIBRARIES softfloat.a)
132 | set_property(TARGET p::softfloat PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/extern)
133 | 
134 | add_executable(softfloat_gen src/softfloat_gen.cpp)
135 | target_link_libraries(softfloat_gen p::softfloat)
136 | 
137 | if(NOT CMAKE_GENERATOR STREQUAL Xcode)
138 | add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/float16_tbl.cpp COMMAND DYLD_LIBRARY_PATH=${CMAKE_CURRENT_SOURCE_DIR}/extern LD_LIBRARY_PATH=${CMAKE_CURRENT_SOURCE_DIR}/extern $<TARGET_FILE:softfloat_gen> ${CMAKE_CURRENT_SOURCE_DIR}/src/float16_tbl.cpp DEPENDS softfloat_gen)
139 | endif()
140 | add_library(softfloat16 STATIC ${CMAKE_CURRENT_SOURCE_DIR}/src/float16_tbl.cpp)
141 | set_property(TARGET softfloat16 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/extern)
142 | target_link_libraries(softfloat16 p::softfloat)
143 | 
144 | 
145 | add_executable(testsoftfloat tests/testsoftfloat.cpp)
146 | target_link_libraries(testsoftfloat softfloat16)
147 | 
148 | #set_property(TARGET softfloat16 PROPERTY INTERFACE_LINK_LIBRARIES ${CMAKE_CURRENT_SOURCE_DIR}/extern/softfloat)
149 | 
150 | install (TARGETS softfloat16 DESTINATION lib)
151 | endif()
152 | 
153 | #export(EXPORT cppPosit  FILE cppPosit.cmake)
154 | export(TARGETS ${POSITTABTARGET} posit FILE cppPosit.cmake)
155 | include_directories(extern)
156 | 
157 | install (TARGETS ${POSITTABTARGET} DESTINATION lib) #INTERFACE_INCLUDE_DIRECTORIES include )
158 | install(DIRECTORY include/ DESTINATION include/cppPosit)
159 | #install(EXPORT cppPosit DESTINATION lib/cppPosit )
160 | 
161 | add_subdirectory(doc EXCLUDE_FROM_ALL)
162 | 
163 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | COPYRIGHT
 2 | 
 3 | All contributions by Emanuele Ruffaldi
 4 | Copyright (c) 2016-2019, E
 5 | All rights reserved.
 6 | 
 7 | All other contributions:
 8 | Copyright (c) 2019, the respective contributors.
 9 | All rights reserved.
10 | 
11 | Each contributor holds copyright over their respective contributions.
12 | The project versioning (Git) records all such contribution source information.
13 | 
14 | LICENSE
15 | 
16 | The BSD 3-Clause License
17 | 
18 | Redistribution and use in source and binary forms, with or without
19 | modification, are permitted provided that the following conditions are met:
20 | 
21 | * Redistributions of source code must retain the above copyright notice, this
22 |   list of conditions and the following disclaimer.
23 | 
24 | * Redistributions in binary form must reproduce the above copyright notice,
25 |   this list of conditions and the following disclaimer in the documentation
26 |   and/or other materials provided with the distribution.
27 | 
28 | * Neither the name of tiny-dnn nor the names of its
29 |   contributors may be used to endorse or promote products derived from
30 |   this software without specific prior written permission.
31 | 
32 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
36 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
38 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
40 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
41 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 | 


--------------------------------------------------------------------------------
/bitbucket-pipelines.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eruffaldi/cppPosit/3c497041063dc87de2f9615a52a55b0ed5222b12/bitbucket-pipelines.yml


--------------------------------------------------------------------------------
/doc/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | project(cppPositDoc)
 2 | 
 3 | option(EIGEN_INTERNAL_DOCUMENTATION "Build internal documentation" OFF)
 4 | 
 5 | 
 6 | # Set some Doxygen flags
 7 | set(DOXY_PROJECT_NAME             "cppPosit")
 8 | set(DOXY_PROJECT_VERSION		   "")
 9 | set(DOXY_OUTPUT_DIRECTORY_SUFFIX  "")
10 | set(DOXY_BINARY_DIR "\"${CMAKE_CURRENT_BINARY_DIR}\"")
11 | set(DOXY_INPUT                    "\"${CMAKE_CURRENT_SOURCE_DIR}/../include\" \"${CMAKE_CURRENT_SOURCE_DIR}\"")
12 | set(DOXY_HTML_COLORSTYLE_HUE      "220")
13 | set(DOXY_TAGFILES                 "")
14 | if(INTERNAL_DOCUMENTATION OR EIGEN_INTERNAL_DOCUMENTATION) # honour the declared option name as well as the name tested so far
15 |   set(DOXY_INTERNAL                 "YES")
16 | else()
17 |   set(DOXY_INTERNAL                 "NO")
18 | endif()
19 | 
20 | configure_file(
21 |   ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in
22 |   ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
23 | )
24 | 
25 | add_custom_target(doc ALL
26 |   COMMAND doxygen
27 |   WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})


--------------------------------------------------------------------------------
/doc/posit_with_nan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eruffaldi/cppPosit/3c497041063dc87de2f9615a52a55b0ed5222b12/doc/posit_with_nan.png


--------------------------------------------------------------------------------
/include/FindInt128.cmake:
--------------------------------------------------------------------------------
  1 | # - this module looks for 128 bit integer support.  It sets up the
  2 | # type defs in util/int128_types.hpp.  Simply add ${INT128_FLAGS} to the
  3 | # compiler flags.
  4 | 
  5 | include(CheckTypeSize)
  6 | 
  7 | MACRO(CHECK_128_BIT_HASH_FUNCTION VAR_NAME DEF_NAME)
  8 |   # Try to compile a call to std::hash<VAR_NAME>; on success set the variable named by DEF_NAME to 1.
  9 |   message("Testing for presence of 128 bit unsigned integer hash function for ${VAR_NAME}.")
 10 |   # CHECK_CXX_SOURCE_COMPILES lives in its own module; load it here so the macro works on its own.
 11 |   include(CheckCXXSourceCompiles)
 12 |   CHECK_CXX_SOURCE_COMPILES("
 13 |   #include <functional>
 14 |   #include <cstdint>
 15 |   int main(int argc, char** argv) {
 16 |     std::hash<${VAR_NAME}>()(0);
 17 |   return 0;
 18 |   }"
 19 |   has_hash_${VAR_NAME})
 20 |   # The compile result is stored under has_hash_<type>; DEF_NAME is left untouched on failure.
 21 |   if(has_hash_${VAR_NAME})
 22 |     message("std::hash<${VAR_NAME}> defined.")
 23 |     SET(${DEF_NAME} 1)
 24 |   else()
 25 |     message("std::hash<${VAR_NAME}> not defined.")
 26 |   endif()
 27 | endmacro()
 28 | 
 29 | 
 30 | MACRO(CHECK_INT128 INT128_NAME VARIABLE DEFINE_NAME)
 31 | 
 32 |   if(NOT INT128_FOUND)
 33 |     message("Testing for 128 bit integer support with ${INT128_NAME}.")
 34 |     check_type_size("${INT128_NAME}" int128_t_${DEFINE_NAME})
 35 |     if(HAVE_int128_t_${DEFINE_NAME})
 36 |       if(int128_t_${DEFINE_NAME} EQUAL 16)
 37 | 	message("Found: Enabling support for 128 bit integers using ${INT128_NAME}.")
 38 | 	SET(INT128_FOUND 1)
 39 |   CHECK_128_BIT_HASH_FUNCTION(${INT128_NAME} HAS_INT128_STD_HASH)
 40 | 
 41 | 	SET(${VARIABLE} "${DEFINE_NAME}")
 42 |       else()
 43 | 	message("${INT128_NAME} has incorrect size, can't use.")
 44 |       endif()
 45 |     endif()
 46 |   endif()
 47 | endmacro()
 48 | 
 49 | MACRO(CHECK_UINT128 UINT128_NAME VARIABLE DEFINE_NAME)
 50 | 
 51 |   if(NOT UINT128_FOUND)
 52 |     message("Testing for 128 bit unsigned integer support with ${UINT128_NAME}.")
 53 |     check_type_size("${UINT128_NAME}" uint128_t_${DEFINE_NAME})
 54 |     if(HAVE_uint128_t_${DEFINE_NAME})
 55 |       if(uint128_t_${DEFINE_NAME} EQUAL 16)
 56 | 	message("Found: Enabling support for 128 bit integers using ${UINT128_NAME}.")
 57 | 	SET(UINT128_FOUND 1)
 58 |   CHECK_128_BIT_HASH_FUNCTION(${UINT128_NAME} HAS_UINT128_STD_HASH)
 59 | 	SET(${VARIABLE} "${DEFINE_NAME}")
 60 |       else()
 61 | 	message("${UINT128_NAME} has incorrect size, can't use.")
 62 |       endif()
 63 |     endif()
 64 |   endif()
 65 | endmacro()
 66 | 
 67 | MACRO(FIND_INT128_TYPES)
 68 |   # Probe candidate signed type names in order; CHECK_INT128 skips its work once INT128_FOUND is set.
 69 |   Check_Int128("long long"  INT128_DEF "HAVEint128_as_long_long")
 70 |   Check_Int128("int128_t"   INT128_DEF "HAVEint128_t")
 71 |   Check_Int128("__int128_t" INT128_DEF "HAVE__int128_t")
 72 |   Check_Int128("__int128"   INT128_DEF "HAVE__int128")
 73 |   Check_Int128("int128"     INT128_DEF "HAVEint128")
 74 |   # INT128_FLAGS starts from the define selected above, plus the hash define when one was detected.
 75 |   if(INT128_FOUND)
 76 |     set(INT128_FLAGS "-D${INT128_DEF}")
 77 | 
 78 |     if(HAS_INT128_STD_HASH)
 79 |       set(INT128_FLAGS "${INT128_FLAGS} -DHASH_FOR_INT128_DEFINED")
 80 |     endif()
 81 | 
 82 |   else()
 83 |     message("Compiler/platform support for 128 bit integers not found, falling back to boost mpfr.")
 84 |     set(INT128_FLAGS "")
 85 |   endif()
 86 |   # Same probing for the unsigned candidates (CHECK_UINT128 is guarded by UINT128_FOUND).
 87 |   Check_UInt128("unsigned long long"  UINT128_DEF "HAVEuint128_as_u_long_long")
 88 |   Check_UInt128("uint128_t"           UINT128_DEF "HAVEuint128_t")
 89 |   Check_UInt128("__uint128_t"         UINT128_DEF "HAVE__uint128_t")
 90 |   Check_UInt128("__uint128"           UINT128_DEF "HAVE__uint128")
 91 |   Check_UInt128("uint128"             UINT128_DEF "HAVEuint128")
 92 |   Check_UInt128("unsigned __int128_t" UINT128_DEF "HAVEunsigned__int128_t")
 93 |   Check_UInt128("unsigned int128_t"   UINT128_DEF "HAVEunsignedint128_t")
 94 |   Check_UInt128("unsigned __int128"   UINT128_DEF "HAVEunsigned__int128")
 95 |   Check_UInt128("unsigned int128"     UINT128_DEF "HAVEunsignedint128")
 96 |   # Append the unsigned define(s); when nothing is found INT128_FLAGS keeps the value computed above.
 97 |   if(UINT128_FOUND)
 98 |     set(INT128_FLAGS "${INT128_FLAGS} -D${UINT128_DEF}")
 99 | 
100 |     if(HAS_UINT128_STD_HASH)
101 |       set(INT128_FLAGS "${INT128_FLAGS} -DHASH_FOR_UINT128_DEFINED")
102 |     endif()
103 | 
104 |   else()
105 |     message("Compiler/platform support for unsigned 128 bit integers not found, falling back to boost mpfr.")
106 |   endif()
107 | 
108 | endmacro()


--------------------------------------------------------------------------------
/include/binary8.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | /**
 10 |  * Zbinary8tabn library
 11 |  *
 12 |  * Emanuele Ruffaldi 2017
 13 |  */
 14 | #pragma once
 15 | #include <stdint.h>
 16 | #include <iostream>
 17 | #include <limits>
 18 | 
 19 | #ifndef BINARY8_SPEC
 20 | #define BINARY8_SPEC
 21 | #define BINARY8_EXP 5
 22 | #define BINARY8_MAN 2
 23 | #endif
 24 | 
 25 | #include "floattraits.hpp"
 26 | 
 27 | #ifndef FPGAHLS
 28 | inline float uint32_to_float(uint32_t i)
 29 | {
 30 | 	union {
 31 | 		float f;
 32 | 		uint32_t i;
 33 | 	} x;
 34 | 	x.i = i;
 35 | 	return x.f;
 36 | }
 37 | #endif
 38 | 
 39 | 
 40 | namespace binary8ns
 41 | {
 42 | 	extern int8_t opadd[];
 43 | 	extern int8_t opmul[];
 44 | 	extern int8_t opdiv[];
 45 | 	extern int8_t opinv[];
 46 | 	extern int8_t opexp2[];
 47 | 	extern int8_t ophalf[];
 48 | 	extern int8_t opsquare[];
 49 | 	extern uint32_t op2float[];
 50 | }
 51 | 
 52 | class binary8tab
 53 | {
 54 | public:
 55 | 	// template <class T,int totalbits, int esbits, class FT, bool withnan>
 56 | 	// 8-bit floating-point value; arithmetic is performed by lookups in the binary8ns tables,
 57 | 	// indexed by the raw encoding (see uu()).
 58 | 	using value_t=int8_t;
 59 | 	using fraction_t=uint16_t;
 60 | 	static constexpr int vtotalbits = 8;
 61 | 	static constexpr int vesbits = BINARY8_EXP;
 62 | 	using exponenttype=int32_t; 
 63 | 	using trait_t=any_floattrait<BINARY8_EXP,BINARY8_MAN,int8_t,uint8_t>;
 64 | 	using FT=fraction_t;
 65 | 	// Tag type selecting the constructor that stores a raw encoding without any conversion.
 66 | 	struct DeepInit{};
 67 | 	// The numeric constructors are only declared here; this header contains no definition for them.
 68 | 	explicit binary8tab(double a);
 69 | 	explicit binary8tab(float a);
 70 | 	binary8tab(int a);
 71 | 	explicit binary8tab(DeepInit , int8_t a) : v(a) {}
 72 | 	binary8tab(): v(0) {}
 73 | 	// Sign test and named constants; apart from zero(), the constants wrap raw encodings from trait_t.
 74 | 	bool is_negative() const { return v < 0; }
 75 | 	//bool is_nan() const { return trait_t::withnan && v == trait_t::POSIT_NAN; }
 76 | 	//bool is_infinity() const { return (v == trait_t::POSIT_PINF || v == trait_t::POSIT_NINF);}
 77 | 	static binary8tab max() { return binary8tab(DeepInit(),trait_t::max_h); } // 126 or 127
 78 | 	static binary8tab min() { return binary8tab(DeepInit(),trait_t::min_h); } //  ? -126 : -127
 79 | 	static binary8tab one() { return binary8tab(DeepInit(),trait_t::one_h); } // 0x40
 80 | 	static binary8tab afterone() { return binary8tab(DeepInit(),trait_t::afterone_h); } // 0x40
 81 | 	//static binary8tab two() { return binary8tab(DeepInit(),trait_t::two); }
 82 | 	static binary8tab zero() { return binary8tab(DeepInit(),0); }
 83 | 	//static binary8tab onehalf() { return binary8tab(DeepInit(),trait_t::onehalf); }
 84 | 	//static binary8tab pinfinity() { return binary8tab(DeepInit(),trait_t::pinfinity); } // ? 127: -128); }
 85 | 	//static binary8tab ninfinity() { return binary8tab(DeepInit(),trait_t::ninfinity); } //,trait_t::withnan ?-127: -128); }
 86 | 	//static binary8tab nan() { return binary8tab(DeepInit(),trait_t::nan); } // withnan ?-128: 0); }
 87 | 	// Unary operations: one table lookup each; twice() is declared only, with no definition in this header.
 88 | 	binary8tab inv() const { return binary8tab(DeepInit(),binary8ns::opinv[uu()]); }
 89 | 	binary8tab half() const { return binary8tab(DeepInit(),binary8ns::ophalf[uu()]);}
 90 | 	binary8tab twice() const;
 91 | 	binary8tab exp2() const { return binary8tab(DeepInit(),binary8ns::opexp2[uu()]); }
 92 | 	binary8tab square() const { return binary8tab(DeepInit(),binary8ns::opsquare[uu()]); }
 93 | 	binary8tab operator-() const { return binary8tab(DeepInit(),-v);  }
 94 | 	// Comparisons order values by the raw signed encoding v.
 95 | 	friend bool operator<(binary8tab a, binary8tab b) { return a.v < b.v; }
 96 | 	friend bool operator>(binary8tab a, binary8tab b) { return a.v > b.v; }
 97 | 	friend bool operator<=(binary8tab a, binary8tab b) { return a.v <= b.v;}
 98 | 	friend bool operator>=(binary8tab a, binary8tab b) { return a.v >= b.v; }
 99 | 	friend bool operator==(binary8tab a, binary8tab b) { return a.v == b.v; }
100 | 	friend bool operator!=(binary8tab a, binary8tab b) { return a.v !=b.v; }
101 | 	// Binary operations index a flattened two-operand table at a.uu()*256+b.uu(); a-b is computed as a+(-b).
102 | 	friend binary8tab operator+(binary8tab a, binary8tab b) { return binary8tab(DeepInit(),binary8ns::opadd[a.uu()*256+b.uu()]); }
103 | 	friend binary8tab operator*(binary8tab a, binary8tab b) { return binary8tab(DeepInit(),binary8ns::opmul[a.uu()*256+b.uu()]); }
104 | 	friend binary8tab operator-(binary8tab a, binary8tab b) { return a+(-b); }
105 | 	friend binary8tab operator/(binary8tab a, binary8tab b) { return binary8tab(DeepInit(),binary8ns::opdiv[a.uu()*256+b.uu()]); }
106 | 	friend std::ostream & operator << (std::ostream & ons, const binary8tab & p);
107 | 	// Compound assignment reuses the binary operators above and copies the resulting encoding.
108 | 	binary8tab& operator+=(const binary8tab &a) { binary8tab r = *this+a; v = r.v; return *this; }
109 | 	binary8tab& operator*=(const binary8tab &a) { binary8tab r = *this*a; v = r.v; return *this; }
110 | 	// Raw encoding viewed as an unsigned table index in [0,255].
111 | 	unsigned int uu() const { return (unsigned int)(uint8_t)v;}
112 | 	// Conversions: op2float holds one float bit pattern per encoding; double and int go through float.
113 | 	operator float() const { return uint32_to_float(binary8ns::op2float[uu()]); }
114 | 	operator double() const { return (float)*this; }
115 | 	operator int() const { return (float)*this; }
116 | 
117 | 	int8_t v;
118 | 
119 | };
120 | 
121 | inline binary8tab half(binary8tab z) { return z.half(); }
122 | 
123 | inline binary8tab twice(binary8tab z) { return z.twice(); }
124 | 
125 | inline binary8tab square(binary8tab z) { return z.square(); }
126 | 
127 | inline binary8tab inv(binary8tab x) { return x.inv(); }
128 | 
129 | inline binary8tab neg(binary8tab z) { return -z; }
130 | 
131 | inline binary8tab exp2(binary8tab z) { return z.exp2(); }
132 | 
133 | inline bool is_negative(binary8tab a)
134 | {
135 | 	return a.v < 0;
136 | }
137 | 
138 | 
139 | namespace std {
140 |     template<> class numeric_limits<binary8tab> {
141 |     public:
142 |        static binary8tab max() {return binary8tab::max(); };
143 |        static binary8tab min() {return binary8tab::min(); };
144 |        static binary8tab epsilon() {return binary8tab::afterone()-binary8tab::one(); };
145 |         // One can implement other methods if needed
146 |     };
147 | }
148 | 
149 | 
150 | 
151 | namespace std // NOTE(review): these overloads are added to namespace std itself — confirm this is intended
152 | {
153 | 	inline binary8tab abs(binary8tab z) 
154 | 	{
155 | 		return binary8tab(binary8tab::DeepInit(),z.v & 0x7F); // clear the top bit of the raw encoding
156 | 	}
157 | 	// Returns a when a <= b, otherwise b.
158 | 	inline binary8tab min(binary8tab a, binary8tab b)
159 | 	{
160 | 		return a <=  b ? a : b;
161 | 	}
162 | 	// Returns a when a >= b, otherwise b.
163 | 	inline binary8tab max(binary8tab a, binary8tab b)
164 | 	{
165 | 		return a >= b ? a : b;
166 | 	}
167 | }
168 | 
169 | inline std::ostream & operator << (std::ostream & ons, const binary8tab & p) // writes "binary8tab(<raw encoding>)" and returns the stream
170 | {
171 | 	ons << "binary8tab(" << static_cast<int>(p.v) << ")" ; // cast: an int8_t would otherwise be streamed as a character
172 | 	return ons;
173 | }


--------------------------------------------------------------------------------
/include/exp2.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | /**
 10 |  - express the constants as integer and then use constexpr conversion to the Unpacked template
 11 |  - implement polevl and p1evl
 12 |  - implement ldexp
 13 |  
 14 |  */
 15 | // https://github.com/aterrel/libdynd/blob/master/thirdparty/cephes/exp2.c
 16 | // https://github.com/jeremybarnes/cephes/blob/master/cprob/polevl.c
 17 | 
 18 | #ifdef UNK
 19 | static double P[] = {
 20 |     2.30933477057345225087E-2,
 21 |     2.02020656693165307700E1,
 22 |     1.51390680115615096133E3,
 23 | };
 24 | 
 25 | static double Q[] = {
 26 |     /* 1.00000000000000000000E0, */
 27 |     2.33184211722314911771E2,
 28 |     4.36821166879210612817E3,
 29 | };
 30 | 
 31 | 
 32 | #ifdef IBMPC
 33 | static unsigned short P[] = {
 34 |     0xead3, 0x549a, 0xa5c8, 0x3f97,
 35 |     0x5bde, 0x9361, 0x33ba, 0x4034,
 36 |     0x7693, 0x907b, 0xa7a0, 0x4097,
 37 | };
 38 | 
 39 | static unsigned short Q[] = {
 40 |     /*0x0000,0x0000,0x0000,0x3ff0, */
 41 |     0x5c3c, 0x0ffb, 0x25e5, 0x406d,
 42 |     0x0bae, 0x2fed, 0x1036, 0x40b1,
 43 | };
 44 | 
 45 | template <class FT, class ES>
 46 | Unpacked<FT,ES> polevl(Unpacked<FT,ES> x)
 47 | {
 48 | 
 49 | }
 50 | 
 51 | template <class FT, class ES>
 52 | Unpacked<FT,ES> p1evl(Unpacked<FT,ES> x)
 53 | {
 54 | 	
 55 | }
 56 | 
 57 | template <class FT, class ES>
 58 | Unpacked<FT,ES> ldexp(Unpacked<FT,ES> x, int n)
 59 | {
 60 | 	
 61 | }
 62 | 
 63 | template <class FT, class ES>
 64 | Unpacked<FT,ES> x exp2(Unpacked<FT,ES> x)
 65 | {
 66 | 	using UT=Unpacked<FT,ES>;
 67 | 
 68 | 	switch(x.type)
 69 | 	{
 70 | 		case UT::NaN: return x;
 71 | 		case UT::Infinity: return x.negativeSign ? UT(UT::Zero) : x;
 72 | 		default:
 73 | 			break;
 74 | 	}
 75 |     if(x > 1024) // TODO
 76 |     	return UT(UT::Infinity);
 77 |     else if(x < -1024) // TODO
 78 |     	return UT(UT::Zero); 
 79 |     T px, xx;
 80 |     short n;
 81 | 
 82 |     xx = x;			/* save x */
 83 |     /* separate into integer and fractional parts */
 84 |     px = floor(x + 0.5);
 85 |     n = px;
 86 |     x = x - px;
 87 | 
 88 |     /* rational approximation
 89 |      * exp2(x) = 1 +  2xP(xx)/(Q(xx) - P(xx))
 90 |      * where xx = x**2
 91 |      */
 92 |     xx = x * x;
 93 |     px = x * polevl(xx, P, 2);
 94 |     x = px / (p1evl(xx, Q, 2) - px);
 95 |     x = 1.0 + ldexp(x, 1);
 96 | 
 97 |     /* scale by power of 2 */
 98 |     x = ldexp(x, n);
 99 |     return (x);
100 | }


--------------------------------------------------------------------------------
/include/exp2t.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
 3 |  * Distributed under the terms of the BSD 3-Clause License.  
 4 |  * 
 5 |  * (See accompanying file LICENSE)
 6 |  * 
 7 |  * --
 8 |  */
 9 | #pragma once
10 | #include <cmath>
11 | template <class T>
12 | struct exp2t_ // functor computing 2^a for a value of type T by round-tripping through double
13 | {
14 | 	T operator() (T a)
15 | 	{
16 | 		return (T)(std::exp2((double)a)); // qualified so the <cmath> function is always the one selected
17 | 	}
18 | };
19 | 
20 | // exp2t(a): returns 2 raised to a, in the argument's own type T.
21 | // Delegates to exp2t_<T>, so the value is computed in double precision and cast back to T.
22 | template <class T>
23 | T exp2t(T a)
24 | {
25 | 	return exp2t_<T>()(a);
26 | }


--------------------------------------------------------------------------------
/include/fixedtraits.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
 3 |  * Distributed under the terms of the BSD 3-Clause License.  
 4 |  * 
 5 |  * (See accompanying file LICENSE)
 6 |  * 
 7 |  * --
 8 |  */
 9 | #pragma once
10 | 
11 | 
12 | template <class T, int N, int F>
13 | struct fixedtrait // compile-time description of a signed fixed-point format: N total bits, F fraction bits, held in T
14 | {
15 |     static_assert(static_cast<int>(sizeof(T)*8) >= N,"fixedtrait holding type is too small"); // T must provide at least N bits
16 |     static_assert(N > 0,"fixedtrait total bits should be positive");
17 |     static_assert(F <= N && F >= 0,"fraction bits should be less than N and not negative");
18 |     static_assert(std::is_integral<T>::value && std::is_signed<T>::value,"only for signed integrals");
19 | 	using value_t = T;
20 | 	static constexpr int totalbits = N;
21 | 	static constexpr int fraction_bits = F;
22 | };
23 | 
24 | #if 0
25 | #if __cplusplus >= 201402L
26 | template <class T>
27 | struct fixedtrait<T,sizeof(T)*8,0>
28 | {
29 |     static_assert(std::is_integral<T>::value && std::is_signed<T>::value,"only for signed integrals");
30 | 	using value_t = T;
31 | 	static constexpr int totalbits = sizeof(T)*8;
32 | 	static constexpr int fraction_bits = 0;
33 | };
34 | #endif
35 | #endif
36 | 


--------------------------------------------------------------------------------
/include/float12.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | #include <stdbool.h>
 10 | #include <stdint.h>
 11 | #include "platform.h"
 12 | #include "internals.h"
 13 | #include "specialize.h"
 14 | #include "softfloat.h"
 15 | 
// NOTE(review): this header reads like an unfinished port of SoftFloat-style
// 16-bit routines to a 12-bit format and cannot compile as written:
//  - ui12_f12 is declared here as an empty struct, but the functions below
//    use it as `union ui12_f12` with members .ui and .f;
//  - the bare names float12_t, int_float12_t and uint_float12_t further down
//    are placeholders, not declarations;
//  - uint12_t, int_fast12_t and uint_fast12_t are not defined in this file.
struct ui12_f12
{

};
float12_t

// Combines sign, exponent and significand into one bit pattern: sign is
// shifted left by 15, exp by 10, and sig is added as is.
// NOTE(review): the shift amounts 15/10 describe a 16-bit 1/5/10 layout, not a
// 12-bit one - presumably still to be adapted.
#define packToF12UI( sign, exp, sig ) (((uint12_t) (sign)<<15) + ((uint12_t) (exp)<<10) + (sig))
// Builds a NaN bit pattern from a commonNaN: sign at bit 15, the constant
// 0x7E00, and the top bits of v64 (v64 >> 54).
#define softfloat_commonNaNToF12UI(aPtr) (uint_fast12_t) ((aPtr)->sign<<15 | 0x7E00 | aPtr->v64>>54)


int_float12_t
uint_float12_t
// f8  is typically 1 4 3
// f12 is typically 1 5 6 or 1 4 7
// f16 is           1 6 9
/**
 * Converts a float32_t to float12_t.
 *
 * Steps visible below: split the input into sign/exponent/fraction, map
 * infinities and NaNs directly, reduce the fraction to frac16 with a sticky
 * bit, return a signed zero when exponent and reduced fraction are both zero,
 * and otherwise hand over to softfloat_roundPackToF12.
 *
 * NOTE(review): exponentbits is never used, and none of the three template
 * parameters can be deduced from the call, so every caller has to spell them
 * out. The literals 0x1F, 0x71 and 0x4000 are fixed regardless of the template
 * arguments - they look like the constants of a 32-to-16-bit conversion;
 * confirm the intended 12-bit values.
 */
template <int exponentbits, int mantissabits, int mantissamask>
float12_t f32_to_f12( float32_t a )
{
    union ui32_f32 uA;
    uint_fast32_t uiA;
    bool sign;
    int_fast12_t exp;
    uint_fast32_t frac;
    struct commonNaN commonNaN;
    uint_fast12_t uiZ, frac16;
    union ui12_f12 uZ;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    // reinterpret the float as its bit pattern and split it into fields
    uA.f = a;
    uiA = uA.ui;
    sign = signF32UI( uiA );
    exp  = expF32UI( uiA );
    frac = fracF32UI( uiA );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    // all-ones exponent: NaN when the fraction is non-zero, infinity otherwise
    if ( exp == 0xFF ) {
        if ( frac ) {
            softfloat_f32UIToCommonNaN( uiA, &commonNaN );
            uiZ = softfloat_commonNaNToF12UI( &commonNaN );
        } else {
            uiZ = packToF12UI( sign, 0x1F, 0 );
        }
        goto uiZ;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    // drop the low mantissabits bits; OR in a sticky bit if any masked bit was set
    frac16 = frac>> mantissabits| ((frac & mantissamask) != 0);
    if ( ! (exp | frac16) ) {
        uiZ = packToF12UI( sign, 0, 0 );
        goto uiZ;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    return softfloat_roundPackToF12( sign, exp - 0x71, frac16 | 0x4000 );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;

}
 77 | 
 78 | 
 79 | 
/**
 * Rounds the significand sig according to the global softfloat_roundingMode
 * and packs sign/exp/sig into a float12_t.
 *
 * The low four bits of sig are the bits rounded away (roundBits = sig & 0xF,
 * result significand = (sig + roundIncrement) >> 4). Underflow, overflow and
 * inexact conditions are reported through softfloat_raiseFlags /
 * softfloat_exceptionFlags.
 *
 * NOTE(review): the thresholds 0x1D, 0x1F and 0x8000 are presumably inherited
 * from a 16-bit format - confirm them for the 12-bit layout.
 */
float12_t
 softfloat_roundPackToF12( bool sign, int_fast12_t exp, uint_fast12_t sig )
{
    uint_fast8_t roundingMode;
    bool roundNearEven;
    uint_fast8_t roundIncrement, roundBits;
    bool isTiny;
    uint_fast12_t uiZ;
    union ui12_f12 uZ;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    // choose the increment: half an ulp (0x8) for the two round-to-nearest
    // modes, 0xF when rounding away from zero for this sign, 0 otherwise
    roundingMode = softfloat_roundingMode;
    roundNearEven = (roundingMode == softfloat_round_near_even);
    roundIncrement = 0x8;
    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
        roundIncrement =
            (roundingMode
                 == (sign ? softfloat_round_min : softfloat_round_max))
                ? 0xF
                : 0;
    }
    roundBits = sig & 0xF;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    // the unsigned comparison is true both for exp >= 0x1D and for negative exp
    if ( 0x1D <= (unsigned int) exp ) {
        if ( exp < 0 ) {
            /*----------------------------------------------------------------
            *----------------------------------------------------------------*/
            // subnormal result: shift right by -exp, keeping lost bits sticky
            isTiny =
                (softfloat_detectTininess == softfloat_tininess_beforeRounding)
                    || (exp < -1) || (sig + roundIncrement < 0x8000);
            sig = softfloat_shiftRightJam32( sig, -exp );
            exp = 0;
            roundBits = sig & 0xF;
            if ( isTiny && roundBits ) {
                softfloat_raiseFlags( softfloat_flag_underflow );
            }
        } else if ( (0x1D < exp) || (0x8000 <= sig + roundIncrement) ) {
            /*----------------------------------------------------------------
            *----------------------------------------------------------------*/
            // overflow: pack exponent 0x1F with zero significand, minus one
            // when roundIncrement is zero
            softfloat_raiseFlags(
                softfloat_flag_overflow | softfloat_flag_inexact );
            uiZ = packToF12UI( sign, 0x1F, 0 ) - ! roundIncrement;
            goto uiZ;
        }
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    sig = (sig + roundIncrement)>>4;
    if ( roundBits ) {
        softfloat_exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
        if ( roundingMode == softfloat_round_odd ) {
            sig |= 1;
            goto packReturn;
        }
#endif
    }
    // exact tie (roundBits == 8) under round-near-even: clear the lowest bit
    sig &= ~(uint_fast12_t) (! (roundBits ^ 8) & roundNearEven);
    if ( ! sig ) exp = 0;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 packReturn:
    uiZ = packToF12UI( sign, exp, sig );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;

}
150 | 
/**
 * Normalizes a subnormal significand: shifts sig left by shiftDist and
 * returns the shifted significand together with the exponent 1 - shiftDist.
 *
 * NOTE(review): the shift distance is derived from a 16-bit leading-zero count
 * minus 5 and is marked TODO in the original - the offset presumably still
 * has to be adapted to the 12-bit format.
 */
struct exp8_sig16 softfloat_normSubnormalF12Sig( uint_fast12_t sig )
{
    int_fast8_t shiftDist;
    struct exp8_sig16 z;

    shiftDist = softfloat_countLeadingZeros16( sig ) - 5; // TODO
    z.exp = 1 - shiftDist;
    z.sig = sig<<shiftDist;
    return z;

}
162 | 
163 | 
/**
 * Fills *zPtr with the common NaN form of the NaN bit pattern uiA.
 *
 * Raises softfloat_flag_invalid when softfloat_isSigNaNF12UI reports a
 * signaling NaN. The sign is taken from bit 15 of uiA, v64 receives uiA
 * shifted left by 54, and v0 is cleared.
 *
 * NOTE(review): bit 15 and the shift by 54 match a 16-bit layout - confirm
 * for the 12-bit format.
 */
void softfloat_f12UIToCommonNaN( uint_fast12_t uiA, struct commonNaN *zPtr )
{
    if ( softfloat_isSigNaNF12UI( uiA ) ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
    }
    zPtr->sign = uiA>>15;
    zPtr->v64  = (uint_fast64_t) uiA<<54;
    zPtr->v0   = 0;
}
173 | 
174 | 
/**
 * Converts a float12_t to float32_t.
 *
 * Infinities and NaNs (exponent 0x1F) and zeros are mapped directly;
 * subnormal inputs are normalized first. The result is packed with the
 * exponent rebased by 0x70 and the fraction shifted left by 13.
 *
 * NOTE(review): 0x1F, 0x70 and the shift by 13 are presumably the constants
 * of a 16-to-32-bit widening - confirm them for the 12-bit layout.
 */
float32_t f12_to_f32( float12_t a )
{
    union ui12_f12 uA;
    uint_fast12_t uiA;
    bool sign;
    int_fast8_t exp;
    uint_fast12_t frac;
    struct commonNaN commonNaN;
    uint_fast32_t uiZ;
    struct exp8_sig16 normExpSig;
    union ui32_f32 uZ;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    // reinterpret the input as its bit pattern and split it into fields
    uA.f = a;
    uiA = uA.ui;
    sign = signF12UI( uiA );
    exp  = expF12UI( uiA );
    frac = fracF12UI( uiA );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    // maximum exponent: NaN when the fraction is non-zero, infinity otherwise
    if ( exp == 0x1F ) {
        if ( frac ) {
            softfloat_f12UIToCommonNaN( uiA, &commonNaN );
            uiZ = softfloat_commonNaNToF32UI( &commonNaN );
        } else {
            uiZ = packToF32UI( sign, 0xFF, 0 );
        }
        goto uiZ;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    // zero exponent: signed zero, or a subnormal that gets normalized
    if ( ! exp ) {
        if ( ! frac ) {
            uiZ = packToF32UI( sign, 0, 0 );
            goto uiZ;
        }
        normExpSig = softfloat_normSubnormalF12Sig( frac );
        exp = normExpSig.exp - 1;
        frac = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    uiZ = packToF32UI( sign, exp + 0x70, (uint_fast32_t) frac<<13 );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;

}
224 | 
225 | 
226 | 


--------------------------------------------------------------------------------
/include/float16native32.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | /**
 10 |  * Float 16 native 32
 11 |  * Emanuele Ruffaldi 2017
 12 |  */
 13 | #pragma once
 14 | #include <cmath>
 15 | #include <iostream>
 16 | #include "unpacked.h"
 17 | 
// Storage type for the 16-bit half-precision pattern. halffloat is not
// declared in this header (presumably it comes from unpacked.h); the code
// below reads its raw bits through the member `what`.
using float16n_t = halffloat;
 19 | 
 20 | // use native F16C operation for conversion back & forth
 21 | // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16
 22 | // https://db.in.tum.de/~finis/x86-intrin-cheatsheet-v2.1.pdf
 23 | #if defined(__AVX__) && defined(__F16C__)
 24 | 
 25 | // for _mm_cvtph_ps 
 26 | // for _mm_cvtss_f32
 27 | // for _mm_cvtsi128_si32
 28 | // for _mm_extract_epi16 SSE2
 29 | #include <immintrin.h>
 30 | 
 31 | 
 32 | // these intrinsics are rot constexpr
 33 | inline float float16nto32(float16n_t x)
 34 | {	
 35 | 	// FP16C cost (4-5,1)
 36 | 	return _mm_cvtss_f32(_mm_cvtph_ps( _mm_set1_epi16 (x.what) ));
 37 | }
 38 | 
 39 | inline float16n_t float32to16n(float x)
 40 | {
 41 | 	// rounding from 32 to 16
 42 | 	// _mm_cvtsi128_si32(x) &0xFFFF  with cost (2,1)
 43 | 	// 		or
 44 | 	// _mm_extract(x,0)  cost (3,1)
 45 | 	return float16n_t((uint16_t)(_mm_cvtsi128_si32(_mm_cvtps_ph( _mm_set_ps1 (x),_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC )) & 0xFFFF));
 46 | }
 47 | 
 48 | #else
 49 | 
 50 | // TODO: use optimized quicker version
 51 | // TODO: constexpr requires floatconst2bits.hpp
 52 | inline float float16nto32(float16n_t x)
 53 | {	
 54 | 	// template <class SrcTrait, class DstTrait, class FT>
 55 | 	// constexpr DstTrait::holder_t convertfloats(SrcTrait::holder_t src)
 56 |     return Unpacked<uint32_t,int>::template make_floati<half_trait>(x.what).template pack_xfloati<single_trait>();
 57 | }
 58 | 
 59 | // TODO: use optimized quicker version
 60 | inline float16n_t float32to16n(float x)
 61 | {
 62 |     return float16n_t(Unpacked<uint32_t,int>::template make_float<single_trait>(x).template pack_xfloati<half_trait>());
 63 | }
 64 | 
 65 | 
 66 | #endif
 67 | 
 68 | 
 69 | struct float16n32
 70 | {
 71 | 	using trait_t = half_trait;
 72 | 	using value_t= float16n_t;
 73 | 	using impl_t = float;
 74 | 	explicit float16n32() : value (0) {}
 75 | 	explicit float16n32(float f): value(float32to16n(f)) {}
 76 | 	explicit float16n32(float16n_t f): value(f) {}
 77 | 	explicit constexpr float16n32(bool , uint16_t x): value(x) {}
 78 | 
 79 | 	operator float () const { return float16nto32(value); }
 80 | 	operator double () const { return float16nto32(value); }
 81 | 	operator float16n_t() const { return value; }
 82 | 
 83 | 	uint16_t raw() const { return value.what; }
 84 | 	constexpr float16n32 abs()  const { return float16n32(0,value.what & ~0x8000);  }  // could be >= infinity because infinity is sign symmetric
 85 | 	constexpr float16n32 neg()  const { return float16n32(0,value.what ^ 0x8000); }; 
 86 | 
 87 | 	/// replace them with constants
 88 | 	static float16n32 two() { return float16n32(false,trait_t::two_h); };
 89 | 	static float16n32 one() { return float16n32(false,trait_t::one_h); };
 90 | 	static float16n32 zero() { return float16n32(false,0); };
 91 | 
 92 | 	friend float16n32 operator+ (float16n32 a, float16n32 b)  { return float16n32((impl_t)a+(impl_t)b); }
 93 | 	friend float16n32 operator- (float16n32 a, float16n32 b)  { return float16n32((impl_t)a-(impl_t)b); }
 94 | 	friend float16n32 operator* (float16n32 a, float16n32 b)  { return float16n32((impl_t)a*(impl_t)b); }
 95 | 	friend float16n32 operator/ (float16n32 a, float16n32 b)  { return float16n32((impl_t)a/(impl_t)b); }
 96 | 	float16n32 & operator+=(const float16n32 &a) { float16n32 x = *this + a; value = x.value; return *this; }
 97 | 	float16n32 & operator*=(const float16n32 &a) { float16n32 x = *this * a; value = x.value; return *this; }
 98 | 
 99 | 	float16n32 operator-() const {		return neg(); }
100 | 
101 | 	friend float16n32 rem(float16n32 a,float16n32 b)  { return float16n32(remainder((impl_t)a,(impl_t)b));  }
102 | 	friend float16n32 sqrt(float16n32 a) { return float16n32(sqrt((impl_t)a));  }
103 | 	friend float16n32 mulAdd(float16n32 a, float16n32 b, float16n32 c) { return float16n32((impl_t)a*(impl_t)b+(impl_t)c); }
104 | 
105 | 	friend float16n32 half(float16n32 a) { return a/two(); }
106 | 	friend float16n32 square(float16n32 a) { return a*a; }
107 | 	friend float16n32 inv(float16n32 a) { return one()/a; }
108 | 
109 | 	friend bool operator == (float16n32 a, float16n32 b)  { return ((impl_t)a==(impl_t)b); }
110 | 	friend bool operator < (float16n32 a, float16n32 b)  { return ((impl_t)a<(impl_t)b); }
111 | 	friend bool operator > (float16n32 a, float16n32 b)  { return ((impl_t)a>(impl_t)b); }
112 | 	friend bool operator >= (float16n32 a, float16n32 b)  { return ((impl_t)a>=(impl_t)b); }
113 | 	friend bool operator <= (float16n32 a, float16n32 b)  { return ((impl_t)a<=(impl_t)b); }
114 | 
115 | 	friend std::ostream & operator << (std::ostream & ons, const float16n32 & v)
116 | 	{
117 | 		ons << "f16(" << ((impl_t)v) << ")";
118 | 		return ons;
119 | 	}
120 | 
121 | private:
122 | 	value_t value;
123 | };
124 | 
// Absolute value for float16n32, forwarding to the member abs().
// NOTE(review): this adds a new overload to namespace std, which the language
// rules do not permit for user code; an overload found through ADL in the
// type's own namespace would be the conforming alternative.
namespace std
{
	inline CONSTEXPR14 float16n32 abs(float16n32 z)
	{
		return z.abs();
	}
}
132 | 
// Disabled (#if 0) draft of std::numeric_limits for float16n32.
// NOTE(review): digits, digits10 and the four exponent limits below are not
// binary16 values - they look copied from a 32-bit float description. For
// IEEE binary16 one would expect digits 11, digits10 3, min_exponent -13,
// max_exponent 16, min_exponent10 -4 and max_exponent10 4; fix before
// enabling this block.
#if 0
namespace std
{
	template <>
	struct numeric_limits<float16n32>
	{
		using T=float16n32;
		using FT=typename T::value_t;
		using TT=typename T::trait_t;
		//static constexpr T mk(uint32_t v) { return T(false,v);}
		// builds a T from a raw trait constant; undefined again at the end of the struct
		#define mk(x) T(FT({x}))
		static constexpr bool is_specialized = true;
		static constexpr T min() noexcept { return mk(TT::min_h); }
		static constexpr T max() noexcept { return mk(TT::max_h); }
		static constexpr T lowest() noexcept { return mk(TT::lowest_h); }
		static constexpr int  digits = 24; //number of digits (in radix base) in the mantissa 
		static constexpr int  digits10 = 6;
		static constexpr bool is_signed = true;
		static constexpr bool is_integer = false;
		static constexpr bool is_exact = false;
		static constexpr int radix = 2;
		static constexpr T epsilon() noexcept { return mk(TT::epsilon_h); }
		//static constexpr T round_error() noexcept { return T(); } // 0.5f

		// this is also the maximum integer
		static constexpr int  min_exponent = -126;
		static constexpr int  min_exponent10 = -37;
		static constexpr int  max_exponent = 127;
		static constexpr int  max_exponent10 = 38;

		static constexpr bool has_infinity = true;
		static constexpr bool has_quiet_NaN = true;
		static constexpr bool has_signaling_NaN = false;
		static constexpr float_denorm_style has_denorm = std::denorm_present;
		static constexpr bool has_denorm_loss = false;
		static constexpr T infinity() noexcept { return mk(TT::pinfinity_h); }
		static constexpr T quiet_NaN() noexcept { return mk(TT::nan_h); }
		static constexpr T signaling_NaN() noexcept { return mk(TT::nan_h); }
		static constexpr T denorm_min() noexcept { return mk(TT::denorm_min_h); }

		static constexpr bool is_iec559 = true;
		static constexpr bool is_bounded = true;
		static constexpr bool is_modulo = false;

		static constexpr bool traps = false;
		static constexpr bool tinyness_before = false;
		#undef mk
	};	
}
#endif


--------------------------------------------------------------------------------
/include/float2posit.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | /**
 10 |  * Expandedn one
 11 |  *
 12 |  g++ -I. --std=c++14 float2posit.hpp  -DTESTFLOAT2POSIT -c
 13 |  */
 14 | #include "posit.h"
 15 | #include "floatconst2bits.hpp"
 16 | 
 17 | //template <class T,int totalbits, int esbits, class FT, bool withnan, class Trait>
 18 | template <class PP, class Trait>
 19 | CONSTEXPR14 PP float2posit(typename Trait::holder_t value)
 20 | {
 21 | 	//using PP =  Posit<T,totalbits,esbits,FT,withnan>;
 22 |     using PT=typename PP::PT; // trait
 23 |     constexpr auto totalbits = PP::vtotalbits;
 24 |     constexpr auto esbits = PP::vesbits;
 25 |     using FT=typename PP::fraction_t;
 26 |     using POSIT_UTYPE = typename PT::POSIT_UTYPE;
 27 |     using POSIT_STYPE = typename PT::POSIT_STYPE;
 28 |     using ET=typename PT::exponenttype;
 29 |     using UT=Unpacked<FT,typename PT::exponenttype>;
 30 | 
 31 |     // Phase 2: float to expanded (Unpacked) handling special cases
 32 | 
 33 |     ET rawexp = bitset_getT(value,Trait::fraction_bits,Trait::exponent_bits) ;
 34 |     bool negativeSign = value & (((typename Trait::holder_t)1) << (Trait::data_bits-1));
 35 | 	auto exponentF = rawexp - Trait::exponent_bias; // ((un.u >> Trait::fraction_bits) & Trait::exponent_mask)
 36 |     auto fractionF = cast_right_to_left<typename Trait::holder_t,Trait::fraction_bits,FT,UT::POSIT_FRAC_TYPE_SIZE_BITS>()(value);
 37 | 
 38 | 	if(rawexp == ((1 << Trait::exponent_bits)-1))
 39 | 	{
 40 | 		if(fractionF == 0)
 41 | 		{
 42 | 			return PP(typename PP::DeepInit(),negativeSign ? PT::POSIT_NINF : PT::POSIT_PINF);
 43 | 		}
 44 | 		else
 45 | 		{
 46 | 			return PP(typename PP::DeepInit(),PT::POSIT_NAN);
 47 | 		}
 48 | 	}
 49 | 	else if (rawexp == 0)
 50 |     {
 51 |         if(fractionF == 0)
 52 |         {
 53 |             negativeSign = false;
 54 |             return PP::zero();
 55 |         }
 56 |         else
 57 |         {
 58 |             int k = findbitleftmostC(fractionF);
 59 | 
 60 |             exponentF -= k;
 61 |             fractionF <<= k+1; // plus normalization
 62 |         }	
 63 |    	}
 64 | 
 65 |     // Phase 3: compute low as regime (Unpacked_Low)
 66 | 
 67 | 	auto eexponent = clamp<decltype(exponentF)>(exponentF,PT::minexponent,PT::maxexponent); // no overflow
 68 | 	auto rr = PT::split_reg_exp(exponentF);
 69 | 	auto fraction = cast_msb<FT,sizeof(FT)*8,typename PT::POSIT_UTYPE,sizeof(typename PT::POSIT_UTYPE)*8>()(fractionF); 
 70 | 	auto reg = rr.first;
 71 | 	auto exp = rr.second;
 72 | 
 73 | 	// Phase 4: UnpackedLow to Posit
 74 | 
 75 |     auto rs = -reg+1 > reg+2 ? -reg+1:reg+2; //std::max(-reg + 1, reg + 2);  MSVC issue
 76 |     auto es = (totalbits-rs-1) < esbits ? (totalbits-rs-1): esbits; //std::min((int)(totalbits-rs-1),(int)esbits);  MSVC issue
 77 | 
 78 |     POSIT_UTYPE regbits = reg < 0 ? (PT::POSIT_HOLDER_MSB >> -reg) : (PT::POSIT_MASK << (PT::POSIT_HOLDER_SIZE-(reg+1))); // reg+1 bits on the left
 79 | 	POSIT_UTYPE eexp = msb_exp<POSIT_UTYPE,PT::POSIT_HOLDER_SIZE,esbits,(esbits == 00)>()(exp);
 80 | 	POSIT_STYPE p = ((fraction >> (rs+es+1)) | (eexp >> (rs+1)) | (regbits>>1)) >> (sizeof(PP)*8-totalbits);
 81 | 
 82 |     return PP(typename PP::DeepInit(),negativeSign ? -p : p);
 83 | }
 84 | 
 85 | template <class PP, class Trait>
 86 | //template <class T,int totalbits, int esbits, class FT, bool withnan, class Trait>
 87 | PP float2positF(typename Trait::value_t fvalue)
 88 | {
 89 |     union {
 90 |         typename Trait::holder_t i;
 91 |         typename Trait::value_t  f; 
 92 |     } uu;
 93 |     uu.f = fvalue;
 94 |     return float2posit<PP,Trait>(uu.i);
 95 | }
 96 | 
 97 | template <class PP>
 98 | CONSTEXPR14 PP float2positx(float value)
 99 | {
100 |     return float2posit<PP,single_trait>(float2bits(value));
101 | }
102 | 
103 | template <class PP>
104 | CONSTEXPR14 PP float2positx(double value)
105 | {
106 |     return float2posit<PP,double_trait>(float2bits(value));
107 | }
108 | 
109 | 
// Self-test, compiled only when TESTFLOAT2POSIT is defined (see the g++
// command line in the header comment of this file).
#ifdef TESTFLOAT2POSIT

// import struct; print("%04X" % struct.unpack("I",struct.pack('f',3.5)))
using PP=Posit<int16_t,16, 2, uint16_t, false>;
// Enumerator initializers must be constant expressions, so this enum forces
// the three conversion paths for 3.5f to be evaluated at compile time.
enum Q : int16_t { 
    v1 = float2posit<PP,single_trait>(0x40600000).v,
    v2 = float2posit<PP,single_trait>(float2bits(3.5f)).v,
    v3 = float2positx<PP>(3.5f).v
    //v4 = float2posit<PP,double_trait>(float2bits(3.5)).v,

};

// Prints the posits rebuilt from the compile-time values (the first one also
// converted back to float).
int main(int argc, char const *argv[])
{
    std::cout << PP(typename PP::DeepInit(),Q::v1) << " " << (float)PP(typename PP::DeepInit(),Q::v1) << std::endl;
    std::cout << PP(typename PP::DeepInit(),Q::v2) << std::endl;
    std::cout << PP(typename PP::DeepInit(),Q::v3) << std::endl;
    //std::cout << PP(typename PP::DeepInit(),Q::v4) << std::endl; // FAILS
    return 0;
}
#endif


--------------------------------------------------------------------------------
/include/floatconst2bits.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | #pragma once
 10 | #include <floattraits.hpp>
 11 | // http://brnz.org/hbr/?p=1518
 12 | // Based on code from 
 13 | // https://graphics.stanford.edu/~seander/bithacks.html
 14 | //
 15 | // TODO double version by Emanuele Ruffaldi
 16 | constexpr int count_leading_zeroes(uint64_t v) 
 17 | { 
 18 |   constexpr char bit_position[64] = {  
 19 |      0,  1,  2,  7,  3, 13,  8, 19,  4, 25, 14, 28,  9, 34, 20, 40, 
 20 |      5, 17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57, 
 21 |     63,  6, 12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56, 
 22 |     62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58 }; 
 23 |     
 24 |   v |= v >> 1; // first round down to one less than a power of 2  
 25 |   v |= v >> 2; 
 26 |   v |= v >> 4; 
 27 |   v |= v >> 8; 
 28 |   v |= v >> 16; 
 29 |   v |= v >> 32; 
 30 |   v = (v >> 1) + 1; 
 31 |     
 32 |   return 63 - bit_position[(v * 0x0218a392cd3d5dbf)>>58]; // [3]
 33 | }
 34 |  
 35 | // original 
/**
 * Computes the 32-bit pattern of a float using only arithmetic, so that it
 * can be evaluated in a constant expression.
 *
 * Zero, the two infinities and NaN are answered with fixed patterns. Any
 * other value is scaled up by powers of two until its magnitude is at least
 * 2^87; multiplied by 2^-64 it then converts to a 64-bit integer of at least
 * 2^23, whose leading-zero count yields the exponent and whose remaining bits
 * yield the 23-bit significand.
 *
 * @param f value to convert
 * @return sign (bit 31), exponent (bits 30..23) and significand (bits 22..0).
 *         -0.0f is reported as 0 (sign lost) and every NaN as 0x7fc00000.
 */
constexpr uint32_t float2bits(float f) 
{ 
  if (f == 0.0f) 
    return 0; // also matches -0.0f and gives wrong result 
  else if (f == INFINITY) 
    return 0x7f800000; 
  else if (f == -INFINITY) 
    return 0xff800000; 
  else if (f != f) // NaN 
    return 0x7fc00000; // This is my NaN...
  
  uint32_t  sign = f < 0.0f; 
  float abs_f = sign ? -f : f; 
  
  int exponent = 254; 
  
  // each step multiplies by 2^41 and lowers the exponent estimate to match
  while(abs_f < 0x1p87f) 
  { 
    abs_f *= 0x1p41f; 
    exponent -= 41; 
  } 
  
  auto a = (uint64_t)(abs_f * 0x1p-64f); 
  int lz = count_leading_zeroes(a);
  exponent -= lz;
  
  // subnormal result: exponent field is zero and the shift amount is fixed
  if (exponent <= 0) 
  { 
    exponent = 0; 
    lz = 8 - 1;
  } 
  
  // shift the leading one out (lz + 1) and keep the next 23 bits
  uint32_t significand = (a << (lz + 1)) >> (64 - 23); // [3]
  return (sign << 31) | (exponent << 23) | significand; 
}
 71 | 
// Disabled (#if 0) draft that generalizes float2bits over a float trait so
// that double can be supported.
// NOTE(review): the scaling constants (2^87, 2^41, 2^-64) are still the float
// ones and are marked TODO, and the float2bits(double) overload at the end is
// declared to return double although it forwards a holder_t bit pattern -
// both need to be settled before this block is enabled.
#if 0
// GENERALIZE for double 
template <class float_trait>
constexpr typename float_trait::holder_t float2bitsx(typename float_trait::value_t f) 
{ 
  if (f == float_trait::zero) 
    return 0; // also matches -0.0f and gives wrong result 
  else if (f == float_trait::pinfinity) 
    return float_trait::pinfinity_h; 
  else if (f == float_trait::ninfinity) 
    return float_trait::ninfinity_h;  
  else if (f != f) // NaN 
    return float_trait::nan_h; // This is my NaN... 
  
  typename float_trait::holder_t  sign = f < float_trait::zero; 
  typename float_trait::value_t abs_f = sign ? -f : f; 
  
  int exponent = float_trait::exponent_max; 
  
  while(abs_f < 0x1p87)  // TODO
  { 
    abs_f *= 0x1p41; // TODO  
    exponent -= 41;  // TODO
  } 
  
  auto a = (uint64_t)(abs_f * 0x1p-64);  // TODO
  int lz = count_leading_zeroes(a);
  exponent -= lz;
  
  if (exponent <= 0) 
  { 
    exponent = 0; 
    lz = float_trait::exponent_bits - 1;
  } 
  
  uint64_t significand = (a << (lz + 1)) >> (64 - float_trait::fraction_bits); // [3]
  return (sign << (float_trait::data_bits-1)) | (((typename float_trait::holder_t )exponent) << float_trait::fraction_bits) | significand;  // TODO:
}

constexpr double float2bits(double f) 
{ 
  return float2bitsx<double_trait>(f);
}
#endif


--------------------------------------------------------------------------------
/include/posit10.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | /**
 10 |  * Zposit10n library
 11 |  *
 12 |  * Emanuele Ruffaldi 2017
 13 |  */
 14 | #pragma once
 15 | #include <stdint.h>
 16 | #include <iostream>
 17 | 
 18 | #ifndef POSIT0_SPEC
 19 | #define POSIT10_SPEC
 20 | #define POSIT10_ES 2
 21 | #define POSIT10_MODE PositSpec::WithNan
 22 | #endif
 23 | 
 24 | #include <limits>
 25 | #include "posit.h"
 26 | 
// Lookup tables backing posit10; they are only declared here and have to be
// defined in another translation unit. posit10 indexes the unary tables with
// the 10-bit pattern uu() and the binary tables with a.uu()*1024 + b.uu().
namespace posit10ns
{
	extern int16_t opinv[];     // used by posit10::inv()
	extern int16_t ophalf[];    // used by posit10::half()
	extern int16_t opexp2[];    // used by posit10::exp2()
	extern int16_t opsquare[];  // used by posit10::square()
	extern uint32_t op2float[]; // float bit pattern per posit, used by operator float
	extern int16_t opadd[];     // used by operator+
	extern int16_t opmul[];     // used by operator*
	extern int16_t opdiv[];     // used by operator/
}
 38 | 
 39 | 
 40 | class posit10
 41 | {
 42 | public:
 43 | 	// template <class T,int totalbits, int esbits, class FT, bool withnan>
 44 | 
 45 | 	using value_t=int16_t;
 46 | 	using fraction_t=uint16_t;
 47 | 	static constexpr int vtotalbits = 10;
 48 | 	static constexpr int vesbits = POSIT10_ES;
 49 | 	using exponenttype=int32_t; 
 50 | 	using PT=PositTrait<int16_t,vtotalbits,POSIT10_ES,POSIT10_MODE>;
 51 | 	using FT=fraction_t;
 52 | 	using FPT=Posit<int16_t,vtotalbits,POSIT10_ES,FT,POSIT10_MODE>;
 53 | 	using UnpackedLow=typename FPT::UnpackedLow;
 54 | 	using UnpackedT=typename FPT::UnpackedT;
 55 | 	using info=typename FPT::info;
 56 | 
 57 | 	struct DeepInit{};
 58 | 
 59 | 	explicit posit10(double a);
 60 | 	explicit posit10(float a);
 61 | 	posit10(int a);
 62 | 	explicit posit10(DeepInit , int16_t a) : v(a) {}
 63 | #ifdef POSITMUL
 64 | 	posit10(FPT::PositMul x) : v(x.asPosit().v) {}
 65 | #endif
 66 | 	posit10() : v(0) {}
 67 | 	explicit posit10(FPT x) : v(x.v) {}
 68 | 	explicit posit10(UnpackedLow u) : v(FPT(u).v) {}
 69 | 	explicit posit10(UnpackedT u) : v(FPT(u).v) {}
 70 | 	posit10(const posit10 & a): v(a.v) {}
 71 | 
 72 | 	bool is_negative() const { return v < 0; }
 73 | 	bool has_neg_exponent() const { return (v & 0x100) == 0; }
 74 | 	bool is_nan() const { return PT::withnan && v == -512; }
 75 | 	bool is_infinity() const { return PT::withnan ? (v == -511 || v == 511) : v == -512;}
 76 | 	static posit10 max() { return posit10(DeepInit(),PT::withnan ? 510 : 511); }
 77 | 	static posit10 min() { return posit10(DeepInit(),1); }
 78 | 	static posit10 one() { return posit10(DeepInit(),0x100); }
 79 | 	static posit10 afterone() { return posit10(DeepInit(),0x101); }
 80 | 	static posit10 zero() { return posit10(DeepInit(),0); }
 81 | 	static posit10 two() { return posit10(DeepInit(),288); }
 82 | 	static posit10 onehalf() { return posit10(DeepInit(),0x80); }
 83 | 	static posit10 pinfinity() { return posit10(DeepInit(),PT::withnan ? 511: -512); }
 84 | 	static posit10 ninfinity() { return posit10(DeepInit(),PT::withnan ?-511: -512); }
 85 | 	static posit10 nan() { return posit10(DeepInit(),PT::withnan ?-512: 0); }
 86 | 
 87 | 
 88 | 	friend bool operator<(const posit10 &a, const posit10 & b) { return a.v < b.v; }
 89 | 	friend bool operator>(const posit10 &a, const posit10 & b) { return a.v > b.v; }
 90 | 	friend bool operator<=(const posit10 &a, const posit10 & b) { return a.v <= b.v;}
 91 | 	friend bool operator>=(const posit10 &a, const posit10 & b) { return a.v >= b.v; }
 92 | 	friend bool operator==(const posit10 &a, const posit10 & b) { return a.v == b.v; }
 93 | 	friend bool operator!=(const posit10 &a, const posit10 & b) { return a.v !=b.v; }
 94 | 
 95 | 	posit10 inv() const { return posit10(DeepInit(),posit10ns::opinv[uu()]); }
 96 | 	posit10 half() const { return posit10(DeepInit(),posit10ns::ophalf[uu()]);}
 97 | 	posit10 twice() const;
 98 | 	posit10 exp2() const { return posit10(DeepInit(),posit10ns::opexp2[uu()]); }
 99 | 	posit10 square() const { return posit10(DeepInit(),posit10ns::opsquare[uu()]); }
100 | 	posit10 operator-() const { return posit10(DeepInit(),-v);  }
101 | 
102 | #if 0
103 | //	friend posit10 operator+(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),a.v<0? -posit10ns::opadd[(-a).uu()*1024+(-b).uu()] : posit10ns::opadd[a.uu()*1024+b.uu()]); }
104 | 	friend posit10 operator+(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),posit10ns::opadd[a.uu()*1024+b.uu()]); }
105 | 	friend posit10 operator*(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),a.v<0? -posit10ns::opmul[-a.uu()*1024+(-b).uu()] : posit10ns::opmul[a.uu()*1024+b.uu()]); }
106 | 	friend posit10 operator-(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),a.v<0? -posit10ns::opadd[(-a).uu()*1024+(b).uu()] : posit10ns::opadd[a.uu()*1024+(-b).uu()]); }
107 | 	friend posit10 operator/(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),posit10ns::opdiv[a.uu()*1024+b.uu()]); }
108 | #else
109 | 	friend posit10 operator+(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),posit10ns::opadd[a.uu()*1024+b.uu()]); }
110 | 	friend posit10 operator*(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),posit10ns::opmul[a.uu()*1024+b.uu()]); }
111 | 	friend posit10 operator-(const posit10 &a, const posit10 & b) { return a+(-b); }
112 | 	friend posit10 operator/(const posit10 &a, const posit10 & b) { return posit10(DeepInit(),posit10ns::opdiv[a.uu()*1024+b.uu()]); }
113 | #endif
114 | 	friend std::ostream & operator << (std::ostream & ons, const posit10 & p);
115 | 
116 | 	posit10& operator+=(const posit10 &a) { posit10 r = *this+a; v = r.v; return *this; }
117 | 	posit10& operator*=(const posit10 &a) { posit10 r = *this*a; v = r.v; return *this; }
118 | 
119 | 	unsigned int uu() const { return ((unsigned int)(uint16_t)v) & 0x03FF;}
120 | 	UnpackedLow unpack_low() const { return as_posit().unpack_low(); }
121 | 	UnpackedT unpack() const { return as_posit().unpack(); }
122 | 	info analyze() const { return as_posit().analyze(); }
123 | 
124 | 	FPT as_posit() const { return FPT(typename FPT::DeepInit(), v); }
125 | 	operator FPT() const { return as_posit(); }
126 | 	operator float() const { return uint32_to_float(posit10ns::op2float[uu()]); }
127 | 	operator double() const { return (float)*this; }
128 | 	operator int() const { return (float)*this; }
129 | 
130 | 	int16_t v;
131 | 
132 | };
133 | 
134 | inline posit10 half(posit10 value) { return value.half(); } // free-function spelling of posit10::half()
135 | 
136 | inline posit10 twice(posit10 value) { return value.twice(); } // free-function spelling of posit10::twice()
137 | 
138 | inline posit10 square(posit10 value) { return value.square(); } // free-function spelling of posit10::square()
139 | 
140 | inline posit10 inv(posit10 value) { return value.inv(); } // free-function spelling of posit10::inv()
141 | 
142 | inline posit10 neg(posit10 value) { return -value; } // named form of unary minus
143 | 
144 | inline posit10 exp2(posit10 value) { return value.exp2(); } // free-function spelling of posit10::exp2()
145 | 
146 | inline bool is_negative(posit10 p) // true when the raw signed encoding p.v is below zero
147 | {
148 | 	return 0 > p.v;
149 | }
150 | 
151 | namespace std {
152 |     template<> class numeric_limits<posit10> { // partial set of numeric limits for posit10
153 |     public:
154 |        static posit10 max() {return posit10::max(); } // forwards to posit10::max()
155 |        static posit10 min() {return posit10::min(); } // forwards to posit10::min()
156 |        static posit10 epsilon() {return posit10::afterone()-posit10::one(); } // distance from one() to the next encoding, afterone()
157 |        static const bool is_specialized = true; // lets generic code detect this specialization; the remaining standard members are still not provided
158 |     };
159 | }
160 | 
161 | namespace std
162 | {
163 | 	inline posit10 abs(posit10 z) // magnitude: negates the raw encoding when it is negative
164 | 	{
165 | 		return posit10(posit10::DeepInit(),z.v >= 0 ? z.v : -z.v);
166 | 	}
167 | 
168 | 	inline posit10 min(const posit10 &a, const posit10 & b) // a is returned whenever a <= b (ties included)
169 | 	{
170 | 		if(a <= b) return a; else return b;
171 | 	}
172 | 
173 | 	inline posit10 max(const posit10 &a, const posit10 & b) // a is returned whenever a >= b (ties included)
174 | 	{
175 | 		if(a >= b) return a; else return b;
176 | 	}
177 | }
178 | 


--------------------------------------------------------------------------------
/include/posit12.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | /**
 10 |  * ZPosit8n library
 11 |  *
 12 |  * Emanuele Ruffaldi 2017
 13 |  */
 14 | #pragma once
 15 | #include <stdint.h>
 16 | #include <iostream>
 17 | 
 18 | #ifndef POSIT12_SPEC
 19 | #define POSIT12_SPEC
 20 | #define POSIT12_ES 2
 21 | #define POSIT12_MODE  PositSpec::WithNan
 22 | #endif
 23 | 
 24 | #include "posit.h"
 25 | 
 26 | namespace posit12ns // lookup tables used by posit12; only declared here, the arrays are defined elsewhere
 27 | {
 28 | 	extern int16_t opinv[]; // read by posit12::inv(), indexed with uu()
 29 | 	extern int16_t opexp2[]; // read by posit12::exp2(), indexed with uu()
 30 | 	extern int16_t opsquare[]; // read by posit12::square(), indexed with uu()
 31 | 	extern uint32_t op2float[]; // 32-bit words handed to uint32_to_float() by posit12's float conversion
 32 | }
 33 | 
 34 | 
 35 | class posit12 // 12-bit posit kept sign-extended in an int16_t; unary functions are table lookups, binary arithmetic is delegated to FPT
 36 | {
 37 | public:
 38 | 	// template <class T,int totalbits, int esbits, class FT, bool withnan>
 39 | 	using value_t=int16_t;
 40 | 	using fraction_t=uint16_t;
 41 | 	static constexpr int vtotalbits = 12;
 42 | 	static constexpr int vesbits = POSIT12_ES;
 43 | 	using exponenttype=int32_t; 
 44 | 	using PT=PositTrait<int16_t,vtotalbits,POSIT12_ES,POSIT12_MODE>;
 45 | 	using FT=fraction_t;
 46 | 	using FPT=Posit<int16_t,vtotalbits,POSIT12_ES,FT,POSIT12_MODE>; // generic Posit instantiation this class converts to and from
 47 | 	using UnpackedLow=typename FPT::UnpackedLow;
 48 | 	using UnpackedT=typename FPT::UnpackedT;
 49 | 	using info=typename FPT::info;
 50 | 
 51 | 	struct DeepInit{}; // tag selecting the constructor that takes a raw encoding
 52 | 
 53 | 	explicit posit12(double a);
 54 | 	explicit posit12(float a);
 55 | 	posit12(int a);
 56 | #ifdef POSITMUL
 57 | 	posit12(FPT::PositMul x) : v(x.asPosit().v) {}
 58 | #endif
 59 | 	explicit posit12(DeepInit , int16_t a) : v(a) {} // stores the raw encoding unchanged
 60 | 	posit12(): v(0) {}
 61 | 	explicit posit12(FPT x) : v(x.v) {}
 62 | 	explicit posit12(UnpackedLow u) : v(FPT(u).v) {}
 63 | 	explicit posit12(UnpackedT u) : v(FPT(u).v) {}
 64 | 
 65 | 	bool is_negative() const { return v < 0; }
 66 | 	bool has_neg_exponent() const { return (v & 0x400) == 0; } // tests the bit that is set in one() on the raw encoding; NOTE(review): negative values are not normalised first - confirm intent
 67 | 	bool is_nan() const { return PT::withnan && v == -2048; }
 68 | 	bool is_infinity() const { return PT::withnan ? (v == -2047 || v == 2047) : v == -2048;}
 69 | 	static posit12 max() { return posit12(DeepInit(),PT::POSIT_MAXPOS); } // encoding PT::POSIT_MAXPOS
 70 | 	static posit12 min() { return posit12(DeepInit(),1); } // encoding 1
 71 | 	static posit12 one() { return posit12(DeepInit(),0x400); }
 72 | 	static posit12 afterone() { return posit12(DeepInit(),0x401); } // encoding immediately above one()
 73 | 	static posit12 onehalf() { return posit12(DeepInit(),0x400 - (0x200 >> POSIT12_ES)); } // 2^-1 = regime k=-1 with every exponent bit set: 0x200 for ES=0, 0x380 for ES=2 (a fixed 0x200 decodes as 1/16 when ES=2)
 74 | 	static posit12 pinfinity() { return posit12(DeepInit(),PT::withnan ? 2047: -2048); }
 75 | 	static posit12 ninfinity() { return posit12(DeepInit(),PT::withnan ?-2047: -2048); }
 76 | 	static posit12 nan() { return posit12(DeepInit(),PT::withnan ?-2048: 0); }
 77 | 
 78 | 	posit12 inv() const { return posit12(DeepInit(),posit12ns::opinv[uu()]); }
 79 | 	posit12 half() const;
 80 | 	posit12 twice() const;
 81 | 	posit12 exp2() const { return posit12(DeepInit(),posit12ns::opexp2[uu()]); }
 82 | 	posit12 square() const { return posit12(DeepInit(),posit12ns::opsquare[uu()]); }
 83 | 	posit12 operator-() const { return v == -2048 ? *this : posit12(DeepInit(),-v);  } // the 0x800 pattern is its own 12-bit negation; plain -v stored 2048, which is_nan()/is_infinity() and the ordering operators no longer recognise
 84 | 
 85 | 	friend bool operator<(posit12 a, posit12 b) { return a.v < b.v; } // all comparisons work directly on the signed raw encodings
 86 | 	friend bool operator>(posit12 a, posit12 b) { return a.v > b.v; }
 87 | 	friend bool operator<=(posit12 a, posit12 b) { return a.v <= b.v;}
 88 | 	friend bool operator>=(posit12 a, posit12 b) { return a.v >= b.v; }
 89 | 	friend bool operator==(posit12 a, posit12 b) { return a.v == b.v; }
 90 | 	friend bool operator!=(posit12 a, posit12 b) { return a.v !=b.v; }
 91 | 
 92 | 	friend posit12 operator+(posit12 a, posit12 b) { return (posit12)(a.as_posit()+b.as_posit()); } // no 2-operand tables for 12 bits: compute through FPT
 93 | 	friend posit12 operator*(posit12 a, posit12 b) { return (posit12)(a.as_posit()*b.as_posit()); }
 94 | 	friend posit12 operator-(posit12 a, posit12 b) { return a+(-b); }
 95 | 	friend posit12 operator/(posit12 a, posit12 b) { return (posit12)(a.as_posit()/b.as_posit()); }
 96 | 	friend std::ostream & operator << (std::ostream & ons, const posit12 & p);
 97 | 
 98 | 	posit12& operator+=(const posit12 &a) { posit12 r = *this+a; v = r.v; return *this; }
 99 | 	posit12& operator*=(const posit12 &a) { posit12 r = *this*a; v = r.v; return *this; }
100 | 
101 | 	unsigned int uu() const { return ((unsigned int)(uint16_t)v) & 0x0FFF;} // low 12 bits of the encoding, used as table index
102 | 
103 | 	UnpackedLow unpack_low() const { return as_posit().unpack_low(); }
104 | 	UnpackedT unpack() const { return as_posit().unpack(); }
105 | 	info analyze() const { return as_posit().analyze(); }
106 | 
107 | 	FPT as_posit() const { return FPT(typename FPT::DeepInit(), v); }
108 | 	operator FPT() const { return as_posit(); }
109 | 	operator float() const { return uint32_to_float(posit12ns::op2float[uu()]); }
110 | 	operator double() const  { return (double)(float)*this; } // goes through the float conversion
111 | 	operator int() const  { return (int)(float)*this; } // goes through the float conversion
112 | 
113 | 	int16_t v; // raw encoding
114 | 
115 | };
116 | 
117 | inline posit12 half(posit12 value) { return value.half(); } // free-function spelling of posit12::half()
118 | 
119 | inline posit12 twice(posit12 value) { return value.twice(); } // free-function spelling of posit12::twice()
120 | 
121 | inline posit12 square(posit12 value) { return value.square(); } // free-function spelling of posit12::square()
122 | 
123 | inline posit12 inv(posit12 value) { return value.inv(); } // free-function spelling of posit12::inv()
124 | 
125 | inline posit12 neg(posit12 value) { return -value; } // named form of unary minus
126 | 
127 | inline posit12 exp2(posit12 value) { return value.exp2(); } // free-function spelling of posit12::exp2()
128 | 
129 | namespace std {
130 |     template<> class numeric_limits<posit12> { // partial set of numeric limits for posit12
131 |     public:
132 |        static posit12 max() {return posit12::max(); } // forwards to posit12::max()
133 |        static posit12 min() {return posit12::min(); } // forwards to posit12::min()
134 |        static posit12 epsilon() {return posit12::afterone()-posit12::one(); } // distance from one() to the next encoding, afterone()
135 |        static const bool is_specialized = true; // lets generic code detect this specialization; the remaining standard members are still not provided
136 |     };
137 | }
138 | 
139 | 
140 | namespace std
141 | {
142 | 	inline posit12 abs(posit12 z) // magnitude of z; the -2048 pattern (NaN, or infinity without NaN support) is returned unchanged
143 | 	{
144 | 		return posit12(posit12::DeepInit(),(z.v < 0 && z.v != -2048) ? -z.v : z.v); // negating -2048 would store 2048, an encoding is_nan()/is_infinity() do not recognise
145 | 	}
146 | 
147 | 
148 | 	inline posit12 min(posit12 a, posit12 b) // a is returned whenever a <= b (ties included)
149 | 	{
150 | 		return a <=  b ? a : b;
151 | 	}
152 | 
153 | 	inline posit12 max(posit12 a, posit12 b) // a is returned whenever a >= b (ties included)
154 | 	{
155 | 		return a >= b ? a : b;
156 | 	}
157 | }


--------------------------------------------------------------------------------
/include/posit8.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | /**
 10 |  * ZPosit8n library
 11 |  *
 12 |  * Emanuele Ruffaldi 2017
 13 |  */
 14 | #pragma once
 15 | #include <stdint.h>
 16 | #include <iostream>
 17 | #include <limits>
 18 | 
 19 | #ifndef POSIT8_SPEC
 20 | #define POSIT8_SPEC
 21 | #define POSIT8_ES 0
 22 | #define POSIT8_MODE  PositSpec::WithNan
 23 | #endif
 24 | 
 25 | #include "posit.h"
 26 | 
 27 | namespace posit8ns // lookup tables used by posit8; only declared here, the arrays are defined elsewhere
 28 | {
 29 | 	extern int8_t opadd[]; // two-operand tables are indexed with a.uu()*256+b.uu()
 30 | 	extern int8_t opmul[];
 31 | 	extern int8_t opdiv[];
 32 | 	extern int8_t opinv[]; // one-operand tables are indexed with uu()
 33 | 	extern int8_t opexp2[];
 34 | 	extern int8_t ophalf[];
 35 | 	extern int8_t opsquare[];
 36 | 	extern uint32_t op2float[]; // 32-bit words handed to uint32_to_float() by posit8's float conversion
 37 | }
 38 | 
 39 | class posit8 // 8-bit posit stored in an int8_t; every arithmetic operation defined inline here is a table lookup
 40 | {
 41 | public:
 42 | 	// template <class T,int totalbits, int esbits, class FT, bool withnan>
 43 | 
 44 | 
 45 | 	using value_t=int8_t;
 46 | 	using fraction_t=uint8_t;
 47 | 	static constexpr int vtotalbits = 8;
 48 | 	static constexpr int vesbits = POSIT8_ES;
 49 | 	using exponenttype=int32_t; 
 50 | 	using PT=PositTrait<int8_t,vtotalbits,POSIT8_ES,POSIT8_MODE>;
 51 | 	using FT=fraction_t;
 52 | 	using FPT=Posit<int8_t,vtotalbits,POSIT8_ES,FT,POSIT8_MODE>; // generic Posit instantiation this class converts to and from
 53 | 	using UnpackedLow=typename FPT::UnpackedLow;
 54 | 	using UnpackedT=typename FPT::UnpackedT;
 55 | 	using info=typename FPT::info;
 56 | 
 57 | 	struct DeepInit{}; // tag selecting the constructor that takes a raw encoding
 58 | 
 59 | 	explicit posit8(double a);
 60 | 	explicit posit8(float a);
 61 | 	posit8(int a);
 62 | #ifdef POSITMUL
 63 | 	posit8(FPT::PositMul x) : v(x.asPosit().v) {}
 64 | #endif
 65 | 	explicit posit8(DeepInit , int8_t a) : v(a) {} // stores the raw encoding unchanged
 66 | 	posit8(): v(0) {}
 67 | 	explicit posit8(FPT x) : v(x.v) {}
 68 | 	explicit posit8(UnpackedLow u) : v(FPT(u).v) {}
 69 | 	explicit posit8(UnpackedT u) : v(FPT(u).v) {}
 70 | 
 71 | 	bool is_negative() const { return v < 0; }
 72 | 	bool has_neg_exponent() const { return (v & 0x40) == 0; } // tests the first regime bit (0x40); the previous 0x80 mask tested the sign bit and just duplicated !is_negative(). NOTE(review): negative values are not normalised first - confirm intent
 73 | 	bool is_nan() const { return PT::withnan && v == PT::POSIT_NAN; }
 74 | 	bool is_infinity() const { return (v == PT::POSIT_PINF || v == PT::POSIT_NINF);}
 75 | 	static posit8 max() { return posit8(DeepInit(),PT::POSIT_MAXPOS); } // 126 or 127
 76 | 	static posit8 min() { return posit8(DeepInit(),1); } // encoding 1
 77 | 	static posit8 one() { return posit8(DeepInit(),PT::POSIT_ONE); } // 0x40
 78 | 	static posit8 afterone() { return posit8(DeepInit(),PT::POSIT_ONE+1); } // encoding immediately above one()
 79 | 	static posit8 two() { return posit8(DeepInit(),0x40 + (0x20 >> POSIT8_ES)); } // 2^1: 0x60 for ES=0, 0x50 for ES=1, 0x48 (72) for ES=2; a fixed 72 decodes as 1.25 under the default ES=0
 80 | 	static posit8 zero() { return posit8(DeepInit(),0); }
 81 | 	static posit8 onehalf() { return posit8(DeepInit(),0x40 - (0x20 >> POSIT8_ES)); } // 2^-1, mirror image of two() around one(): 0x20 for ES=0, 0x38 for ES=2
 82 | 	static posit8 pinfinity() { return posit8(DeepInit(),PT::POSIT_PINF); } // ? 127: -128); }
 83 | 	static posit8 ninfinity() { return posit8(DeepInit(),PT::POSIT_NINF); } //,PT::withnan ?-127: -128); }
 84 | 	static posit8 nan() { return posit8(DeepInit(),PT::POSIT_NAN); } // withnan ?-128: 0); }
 85 | 
 86 | 	posit8 inv() const { return posit8(DeepInit(),posit8ns::opinv[uu()]); }
 87 | 	posit8 half() const { return posit8(DeepInit(),posit8ns::ophalf[uu()]);}
 88 | 	posit8 twice() const;
 89 | 	posit8 exp2() const { return posit8(DeepInit(),posit8ns::opexp2[uu()]); }
 90 | 	posit8 square() const { return posit8(DeepInit(),posit8ns::opsquare[uu()]); }
 91 | 	posit8 operator-() const { return posit8(DeepInit(),-v);  }
 92 | 
 93 | 	friend bool operator<(posit8 a, posit8 b) { return a.v < b.v; } // all comparisons work directly on the signed raw encodings
 94 | 	friend bool operator>(posit8 a, posit8 b) { return a.v > b.v; }
 95 | 	friend bool operator<=(posit8 a, posit8 b) { return a.v <= b.v;}
 96 | 	friend bool operator>=(posit8 a, posit8 b) { return a.v >= b.v; }
 97 | 	friend bool operator==(posit8 a, posit8 b) { return a.v == b.v; }
 98 | 	friend bool operator!=(posit8 a, posit8 b) { return a.v !=b.v; }
 99 | 
100 | 	friend posit8 operator+(posit8 a, posit8 b) { return posit8(DeepInit(),posit8ns::opadd[a.uu()*256+b.uu()]); } // 256x256 table, row = a, column = b
101 | 	friend posit8 operator*(posit8 a, posit8 b) { return posit8(DeepInit(),posit8ns::opmul[a.uu()*256+b.uu()]); }
102 | 	friend posit8 operator-(posit8 a, posit8 b) { return a+(-b); }
103 | 	friend posit8 operator/(posit8 a, posit8 b) { return posit8(DeepInit(),posit8ns::opdiv[a.uu()*256+b.uu()]); }
104 | 	friend std::ostream & operator << (std::ostream & ons, const posit8 & p);
105 | 
106 | 	posit8& operator+=(const posit8 &a) { posit8 r = *this+a; v = r.v; return *this; }
107 | 	posit8& operator*=(const posit8 &a) { posit8 r = *this*a; v = r.v; return *this; }
108 | 
109 | 	unsigned int uu() const { return (unsigned int)(uint8_t)v;} // encoding reinterpreted as 0..255, used as table index
110 | 
111 | 	FPT as_posit() const { return FPT(typename FPT::DeepInit(), v); }
112 | 	operator FPT() const { return as_posit(); }
113 | 	operator float() const { return uint32_to_float(posit8ns::op2float[uu()]); }
114 | 	operator double() const { return (float)*this; } // goes through the float conversion
115 | 	operator int() const { return (float)*this; } // goes through the float conversion
116 | 
117 | 	UnpackedLow unpack_low() const { return as_posit().unpack_low(); }
118 | 	UnpackedT unpack() const { return as_posit().unpack(); }
119 | 	info analyze() const { return as_posit().analyze(); }
120 | 
121 | 
122 | 	int8_t v; // raw encoding
123 | 
124 | };
125 | 
126 | inline posit8 half(posit8 value) { return value.half(); } // free-function spelling of posit8::half()
127 | 
128 | inline posit8 twice(posit8 value) { return value.twice(); } // free-function spelling of posit8::twice()
129 | 
130 | inline posit8 square(posit8 value) { return value.square(); } // free-function spelling of posit8::square()
131 | 
132 | inline posit8 inv(posit8 value) { return value.inv(); } // free-function spelling of posit8::inv()
133 | 
134 | inline posit8 neg(posit8 value) { return -value; } // named form of unary minus
135 | 
136 | inline posit8 exp2(posit8 value) { return value.exp2(); } // free-function spelling of posit8::exp2()
137 | 
138 | inline bool is_negative(posit8 p) // true when the raw signed encoding p.v is below zero
139 | {
140 | 	return 0 > p.v;
141 | }
142 | 
143 | 
144 | namespace std {
145 |     template<> class numeric_limits<posit8> { // partial set of numeric limits for posit8
146 |     public:
147 |        static posit8 max() {return posit8::max(); } // forwards to posit8::max()
148 |        static posit8 min() {return posit8::min(); } // forwards to posit8::min()
149 |        static posit8 epsilon() {return posit8::afterone()-posit8::one(); } // distance from one() to the next encoding, afterone()
150 |        static const bool is_specialized = true; // lets generic code detect this specialization; the remaining standard members are still not provided
151 |     };
152 | }
153 | 
154 | 
155 | 
156 | namespace std
157 | {
158 | 	inline posit8 abs(posit8 z) // magnitude: negates the raw encoding when it is negative
159 | 	{
160 | 		return posit8(posit8::DeepInit(),z.v >= 0 ? z.v : -z.v);
161 | 	}
162 | 
163 | 	inline posit8 min(posit8 a, posit8 b) // a is returned whenever a <= b (ties included)
164 | 	{
165 | 		if(a <= b) return a; else return b;
166 | 	}
167 | 
168 | 	inline posit8 max(posit8 a, posit8 b) // a is returned whenever a >= b (ties included)
169 | 	{
170 | 		if(a >= b) return a; else return b;
171 | 	}
172 | }
173 | 
174 | 


--------------------------------------------------------------------------------
/include/positeigen.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | /**
 10 |  * Emanuele Ruffaldi (C) 2017
 11 |  * Templated C++ Posit
 12 |  */
 13 | #pragma once
 14 | #include "posit.h"
 15 | #include <Eigen/Core>
 16 | 
 17 | namespace Eigen {
 18 | 
 19 | template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 20 | struct NumTraits<Posit<T,totalbits,esbits,FT, positspec> >
 21 |  :  GenericNumTraits<Posit<T,totalbits,esbits,FT,positspec> > // the base supplies the epsilon, dummy_precision, lowest and highest functions
 22 | {
 23 |   using P = Posit<T,totalbits,esbits,FT,positspec>;
 24 | 
 25 |   using Real = P;
 26 |   using NonInteger = P;
 27 |   using Nested = P;
 28 | 
 29 |   //static inline Real epsilon() { return 0; }
 30 |   //static inline Real dummy_precision() { return 0; }
 31 |   //static inline Real digits10() { return 0; }
 32 |   // highest() and lowest() functions returning the highest and lowest possible values respectively.
 33 |   // An epsilon() function which, unlike std::numeric_limits::epsilon(), it returns a Real instead of a T.
 34 |   // digits10() function returning the number of decimal digits that can be represented without change. This is the analogue of std::numeric_limits<T>::digits10 which is used as the default implementation if specialized.
 35 |   
 36 | 
 37 |   enum { // trait flags and cost estimates declared for the posit scalar
 38 |     IsComplex = 0,
 39 |     IsInteger = 0,
 40 |     IsSigned = 1,
 41 |     RequireInitialization = 1,
 42 |     ReadCost = 1,
 43 |     AddCost = 3,
 44 |     MulCost = 3
 45 |   };
 46 | };
 47 | 
 48 | 
 49 | 
 50 | template <class FT, class ET>
 51 | struct NumTraits<Unpacked<FT,ET> >
 52 |  :  GenericNumTraits<Unpacked<FT,ET> > // the base supplies the epsilon, dummy_precision, lowest and highest functions
 53 | {
 54 |   using P = Unpacked<FT,ET>;
 55 | 
 56 |   using Real = P;
 57 |   using NonInteger = P;
 58 |   using Nested = P;
 59 | 
 60 |   //static inline Real epsilon() { return 0; }
 61 |   //static inline Real dummy_precision() { return 0; }
 62 |   //static inline Real digits10() { return 0; }
 63 |   // highest() and lowest() functions returning the highest and lowest possible values respectively.
 64 |   // An epsilon() function which, unlike std::numeric_limits::epsilon(), it returns a Real instead of a T.
 65 |   // digits10() function returning the number of decimal digits that can be represented without change. This is the analogue of std::numeric_limits<T>::digits10 which is used as the default implementation if specialized.
 66 |   
 67 | 
 68 |   enum { // trait flags and cost estimates declared for the unpacked scalar
 69 |     IsComplex = 0,
 70 |     IsInteger = 0,
 71 |     IsSigned = 1,
 72 |     RequireInitialization = 1,
 73 |     ReadCost = 1,
 74 |     AddCost = 3,
 75 |     MulCost = 3
 76 |   };
 77 | };
 78 | 
 79 | namespace internal {
 80 |   // cast overload for posits: yields the unpacked form of the argument
 81 |   template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
 82 |       inline typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT cast(const Posit<T,totalbits,esbits,FT,positspec>& value)
 83 |     { return value.unpack(); }
 84 | }
 85 | 
 86 | #if 0
 87 | namespace internal {
 88 | 
 89 |   template <class T,int totalbits, int esbits, class FT>
 90 |       inline double cast<Posit<T,totalbits,esbits,FT>,double>(const Posit<T,totalbits,esbits,FT>& x)
 91 |     { return x.todouble(); }
 92 | 
 93 |   template <class T,int totalbits, int esbits, class FT>
 94 |   inline float cast<Posit<T,totalbits,esbits,FT>,float>(const Posit<T,totalbits,esbits,FT>& x)
 95 |     { return x.tofloat(); }
 96 | 
 97 |   template <class T,int totalbits, int esbits, class FT>
 98 |     inline Posit<T,totalbits,esbits,FT> cast<float,Posit<T,totalbits,esbits,FT> >(const float & x)
 99 |     { return Posit<T,totalbits,esbits,FT>::convert(x); }
100 | 
101 |   template <class T,int totalbits, int esbits, class FT>
102 |     inline Posit<T,totalbits,esbits,FT> cast<double,Posit<T,totalbits,esbits,FT> >(const double & x)
103 |     { return Posit<T,totalbits,esbits,FT>::convert(x); }
104 | 
105 | }
106 | #endif
107 | 
108 | }


--------------------------------------------------------------------------------
/include/simd/asimd_all.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
 3 |  * Distributed under the terms of the BSD 3-Clause License.  
 4 |  * 
 5 |  * (See accompanying file LICENSE)
 6 |  * 
 7 |  * --
 8 |  */
 9 | /**
10 |  * Requires: AVX2, allows for AVX512F
11 |  *
12 |  - missing: 64bit integer, unsigned integers except uint8
13 |  - AVX2: double 4, float/int32 8, int16 16, int8 32
14 | 
15 |  */
16 | #pragma once
17 | #include "asimd_base.h"
18 | #include "asimd_32.h"
19 | #include "asimd_8.h"
20 | #include "asimd_16.h"
21 | #include "asimd_f.h"
22 | #include "asimd_d.h"
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/include/simd/asimd_f.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | /**
 10 |  * Requires: AVX2, allows for AVX512F
 11 |  *
 12 |  - missing: 64bit integer, unsigned integers except uint8
 13 |  - AVX2: double 4, float/int32 8, int16 16, int8 32
 14 | 
 15 |  * 128bit for f: 4  => int32
 16 |  * 256bit for f: 8  => int32
 17 |  * 512bit for f: 16 => int32 
 18 |  */
 19 | #pragma once
 20 | #include "asimd_32.h"
 21 | 
 22 | // DUE TO _mm_i32gather_epi32
 23 | #ifdef __AVX2__
 24 | 
 25 | class simd_f_4 // four packed floats held in one __m128
 26 | {
 27 | public:
 28 |     typedef float type;
 29 |     typedef __m128 simdtype;
 30 |     typedef simd_i32_4 indextype; // for indices
 31 |     typedef WithGather gathermode;
 32 |     typedef simd_f_4 self;
 33 |     typedef self cmpresult; // comparisons return a lane mask stored in the same vector type
 34 |     enum { csize = 4 };
 35 |     
 36 |     inline simd_f_4() {} // leaves x uninitialised
 37 |     inline simd_f_4(type v) : x(_mm_set1_ps(v)) {} // broadcast v to every lane
 38 |     inline simd_f_4(simdtype y): x(y) {}
 39 |     inline simd_f_4(type v1, type v2, type v3, type v4) : x(_mm_setr_ps(v1,v2,v3,v4)) {} // v1 goes to lane 0
 40 |     inline void load(const type * ptr) { x = _mm_loadu_ps((const type*)ptr); } // unaligned load of csize floats
 41 |     inline void store(type * ptr) const { _mm_storeu_ps((type*)ptr,x); } // unaligned store of csize floats
 42 |     
 43 |     inline self max(const self & y) const { return self(_mm_max_ps(x,y.x)); } // operand is only read, so const objects and temporaries are accepted
 44 |     inline cmpresult cmplt(const self & y) const { return  cmpresult(_mm_cmplt_ps(x,y.x)); } // lane mask of x < y
 45 |     inline void gather(const type * ptr, indextype idx) { x = _mm_i32gather_ps(ptr, idx.x, 1); } // NOTE(review): scale is 1, so idx holds byte offsets from ptr - confirm callers pre-multiply by sizeof(float)
 46 | 
 47 |     inline unsigned int size() const { return csize; }
 48 | 
 49 |     /*
 50 |     inline type operator[] (unsigned int idx) const
 51 |     {
 52 |         type temp[csize];
 53 |         store(temp);
 54 |         return temp[idx];
 55 |     } 
 56 |     */   
 57 | 
 58 |     void initincrement(type step) // lane i becomes i*step; parameter renamed so it no longer shadows member x
 59 |     {
 60 |         type a[csize];
 61 |         for(int i = 0; i < csize; i++)
 62 |             a[i] = i*step;
 63 |         load(a);
 64 |     }
 65 | 
 66 |     inline void blend(const self & other, self mask) // lanes selected by mask are replaced with other's lanes
 67 |     {
 68 |         x = _mm_blendv_ps(x,other.x,mask.x);
 69 |     }
 70 | 
 71 |     static inline void blendindex(indextype & oindex, indextype other, self mask);
 72 | 
 73 |     simdtype x;
 74 | };
 75 | 
 76 | 
 77 | inline void simd_f_4::blendindex(indextype & oindex, indextype other, self mask) // lanes of oindex selected by mask are replaced with other's lanes
 78 | {
 79 |     oindex.x = _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(oindex.x),_mm_castsi128_ps(other.x),mask.x)); // explicit bit-casts: _mm_blendv_ps takes __m128, the index vectors are integer vectors
 80 | }
 81 | #endif
 82 | 
 83 | #ifdef __AVX2__
 84 | 
 85 | class simd_f_8 // eight packed floats held in one __m256
 86 | {
 87 | public:
 88 |     typedef float type;
 89 |     typedef __m256 simdtype;
 90 |     typedef simd_i32_8 indextype;
 91 |     typedef WithGather gathermode;
 92 |     typedef simd_f_8 self;
 93 |     typedef self cmpresult; // comparisons return a lane mask stored in the same vector type
 94 |     enum { csize = 8 };
 95 |     
 96 |     inline simd_f_8() {} // leaves x uninitialised
 97 |     inline simd_f_8(type v) : x(_mm256_set1_ps(v)) {} // broadcast v to every lane
 98 |     inline simd_f_8(simdtype y): x(y) {}
 99 |     //inline simd_f_8(type v1, type v2, type v3, type v4) : x(_mm256_setr_ps(v1,v2,v3,v4)) {}
100 |     inline void load(const type * ptr) { x = _mm256_loadu_ps((const type*)ptr); } // unaligned load of csize floats
101 |     inline void store(type * ptr) const { _mm256_storeu_ps((type*)ptr,x); } // unaligned store of csize floats
102 |     
103 |     inline self max(const self & y) const { return self(_mm256_max_ps(x,y.x)); } // operand is only read, so const objects and temporaries are accepted
104 |     inline cmpresult cmplt(const self & y) const { return  cmpresult(_mm256_cmp_ps(x,y.x, 1 /*_CMP_LT_OS*/)); } // lane mask of x < y
105 |     inline void gather(const type * ptr, indextype idx) { x = _mm256_i32gather_ps(ptr, idx.x, 1); } // NOTE(review): scale is 1, so idx holds byte offsets from ptr - confirm callers pre-multiply by sizeof(float)
106 | 
107 |     inline unsigned int size() const { return csize; }
108 |     inline type operator[] (unsigned int idx) const // copies all lanes to a temporary and returns lane idx; idx is not range-checked
109 |     {
110 |         type temp[csize];
111 |         store(temp);
112 |         return temp[idx];
113 |     }
114 | 
115 |     void initincrement(type step) // lane i becomes i*step; parameter renamed so it no longer shadows member x
116 |     {
117 |         type a[csize];
118 |         for(int i = 0; i < csize; i++)
119 |             a[i] = i*step;
120 |         load(a);
121 |     }
122 | 
123 |     inline void blend(const self & other, self mask) // lanes selected by mask are replaced with other's lanes
124 |     {
125 |         x = _mm256_blendv_ps(x,other.x,mask.x);
126 |     }
127 | 
128 |     static inline void blendindex(indextype & oindex, indextype other, self mask);
129 | 
130 |     simdtype x;
131 | };
132 | 
133 | 
134 | inline void simd_f_8::blendindex(indextype & oindex, indextype other, self mask) // lanes of oindex selected by mask are replaced with other's lanes
135 | {
136 |     oindex.x = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(oindex.x),_mm256_castsi256_ps(other.x),mask.x)); // explicit bit-casts: _mm256_blendv_ps takes __m256, the index vectors are integer vectors
137 | }
138 | 
139 | 
140 | #endif
141 | 
142 | #ifdef WITH_AVX512F
143 | class simd_f_16 // sixteen packed floats held in one __m512
144 | {
145 | public:
146 |     typedef float type;
147 |     typedef __m512 simdtype;
148 |     typedef simd_i32_16 indextype;
149 |     typedef WithGather gathermode;
150 |     typedef simd_f_16 self;
151 |     typedef __mmask16 cmpresult; // comparisons return a 16-bit lane mask
152 |     enum { csize = 16 };
153 | 
154 | 
155 |     inline simd_f_16() {} // leaves x uninitialised
156 |     inline simd_f_16(type v) : x(_mm512_set1_ps(v)) {} // broadcast v to every lane
157 |     inline simd_f_16(simdtype y): x(y) {}
158 |     //inline simd_f_8(type v1, type v2, type v3, type v4) : x(_mm256_setr_ps(v1,v2,v3,v4)) {}
159 |     inline void load(const type * ptr) { x = _mm512_loadu_ps((const type*)ptr); } // unaligned load of csize floats
160 |     inline void store(type * ptr) const { _mm512_storeu_ps((type*)ptr,x); } // unaligned store of csize floats
161 |     
162 |     inline self max(const self & y) const { return self(_mm512_max_ps(x,y.x)); } // operand is only read, so const objects and temporaries are accepted
163 |     inline cmpresult cmplt(const self & y) const { return  (_mm512_cmp_ps_mask(x,y.x, 1 /*_CMP_LT_OS*/)); } // lane mask of x < y
164 |     inline void gather(const type * ptr, indextype idx) { x = _mm512_i32gather_ps(idx.x,ptr, 1); } // NOTE(review): scale is 1, so idx holds byte offsets from ptr - confirm callers pre-multiply by sizeof(float)
165 | 
166 |     inline unsigned int size() const { return csize; }
167 |     inline type operator[] (unsigned int idx) const // copies all lanes to a temporary and returns lane idx; idx is not range-checked
168 |     {
169 |         type temp[csize];
170 |         store(temp);
171 |         return temp[idx];
172 |     }
173 | 
174 |     void initincrement(type step) // lane i becomes i*step; parameter renamed so it no longer shadows member x
175 |     {
176 |         type a[csize];
177 |         for(int i = 0; i < csize; i++)
178 |             a[i] = i*step;
179 |         load(a);
180 |     }
181 | 
182 |     inline void blend(const self & other, cmpresult mask) // lanes whose mask bit is set are replaced with other's lanes
183 |     {
184 |         x = _mm512_mask_blend_ps(mask,x,other.x);
185 |     }
186 | 
187 |     static inline void blendindex(indextype & oindex, indextype other, cmpresult mask) // same selection applied to an index vector
188 |     {
189 |         oindex.x = _mm512_mask_blend_epi32(mask, oindex.x,other.x);
190 |     }
191 | 
192 |     simdtype x;
193 | };
194 | #endif
195 | 
196 | #ifdef __AVX2__
197 | DECLAREOSTREAM(simd_f_4,"f_4")
198 | DECLAREOSTREAM(simd_f_8,"f_8")
199 | #endif
200 | 
201 | #ifdef WITH_AVX512F
202 | DECLAREOSTREAM(simd_f_16,"f_16")
203 | #endif
204 | 
205 | 
206 | #ifdef WITH_AVX512F
207 | 
208 | template <int n>
209 | struct simdgenn<float,n> // float vector wrapper for n lanes: 4-wide up to n==4, 8-wide up to n==8, 16-wide beyond
210 | {
211 |     using type = typename std::conditional<(n <= 4), simd_f_4,typename std::conditional<(n <= 8), simd_f_8, simd_f_16>::type >::type;
212 |     using simdmarker = SimdMode;
213 | };
214 | 
215 | 
216 | template <>
217 | struct simdgen<float> // float maps to the 16-lane wrapper in this branch
218 | {
219 |     using type = simd_f_16;
220 |     using simdmarker = SimdMode;
221 | };
222 | #else
223 | #ifdef __AVX2__
224 | template <int n>
225 | struct simdgenn<float,n> // float vector wrapper for n lanes: 4-wide up to n==4, 8-wide beyond
226 | {
227 |     using type = typename std::conditional<(n <= 4), simd_f_4, simd_f_8>::type;
228 |     using simdmarker = SimdMode;
229 | };
230 | 
231 | template <>
232 | struct simdgen<float> // float maps to the 8-lane wrapper in this branch
233 | {
234 |     using type = simd_f_8;
235 |     using simdmarker = SimdMode;
236 | };
237 | #endif
238 | #endif
239 | 
240 | 
241 | 
242 | 


--------------------------------------------------------------------------------
/include/simdposit8.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | /**
 10 |  * ZPosit8n library
 11 |  *
 12 |  * Emanuele Ruffaldi 2017
 13 |  */
 14 | #pragma once
 15 | #include <stdint.h>
 16 | #include <iostream>
 17 | #include <array>
 18 | 
 19 | #ifndef POSIT8_SPEC
 20 | #define POSIT8_SPEC
 21 | #define POSIT8_ES 2
 22 | #define POSIT8_NAN PositSpec::WithNan
 23 | #endif
 24 | 
 25 | #include "posit.h"
 26 | 
 27 | namespace posit8ns // lookup tables used by simd_posit8_32; only declared here. NOTE(review): posit8.hpp declares these posit8ns tables as int8_t[] - confirm the element type of the definitions
 28 | {
 29 | 	extern uint8_t opadd[]; // two-operand tables are indexed with a*256+b in lookup2()
 30 | 	extern uint8_t opmul[];
 31 | 	extern uint8_t opdiv[];
 32 | 	extern uint8_t opinv[]; // one-operand tables are indexed with the encoding byte in lookup1()
 33 | 	extern uint8_t opexp2[];
 34 | 	extern uint8_t opsquare[];
 35 | 	extern uint32_t op2float[];
 36 | }
 37 | 
 38 | /*
 39 |  * Idea: optimize the extraction of 
 40 | 
 41 | 
 42 |  */
 43 | 
 44 | class simd_posit8_32 // 32 eight-bit posits packed in one __m256i; arithmetic is done by per-lane table lookups
 45 | {
 46 | public:
 47 | 
 48 | 	static __m256i lookup1(__m256i a, const uint8_t * tab) // per byte lane: result = tab[a]
 49 | 	{
 50 | 		alignas(__m256i) uint8_t idata[32];
 51 | 		alignas(__m256i) uint8_t odata[32];
 52 | 
 53 | 		_mm256_store_si256((__m256i*)idata,a); // spill to memory so each lane can index the table
 54 | 		for(int i = 0; i < 32; i++)
 55 | 			odata[i] = tab[idata[i]];
 56 | 		return _mm256_load_si256((__m256i*)odata);
 57 | 	}
 58 | 
 59 | 	static __m256i lookup2(__m256i a, __m256i b, const uint8_t * tab) // per byte lane: result = tab[a*256+b]
 60 | 	{
 61 | 		alignas(__m256i) uint8_t idata1[32];
 62 | 		alignas(__m256i) uint8_t idata2[32];
 63 | 		alignas(__m256i) uint8_t odata[32];
 64 | 
 65 | 		_mm256_store_si256((__m256i*)idata1,a);
 66 | 		_mm256_store_si256((__m256i*)idata2,b);
 67 | 		for(int i = 0; i < 32; i++)
 68 | 			odata[i] = tab[idata1[i]*256+idata2[i]];
 69 | 		return _mm256_load_si256((__m256i*)odata);		
 70 | 	}
 71 | 
 72 | 	using this_t=simd_posit8_32;
 73 | 	using cmpresult=__m256i; // predicates return 0xFF / 0x00 per byte lane
 74 | 	using FPT=Posit<int8_t,8,POSIT8_ES,uint16_t,POSIT8_NAN>; // NOTE(review): uses POSIT8_NAN while posit8.hpp names the mode macro POSIT8_MODE - confirm which one a user-supplied POSIT8_SPEC must define
 75 | 	using PT=typename FPT::PT;
 76 | 	using FT=uint16_t;
 77 | 	using base_t=FPT;
 78 | 
 79 | 	enum { csize = 32 };
 80 | 	struct Init{};
 81 | 
 82 | 	explicit simd_posit8_32(float a);
 83 | 	explicit simd_posit8_32(int a);
 84 | 	explicit simd_posit8_32(Init , __m256i x) : v(x) {} // wraps 32 raw encodings
 85 | 	explicit simd_posit8_32() {} // leaves v uninitialised
 86 | 	explicit simd_posit8_32(__m256i x) : v(x) {} // wraps 32 raw encodings
 87 | 
 88 | 	cmpresult is_negative() const { return (_mm256_cmpgt_epi8(_mm256_set1_epi8(0),v)); }
 89 | 	cmpresult has_neg_exponent() const { return (_mm256_cmpeq_epi8(_mm256_set1_epi8(0),_mm256_and_si256 (_mm256_set1_epi8(0x40),v))); } //   (v & 0x40) == 0; }
 90 | 	cmpresult is_nan() const { return PT::withnan ? _mm256_cmpeq_epi8(_mm256_set1_epi8(-128),v) : _mm256_setzero_si256(); } // without NaN support -128 is infinity (see is_infinity), so no lane may report NaN; matches posit8::is_nan
 91 | 	cmpresult is_infinity() const { return PT::withnan ? (_mm256_or_si256(  _mm256_cmpeq_epi8(_mm256_set1_epi8(-127),v), _mm256_cmpeq_epi8(_mm256_set1_epi8(127),v))) : (_mm256_cmpeq_epi8(_mm256_set1_epi8(-128),v));}
 92 | 
 93 | 	this_t inv() const { return this_t(lookup1(v,posit8ns::opinv)); }
 94 | 	this_t half() const { return *this / simd_posit8_32(2); }
 95 | 	this_t twice() const { return *this * simd_posit8_32(2); }
 96 | 
 97 | 	this_t exp2() const { return this_t(lookup1(v,posit8ns::opexp2)); }
 98 | 	this_t square() const { return this_t(lookup1(v,posit8ns::opsquare)); }
 99 | 	this_t operator-() const { return this_t(_mm256_sub_epi8(_mm256_setzero_si256(),v)); } // 0 - v per byte lane; _mm256_sign_epi8(v,v) yields |v|, which also made a-b evaluate a+|b|
100 | 
101 | 
102 | 	friend this_t operator+(this_t a, this_t b) { return this_t(lookup2(a.uu(),b.uu(),posit8ns::opadd)); }
103 | 	friend this_t operator*(this_t a, this_t b) { return this_t(lookup2(a.uu(),b.uu(),posit8ns::opmul)); }
104 | 	friend this_t operator-(this_t a, this_t b) { return a+(-b); }
105 | 	friend this_t operator/(this_t a, this_t b) { return this_t(lookup2(a.uu(),b.uu(),posit8ns::opdiv)); }
106 | 	friend std::ostream & operator << (std::ostream & ons, const this_t & p);
107 | 
108 | 
109 | 	__m256i uu() const { return v;} // raw encodings
110 | 
111 | 	operator std::array<float,32> () const ;
112 | 
113 | private:
114 | 	__m256i v; // 32 raw posit8 encodings, one per byte lane
115 | 
116 | };
117 | 
118 | inline std::ostream & operator << (std::ostream & ons, const std::array<float,32> & a) // writes "a32(", then " <value>" for each element, then ")"
119 | {
120 | 	ons << "a32(" ;
121 | 	for(const float & elem : a)
122 | 		ons << " " << elem;
123 | 	return ons << ")";
124 | 	// the stream is handed back so insertions can be chained
125 | }
126 | 
127 | inline std::ostream & operator << (std::ostream & ons, const __m256i & p) // writes "256i(", then each of the 32 bytes in hex, then ")"; leaves the stream in decimal mode
128 | {
129 | 		alignas(__m256i) uint8_t bytes[32];
130 | 		_mm256_store_si256(reinterpret_cast<__m256i*>(bytes),p);
131 | 		ons << "256i(" << std::hex ;
132 | 		for(const uint8_t & one : bytes)
133 | 			ons << " " << static_cast<int>(one);
134 | 		ons << std::dec << ")";
135 | 		return ons;
136 | }
137 | 
138 | inline simd_posit8_32 half(simd_posit8_32 z) { return z.half(); }
139 | 
140 | inline simd_posit8_32 twice(simd_posit8_32 z) { return z.twice(); }
141 | 
142 | inline simd_posit8_32 square(simd_posit8_32 z) { return z.square(); }
143 | 
144 | inline simd_posit8_32 inv(simd_posit8_32 x) { return x.inv(); }
145 | 
146 | inline simd_posit8_32 neg(simd_posit8_32 z) { return -z; }
147 | 
148 | inline simd_posit8_32 exp2(simd_posit8_32 z) { return z.exp2(); }
149 | 
150 | template <class T> 
151 | T gauss2(T x, T mu, T sigma)
152 | {
153 | 	return exp2(-half(square((x-mu)/sigma)));
154 | }


--------------------------------------------------------------------------------
/include/tposit.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | /**
 10 |  * TPosit is a generalized tabled posit
 11 |  *
 12 |  * Emanuele Ruffaldi 2017
 13 |  */
 14 | #pragma once
 15 | #include <stdint.h>
 16 | #include <iostream>
 17 | 
 18 | #ifndef CONSTEXPR14
 19 | #define CONSTEXPR14
 20 | #endif
 21 | 
 22 | #include "posit.h"
 23 | 
 24 | 
 25 | template <class T, int totalbits, int esbits, class FT, bool withnan>
 26 | class tposit
 27 | {
 28 | public:
 29 | 	// template <class T,int totalbits, int esbits, class FT, bool withnan>
 30 | 
 31 | 
 32 | 	using value_t=T;
 33 | 	using fraction_t=FT;
 34 | 	static constexpr int vtotalbits = totalbits;
 35 | 	static constexpr int vesbits = esbits;
 36 | 	using FPT=Posit<T,totalbits,esbits,FT,withnan>;
 37 | 	using PT=typename FPT::PT;
 38 | 	using exponenttype=typename PT::exponenttype;
 39 | 	// FT is the class template parameter itself; re-aliasing it (using FT=...) would redeclare a template parameter, which is ill-formed
 40 | 	using PTU=typename PT::POSIT_UTYPE;
 41 | 	using UnpackedLow=typename FPT::UnpackedLow;
 42 | 	using UnpackedT=typename FPT::UnpackedT;
 43 | 	using info=typename FPT::info;
 44 | 
 45 | private:
 46 | 	static T opinv[];
 47 | 	static T ophalf[];
 48 | 	static T opexp2[];
 49 | 	static T optwice[];
 50 | 	static T opsquare[];
 51 | 	static T opmul[];
 52 | 	static T opadd[];
 53 | 	static T opdiv[];
 54 | 	static float op2float[];
 55 | 
 56 | public:
 57 | 	struct DeepInit{};
 58 | 
 59 | 	explicit tposit(double a);	// defined out of line below: 0 and 1 are special-cased, otherwise encoded through FPT(a)
 60 | 	explicit tposit(float a);	// defined out of line below, same scheme as the double overload
 61 | 	explicit tposit(int a);	// defined out of line below, same scheme as the double overload
 62 | 	explicit tposit(DeepInit , T a) : v(a) {}	// adopt the raw storage value a as-is; takes T (not a fixed int8_t) so wider storage types are not truncated
 63 | 	explicit tposit() {}	// v is left uninitialized
 64 | 	explicit tposit(FPT x) : v(x.v) {}	// copy the stored value of a computed posit
 65 | 	explicit tposit(UnpackedLow u) : v(FPT(u).v) {}	// pack through FPT, then copy its stored value
 66 | 	explicit tposit(UnpackedT u) : v(FPT(u).v) {}	// pack through FPT, then copy its stored value
 67 | 
 68 | 	bool is_negative() const { return v < 0; }
 69 | 	bool has_neg_exponent() const { return (v & PT::POSIT_INVERTBIT) == 0; }
 70 | 	bool is_nan() const { return PT::withnan && v == PT::POSIT_NAN; }
 71 | 	bool is_infinity() const { return v==PT::POSIT_NINF || v==PT::POSIT_PINF; }
 72 | 	static tposit max() { return tposit(DeepInit(),PT::POSIT_MAX); }
 73 | 	static tposit min() { return tposit(DeepInit(),PT::POSIT_MIN); }  // <------
 74 | 	static tposit one() { return tposit(DeepInit(),PT::POSIT_ONE); }
 75 | 
 76 | 	//static tposit two() { return tposit(DeepInit(),72); }  // <------
 77 | 	//static tposit onehalf() { return tposit(DeepInit(),0x20); }  // <------
 78 | 
 79 | 	static tposit pinfinity() { return tposit(DeepInit(),PT::POSIT_PINF); }	// raw PT::POSIT_PINF pattern (closing brace was missing)
 80 | 	static tposit ninfinity() { return tposit(DeepInit(),PT::POSIT_NINF); }	// raw PT::POSIT_NINF pattern (closing brace was missing)
 81 | 	static tposit nan() { return tposit(DeepInit(),PT::POSIT_NAN); }	// raw PT::POSIT_NAN pattern
 82 | 
 83 | 	tposit inv() const { return tposit(DeepInit(),opinv[uu()]); }
 84 | 	tposit half() const { return tposit(DeepInit(),ophalf[uu()]);}
 85 | 	tposit twice() const { return tposit(DeepInit(),optwice[uu()]); }
 86 | 	tposit exp2() const { return tposit(DeepInit(),opexp2[uu()]); }
 87 | 	tposit square() const { return tposit(DeepInit(),opsquare[uu()]); }
 88 | 	tposit operator-() const { return tposit(DeepInit(),-v);  }
 89 | 
 90 | 	friend bool operator<(tposit a, tposit b) { return a.v < b.v; }
 91 | 	friend bool operator>(tposit a, tposit b) { return a.v > b.v; }
 92 | 	friend bool operator<=(tposit a, tposit b) { return a.v <= b.v;}
 93 | 	friend bool operator>=(tposit a, tposit b) { return a.v >= b.v; }
 94 | 	friend bool operator==(tposit a, tposit b) { return a.v == b.v; }
 95 | 	friend bool operator!=(tposit a, tposit b) { return a.v !=b.v; }
 96 | 
 97 | 	friend tposit operator+(tposit a, tposit b) { return tposit(DeepInit(),opadd[a.uu()*256+b.uu()]); }  // <------
 98 | 	friend tposit operator*(tposit a, tposit b) { return tposit(DeepInit(),opmul[a.uu()*256+b.uu()]); }  // <------
 99 | 	friend tposit operator-(tposit a, tposit b) { return a+(-b); }
100 | 	friend tposit operator/(tposit a, tposit b) { return tposit(DeepInit(),opdiv[a.uu()*256+b.uu()]); }  // <------
101 | 	
102 | 	tposit& operator+=(const tposit &a) { tposit r = *this+a; v = r.v; return *this; }
103 | 	tposit& operator*=(const tposit &a) { tposit r = *this*a; v = r.v; return *this; }
104 | 
105 | 	PTU uu() const { return (PTU)v;}
106 | 
107 | 	FPT as_posit() const { return FPT(typename FPT::DeepInit(), v); }
108 | 	operator FPT() const { return as_posit(); }
109 | 	operator float() const { return uint32_to_float(op2float[uu()]); }
110 | 	operator double() const { return (float)*this; }
111 | 	operator int() const { return (float)*this; }
112 | 
113 | 	UnpackedLow unpack_low() const { return as_posit().unpack_low(); }
114 | 	UnpackedT unpack() const { return as_posit().unpack(); }
115 | 	info analyze() const { return as_posit().analyze(); }
116 | 
117 | 
118 | 	friend std::ostream & operator << (std::ostream & ons, const tposit & p)
119 | 	{
120 | 		ons << "posit(" << p.uu() << ")" ;
121 | 		return ons;
122 | 	}
123 | 
124 | 
125 | 	T v;
126 | 
127 | };
128 | 
129 | template <class T, int totalbits, int esbits, class FT, bool withnan>
130 | inline tposit<T,totalbits,esbits,FT,withnan> half(tposit<T,totalbits,esbits,FT,withnan> z) { return z.half(); }
131 | 
132 | template <class T, int totalbits, int esbits, class FT, bool withnan>
133 | inline tposit<T,totalbits,esbits,FT,withnan> twice(tposit<T,totalbits,esbits,FT,withnan> z) { return z.twice(); }
134 | 
135 | template <class T, int totalbits, int esbits, class FT, bool withnan>
136 | inline tposit<T,totalbits,esbits,FT,withnan> square(tposit<T,totalbits,esbits,FT,withnan> z) { return z.square(); }
137 | 
138 | template <class T, int totalbits, int esbits, class FT, bool withnan>
139 | inline tposit<T,totalbits,esbits,FT,withnan> inv(tposit<T,totalbits,esbits,FT,withnan> x) { return x.inv(); }
140 | 
141 | template <class T, int totalbits, int esbits, class FT, bool withnan>
142 | CONSTEXPR14 inline tposit<T,totalbits,esbits,FT,withnan> neg(tposit<T,totalbits,esbits,FT,withnan> z) { return -z; }
143 | 
144 | template <class T, int totalbits, int esbits, class FT, bool withnan>
145 | inline tposit<T,totalbits,esbits,FT,withnan> exp2(tposit<T,totalbits,esbits,FT,withnan> z) { return z.exp2(); }
146 | 
147 | template <class T, int totalbits, int esbits, class FT, bool withnan>
148 | inline bool is_negative(tposit<T,totalbits,esbits,FT,withnan> a)
149 | {
150 | 	return a.v < 0;
151 | }
152 | 
153 | template <class T, int totalbits, int esbits, class FT, bool withnan>
154 | tposit<T,totalbits,esbits,FT,withnan>::tposit(int a)
155 | {
156 | 	if(a == 0)
157 | 		v = 0;
158 | 	else if(a == 1)
159 | 		v = PT::POSIT_ONE;
160 | 	else 
161 | 		v = FPT(a).v;
162 | 
163 | }
164 | 
165 | template <class T, int totalbits, int esbits, class FT, bool withnan>
166 | tposit<T,totalbits,esbits,FT,withnan>::tposit(float a)
167 | {
168 | 		if(a == 0)
169 | 		v = 0;
170 | 	else if(a == 1)
171 | 		v = PT::POSIT_ONE;
172 | 	else 
173 | 		v = FPT(a).v;
174 | 
175 | }
176 | 
177 | template <class T, int totalbits, int esbits, class FT, bool withnan>
178 | tposit<T,totalbits,esbits,FT,withnan>::tposit(double a) 
179 | {
180 | 	if(a == 0)
181 | 		v = 0;
182 | 	else if(a == 1)
183 | 		v = PT::POSIT_ONE; 
184 | 	else 
185 | 		v = FPT(a).v;		
186 | }
187 | 
188 | 
189 | 
190 | 
191 | 
192 | namespace std {
193 |     template<class T, int totalbits, int esbits, class FT, bool withnan> class numeric_limits<tposit<T,totalbits,esbits,FT,withnan> > {
194 |     public:
195 |        static tposit<T,totalbits,esbits,FT,withnan> max() {return tposit<T,totalbits,esbits,FT,withnan>::max(); }
196 |        static tposit<T,totalbits,esbits,FT,withnan> min() {return tposit<T,totalbits,esbits,FT,withnan>::min(); }
197 |         // Only max()/min() are provided; both forward to the tposit static factories (the template arguments must be spelled out outside the class)
198 |     };
199 | }
200 | 
201 | 
202 | 
203 | namespace std
204 | {
205 | 	template <class T, int totalbits, int esbits, class FT, bool withnan>
206 | 	inline CONSTEXPR14 tposit<T,totalbits,esbits,FT,withnan> abs(tposit<T,totalbits,esbits,FT,withnan> z) 
207 | 	{	// absolute value: negate the stored value when it is negative; the type must be fully spelled outside the class template
208 | 		return tposit<T,totalbits,esbits,FT,withnan>(typename tposit<T,totalbits,esbits,FT,withnan>::DeepInit(),z.v < 0 ? -z.v : z.v);
209 | 	}
210 | 
211 | 	template <class T, int totalbits, int esbits, class FT, bool withnan>
212 | 	inline CONSTEXPR14 tposit<T,totalbits,esbits,FT,withnan> min(tposit<T,totalbits,esbits,FT,withnan> a, tposit<T,totalbits,esbits,FT,withnan> b)
213 | 	{
214 | 		return a <=  b ? a : b;
215 | 	}
216 | 
217 | 	template <class T, int totalbits, int esbits, class FT, bool withnan>
218 | 	inline CONSTEXPR14 tposit <T,totalbits,esbits,FT,withnan> max(tposit<T,totalbits,esbits,FT,withnan> a, tposit<T,totalbits,esbits,FT,withnan> b)
219 | 	{
220 | 		return a >= b ? a : b;
221 | 	}
222 | }
223 | 
224 | 


--------------------------------------------------------------------------------
/include/tvalids.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
  3 |  * Distributed under the terms of the BSD 3-Clause License.  
  4 |  * 
  5 |  * (See accompanying file LICENSE)
  6 |  * 
  7 |  * --
  8 |  */
  9 | // tvalids.hpp
 10 | //
 11 | // Emanuele Ruffaldi 2016
 12 | #pragma once 
 13 | 
 14 | #include <cstdint>
 15 | #include <iostream>
 16 | #include <limits>
 17 | 
 18 | #ifndef CONSTEXPR14
 19 | #define CONSTEXPR14
 20 | #endif
 21 | 
 22 | #include "posit.h"
 23 | /**
 24 | // T = support type (enough for 1<<N)
 25 | // N = exacts
 26 | // id= identifier
 27 | // vtwo = stored (raw) value that represents two (useful)
 28 | //
 29 | //
 30 | */
 31 | template <class T, int N, int id, T vtwo>
 32 | class tvalid
 33 | {
 34 | public:
 35 | 	static_assert(std::is_signed<T>::value,"required signed T");
 36 | 	static_assert(N >= 2,"minimum is 2 for the case 0,1,2");
 37 | 	static_assert(sizeof(T)*8 >= N*4,"required enough storage T for provided bits  SREF");
 38 | 
 39 | 	using value_t=T;
 40 | 	static constexpr int TN = N*4;
 41 | 	using PT=PositTrait<T,TN,0,false>;
 42 | 	static constexpr int vesbits = 0;
 43 | 	using PTU=typename PT::POSIT_UTYPE;
 44 | 	using tposit=value_t;
 45 | 
 46 | private:
 47 | 	static T exacts[TN];
 48 | 	static T opexp2[TN*TN];
 49 | 	static T optwice[TN];
 50 | 	static T ophalf[TN];
 51 | 	static T opsquare[TN];
 52 | 	static T opmul[TN*TN];
 53 | 	static T opadd[TN*TN];
 54 | 	static float op2float[TN];
 55 | 
 56 | public:
 57 | 	struct DeepInit{};
 58 | 
 59 | 	explicit tvalid(double a);	// declared only; no definition is visible in this header
 60 | 	explicit tvalid(float a);	// declared only; no definition is visible in this header
 61 | 	explicit tvalid(int a);	// declared only; no definition is visible in this header
 62 | 	explicit tvalid(DeepInit , T a) : v(a) {}	// adopt the raw storage value a as-is; takes T (not a fixed int16_t) to match value_t
 63 | 	explicit tvalid() {}	// v is left uninitialized
 64 | 	tvalid(const tvalid & a): v(a.v) {}	// copy the stored value
 65 | 
 66 | 	bool is_negative() const { return v < 0; }
 67 | 	bool has_neg_exponent() const { return (v & PT::POSIT_INVERTBIT) == 0; }
 68 | 	bool is_nan() const { return PT::withnan && v == PT::POSIT_NAN; }
 69 | 	bool is_infinity() const { return v==PT::POSIT_NINF || v==PT::POSIT_PINF; }
 70 | 	static tvalid max() { return tvalid(DeepInit(),PT::POSIT_MAX); }	// factories return tvalid directly: the tposit alias names value_t, which cannot be built from (DeepInit, raw)
 71 | 	static tvalid min() { return tvalid(DeepInit(),PT::POSIT_MIN); }  
 72 | 	static tvalid smallestpos() { return tvalid(DeepInit(),PT::POSIT_SMALLPOS); }
 73 | 	static tvalid one() { return tvalid(DeepInit(),PT::POSIT_ONE); }
 74 | 	static tvalid two() { return tvalid(DeepInit(),vtwo); }  	// vtwo is the template-supplied raw value for two
 75 | 	static tvalid onehalf() { return two().inv(); }	// member call: an unqualified inv(...) here would resolve to the zero-argument member inv
 76 | 	static tvalid pinfinity() { return tvalid(DeepInit(),PT::POSIT_PINF); }	// closing brace was missing
 77 | 	static tvalid ninfinity() { return tvalid(DeepInit(),PT::POSIT_NINF); }	// closing brace was missing
 78 | 	static tvalid nan() { return tvalid(DeepInit(),PT::POSIT_NAN); }
 79 | 
 80 | 	friend bool operator<(const tvalid &a, const tvalid & b) { return a.v < b.v; }
 81 | 	friend bool operator>(const tvalid &a, const tvalid & b) { return a.v > b.v; }
 82 | 	friend bool operator<=(const tvalid &a, const tvalid & b) { return a.v <= b.v;}
 83 | 	friend bool operator>=(const tvalid &a, const tvalid & b) { return a.v >= b.v; }
 84 | 	friend bool operator==(const tvalid &a, const tvalid & b) { return a.v == b.v; }
 85 | 	friend bool operator!=(const tvalid &a, const tvalid & b) { return a.v !=b.v; }
 86 | 
 87 | 	tvalid inv() const { return tvalid(DeepInit(),-(v+TN)); }	// reciprocal computed arithmetically on the stored value, no table
 88 | 	tvalid half() const { return (*this * tvalid(DeepInit(),vtwo).inv()); }	// multiply by 1/two; built from vtwo directly because unqualified inv(...) would find the member inv
 89 | 	tvalid twice() const{ return tvalid(DeepInit(),optwice[uu()]); }	// table lookup indexed by uu()
 90 | 	tvalid exp2() const { return tvalid(DeepInit(),opexp2[uu()]); }	// table lookup indexed by uu()
 91 | 	tvalid square() const { return tvalid(DeepInit(),opsquare[uu()]); }	// table lookup indexed by uu()
 92 | 	tvalid operator-() const { return tvalid(DeepInit(),-v);  }	// negate the stored value
 93 | 
 94 | 	friend tvalid operator+(const tvalid &a, const tvalid & b) { return tvalid(DeepInit(),opadd[a.uu()*TN+b.uu()]); }	// 2-D table lookup, row a, column b
 95 | 	friend tvalid operator*(const tvalid &a, const tvalid & b) { return tvalid(DeepInit(),opmul[a.uu()*TN+b.uu()]); }	// 2-D table lookup, row a, column b
 96 | 	friend tvalid operator-(const tvalid &a, const tvalid & b) { return a+(-b); }	// subtraction as addition of the negation
 97 | 	friend tvalid operator/(const tvalid &a, const tvalid & b) { return tvalid(DeepInit(),opmul[a.uu()*TN+(b.inv().uu())]); }	// a * (1/b); b.inv() is a member call because inv(b) inside an inline friend resolves to the member inv
 98 | 
 99 | 	friend std::ostream & operator << (std::ostream & ons, const tvalid & p);
100 | 
101 | 	tvalid& operator+=(const tvalid &a) { tvalid r = *this+a; v = r.v; return *this; }
102 | 	tvalid& operator*=(const tvalid &a) { tvalid r = *this*a; v = r.v; return *this; }
103 | 
104 | 	PTU uu() const { return (PTU)v;}
105 | 	
106 | 	operator float() const { return uint32_to_float(op2float[uu()]); }
107 | 	operator double() const { return (float)*this; }
108 | 	operator int() const { return (float)*this; }
109 | 
110 | 	value_t v;
111 | 
112 | };
113 | 
114 | template<class T, int N, int id, T vtwo>
115 | inline tvalid<T,N,id,vtwo> half(tvalid<T,N,id,vtwo> z) { return z.half(); }	// free-function form of z.half(); tvalid needs all four template arguments
116 | 
117 | template<class T, int N, int id, T vtwo>
118 | inline tvalid<T,N,id,vtwo> twice(tvalid<T,N,id,vtwo> z) { return z.twice(); }	// free-function form of z.twice()
119 | 
120 | template<class T, int N, int id, T vtwo>
121 | inline tvalid<T,N,id,vtwo> square(tvalid<T,N,id,vtwo> z) { 	return z.square();  }	// free-function form of z.square()
122 | 
123 | template<class T, int N, int id, T vtwo>
124 | inline tvalid<T,N,id,vtwo> inv(tvalid<T,N,id,vtwo> x) { return x.inv(); }	// free-function form of x.inv()
125 | 
126 | template<class T, int N, int id, T vtwo>
127 | CONSTEXPR14 inline tvalid<T,N,id,vtwo> neg(tvalid<T,N,id,vtwo> z) { return -z; }	// free-function form of unary minus
128 | 
129 | template<class T, int N, int id, T vtwo>
130 | inline tvalid<T,N,id,vtwo> exp2(tvalid<T,N,id,vtwo> z) { return z.exp2(); }	// free-function form of z.exp2()
131 | 
132 | template<class T, int N, int id, T vtwo>
133 | inline bool is_negative(tvalid<T,N,id,vtwo> a)
134 | {
135 | 	return a.v < 0;	// sign test on the stored value
136 | }
137 | 
138 | namespace std {
139 |     template<class T, int N, int id, T vtwo> class numeric_limits<tvalid<T,N,id,vtwo> > {
140 |     public:
141 |        static tvalid<T,N,id,vtwo> max() {return tvalid<T,N,id,vtwo>::max(); }
142 |        static tvalid<T,N,id,vtwo> min() {return tvalid<T,N,id,vtwo>::min(); }
143 |         // Only max()/min() are provided; both forward to the tvalid static factories (all four template arguments are required)
144 |     };
145 | }
146 | 
147 | namespace std
148 | {
149 | 	template <class T, int N, int id, T two>
150 | 	inline CONSTEXPR14 tvalid<T,N,id,two> abs(tvalid<T,N,id,two> z) 
151 | 	{	// absolute value: negate the stored value when it is negative; the type must be fully spelled outside the class template
152 | 		return tvalid<T,N,id,two>(typename tvalid<T,N,id,two>::DeepInit(),z.v < 0 ? -z.v : z.v);
153 | 	}
154 | 
155 | 	template <class T, int N, int id, T two>
156 | 	inline CONSTEXPR14 tvalid<T,N,id,two> min(const tvalid<T,N,id,two> &a, const tvalid<T,N,id,two> & b)
157 | 	{	// a is returned when the operands compare equal
158 | 		return a <=  b ? a : b;
159 | 	}
160 | 
161 | 	template <class T, int N, int id, T two>
162 | 	inline CONSTEXPR14 tvalid<T,N,id,two> max(const tvalid<T,N,id,two> &a, const tvalid<T,N,id,two> & b)
163 | 	{	// a is returned when the operands compare equal
164 | 		return a >= b ? a : b;
165 | 	}
166 | }
167 | 
168 | 


--------------------------------------------------------------------------------
/jupyter/.ipynb_checkpoints/first-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Welcome to initial Jupyter testing"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 2,
 13 |    "metadata": {},
 14 |    "outputs": [
 15 |     {
 16 |      "data": {
 17 |       "text/plain": []
 18 |      },
 19 |      "execution_count": 2,
 20 |      "metadata": {},
 21 |      "output_type": "execute_result"
 22 |     }
 23 |    ],
 24 |    "source": [
 25 |     "#include \"../include/posit.h\""
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 3,
 31 |    "metadata": {},
 32 |    "outputs": [
 33 |     {
 34 |      "data": {
 35 |       "text/plain": [
 36 |        "(unsigned long) 2\n"
 37 |       ]
 38 |      },
 39 |      "execution_count": 3,
 40 |      "metadata": {},
 41 |      "output_type": "execute_result"
 42 |     }
 43 |    ],
 44 |    "source": [
 45 |     "sizeof(int_least_bits<10>::type)"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 4,
 51 |    "metadata": {},
 52 |    "outputs": [
 53 |     {
 54 |      "data": {
 55 |       "text/plain": []
 56 |      },
 57 |      "execution_count": 4,
 58 |      "metadata": {},
 59 |      "output_type": "execute_result"
 60 |     }
 61 |    ],
 62 |    "source": [
 63 |     "using P=Posit<int8_t,8,3,uint16_t,true>;"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 5,
 69 |    "metadata": {},
 70 |    "outputs": [
 71 |     {
 72 |      "data": {
 73 |       "text/plain": []
 74 |      },
 75 |      "execution_count": 5,
 76 |      "metadata": {},
 77 |      "output_type": "execute_result"
 78 |     }
 79 |    ],
 80 |    "source": [
 81 |     "P a(10.0f);\n",
 82 |     "P b(20.0f);\n"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 6,
 88 |    "metadata": {},
 89 |    "outputs": [
 90 |     {
 91 |      "data": {
 92 |       "text/plain": [
 93 |        "(float) 28.00000f\n"
 94 |       ]
 95 |      },
 96 |      "execution_count": 6,
 97 |      "metadata": {},
 98 |      "output_type": "execute_result"
 99 |     }
100 |    ],
101 |    "source": [
102 |     "(float)(a+b)"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 7,
108 |    "metadata": {},
109 |    "outputs": [
110 |     {
111 |      "data": {
112 |       "text/plain": [
113 |        "(float) -10.00000f\n"
114 |       ]
115 |      },
116 |      "execution_count": 7,
117 |      "metadata": {},
118 |      "output_type": "execute_result"
119 |     }
120 |    ],
121 |    "source": [
122 |     "(float)(a-b)"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": 8,
128 |    "metadata": {},
129 |    "outputs": [
130 |     {
131 |      "data": {
132 |       "text/plain": [
133 |        "(float) 0.09375f\n"
134 |       ]
135 |      },
136 |      "execution_count": 8,
137 |      "metadata": {},
138 |      "output_type": "execute_result"
139 |     }
140 |    ],
141 |    "source": [
142 |     "(float)inv(a)"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 9,
148 |    "metadata": {},
149 |    "outputs": [
150 |     {
151 |      "data": {
152 |       "text/plain": [
153 |        "(float) 192.00000f\n"
154 |       ]
155 |      },
156 |      "execution_count": 9,
157 |      "metadata": {},
158 |      "output_type": "execute_result"
159 |     }
160 |    ],
161 |    "source": [
162 |     "(float)(a*b)"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 10,
168 |    "metadata": {},
169 |    "outputs": [
170 |     {
171 |      "data": {
172 |       "text/plain": [
173 |        "(bool) true\n"
174 |       ]
175 |      },
176 |      "execution_count": 10,
177 |      "metadata": {},
178 |      "output_type": "execute_result"
179 |     }
180 |    ],
181 |    "source": [
182 |     "a < b"
183 |    ]
184 |   }
185 |  ],
186 |  "metadata": {
187 |   "kernelspec": {
188 |    "display_name": "C++14",
189 |    "language": "",
190 |    "name": "cling-cpp14"
191 |   },
192 |   "language_info": {
193 |    "codemirror_mode": "c++",
194 |    "file_extension": ".c++",
195 |    "mimetype": "text/x-c++src",
196 |    "name": "c++"
197 |   }
198 |  },
199 |  "nbformat": 4,
200 |  "nbformat_minor": 2
201 | }
202 | 


--------------------------------------------------------------------------------
/jupyter/first.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Welcome to initial Jupyter testing"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "#include \"../include/posit.h\""
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {},
 23 |    "outputs": [
 24 |     {
 25 |      "data": {
 26 |       "text/plain": [
 27 |        "2"
 28 |       ]
 29 |      },
 30 |      "execution_count": 2,
 31 |      "metadata": {},
 32 |      "output_type": "execute_result"
 33 |     }
 34 |    ],
 35 |    "source": [
 36 |     "sizeof(int_least_bits<10>::type)"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 3,
 42 |    "metadata": {},
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "using P=Posit<int8_t,8,3,uint16_t,PositSpec::WithNan>;"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 4,
 51 |    "metadata": {},
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "P a(10.0f);\n",
 55 |     "P b(20.0f);\n"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 5,
 61 |    "metadata": {},
 62 |    "outputs": [
 63 |     {
 64 |      "data": {
 65 |       "text/plain": [
 66 |        "28f"
 67 |       ]
 68 |      },
 69 |      "execution_count": 5,
 70 |      "metadata": {},
 71 |      "output_type": "execute_result"
 72 |     }
 73 |    ],
 74 |    "source": [
 75 |     "(float)(a+b)"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": 6,
 81 |    "metadata": {},
 82 |    "outputs": [
 83 |     {
 84 |      "data": {
 85 |       "text/plain": [
 86 |        "-10f"
 87 |       ]
 88 |      },
 89 |      "execution_count": 6,
 90 |      "metadata": {},
 91 |      "output_type": "execute_result"
 92 |     }
 93 |    ],
 94 |    "source": [
 95 |     "(float)(a-b)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 7,
101 |    "metadata": {},
102 |    "outputs": [
103 |     {
104 |      "data": {
105 |       "text/plain": [
106 |        "0.09375f"
107 |       ]
108 |      },
109 |      "execution_count": 7,
110 |      "metadata": {},
111 |      "output_type": "execute_result"
112 |     }
113 |    ],
114 |    "source": [
115 |     "(float)inv(a)"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 8,
121 |    "metadata": {},
122 |    "outputs": [
123 |     {
124 |      "data": {
125 |       "text/plain": [
126 |        "192f"
127 |       ]
128 |      },
129 |      "execution_count": 8,
130 |      "metadata": {},
131 |      "output_type": "execute_result"
132 |     }
133 |    ],
134 |    "source": [
135 |     "(float)(a*b)"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": 9,
141 |    "metadata": {},
142 |    "outputs": [
143 |     {
144 |      "data": {
145 |       "text/plain": [
146 |        "true"
147 |       ]
148 |      },
149 |      "execution_count": 9,
150 |      "metadata": {},
151 |      "output_type": "execute_result"
152 |     }
153 |    ],
154 |    "source": [
155 |     "a < b"
156 |    ]
157 |   }
158 |  ],
159 |  "metadata": {
160 |   "kernelspec": {
161 |    "display_name": "C++14",
162 |    "language": "C++14",
163 |    "name": "xeus-cling-cpp14"
164 |   },
165 |   "language_info": {
166 |    "codemirror_mode": "text/x-c++src",
167 |    "file_extension": ".cpp",
168 |    "mimetype": "text/x-c++src",
169 |    "name": "c++",
170 |    "version": "-std=c++14"
171 |   }
172 |  },
173 |  "nbformat": 4,
174 |  "nbformat_minor": 2
175 | }
176 | 


--------------------------------------------------------------------------------
/makeone.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | cat <(echo -n "// cppPosit ") <(git rev-parse HEAD) <(echo "") include/bithippop.hpp include/fixedtraits.hpp include/floattraits.hpp include/typehelpers.hpp include/unpacked.h include/posit.h | grep -vEh '#include "[^\"]+"' | grep -vh "#pragma once" - > posit_one.h
3 | cat posit_one.h | pbcopy
4 | echo "copied in Clipboard if macOS"


--------------------------------------------------------------------------------
/matlab/Lenna.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eruffaldi/cppPosit/3c497041063dc87de2f9615a52a55b0ed5222b12/matlab/Lenna.png


--------------------------------------------------------------------------------
/matlab/cellindex.m:
--------------------------------------------------------------------------------
1 | function  r =cellindex(a)
2 | r=a{:};


--------------------------------------------------------------------------------
/matlab/cellvcat.m:
--------------------------------------------------------------------------------
 1 | function r = cellvcat(x)
 2 | % CELLVCAT Vertically concatenate the contents of cell array x; returns [] when x is empty.
 3 | if isempty(x)
 4 |     r=[];
 5 | else
 6 |     % One vertcat over the expanded cell replaces the r=[r;x{I}] loop,
 7 |     % which re-allocated r on every iteration.
 8 |     r = vertcat(x{:});
 9 | end
10 | 
11 | end


--------------------------------------------------------------------------------
/matlab/compareposits.m:
--------------------------------------------------------------------------------
 1 | addpath halfprecision/
 2 | n8 = arrayfun(@(x) {8,x,sprintf('positint8_t,8,%d,uint16_t,false.bin',x)},0:3,'UniformOutput',false);
 3 | p8 = cellvcat(cellfun(@(x) loadpositdump(['../build/' x{3}],x{1},x{2}),n8,'UniformOutput',false));
 4 | 
 5 | %%
 6 | n10 = arrayfun(@(x) {10,x,sprintf('positint16_t,10,%d,uint16_t,false.bin',x)},0:3,'UniformOutput',false);
 7 |  p10 = cellvcat(cellfun(@(x) loadpositdump(['../build/' x{3}],x{1},x{2}),n10,'UniformOutput',false));
 8 | 
 9 | %%
10 | n12 = arrayfun(@(x) {12,x,sprintf('positint16_t,12,%d,uint16_t,false.bin',x)},0:4,'UniformOutput',false);
11 | p12 = cellvcat(cellfun(@(x) loadpositdump(['../build/' x{3}],x{1},x{2}),n12,'UniformOutput',false));
12 | 
13 | %%
14 | [u,h] = halfinrange(-10,10);      
15 | tf16=dump2table([-length(u)/2:(length(u)/2-1);cast(u,'double');h]');
16 | tf16.bits=16;
17 | tf16.es=0;
18 | tf16.what=categorical({'float'},{'posit','float','valid'})
19 | f16=tf16;
20 | %%
21 | p12e=[f16;p12];
22 | p10e=[f16;p10];
23 | 
24 | %%
25 | qss={p8,p10,p12,f16};
26 | %%
27 |     colors =[[0 0.4470 0.7410];[0.8500 0.3250 0.0980];[0.9290 0.6940 0.1250];[0.4940 0.1840 0.5560];[0,1,1]];
28 | 
29 | %%
30 | for K=1:length(qss)
31 |     qs=qss{K};
32 |     figure;
33 |     s=[];
34 |     h=[];
35 |     for I=1:length(qs)
36 |         pd=qs(I,:);
37 |         p=double(pd.data{1});
38 |         usenan=false;
39 |         h(I)=plot(p(:,1),p(:,3),'Color',colors(I,:));
40 |         hold on
41 |         plot(p(:,1),p(:,3),['*'],'Color',colors(I,:));
42 |         if pd.what=='float'
43 |          s{I} =sprintf('float16');
44 |         else
45 |          s{end+1} =sprintf('posit%d es=%d',pd.bits,pd.es);
46 |         end
47 |     ylim([-10,10]);
48 |     end
49 |     yl =ylim;
50 |     title(sprintf('Posits %d bits with Y limit %f %f',bits,yl(1),yl(2)));
51 | 
52 |     hold off
53 |     legend(h,s);
54 | end
55 | 
56 | %%
57 | 
58 | % 
59 | % size(u)
60 | % plot(h)
61 | % hold on
62 | % plot(h,'*');
63 | % yl =ylim;
64 | % title(sprintf('float16 in range %f %f',yl(1),yl(2)));
65 | 
66 | %%
67 | qss={p10e};
68 | %%
69 | for K=1:length(qss)
70 |     qs=qss{K};
71 |     figure;
72 | 
73 |     s={};
74 |     h=[];
75 |     for I=1:height(qs)
76 |         bits =qs.bits(I);
77 |         isfloatx = qs.what(I) == 'float';
78 |         es=qs.es(I);
79 |         usenan=false;
80 |         h(I)=plot(p.float{I},p.res{I},'Color',colors(I,:));
81 |         hold on
82 |         plot(p(:,3),p(:,4),['.'],'Color',colors(I,:));
83 |         if isfloatx
84 |             s{I} =sprintf('float%d',bits);
85 |         else
86 |             s{I} = sprintf('posit%d es %d',bits,es);
87 |         end
88 |     end
89 |     xlabel('Value');
90 |     ylabel('Resolution');
91 |     legend(h,s);
92 |     hold off
93 |     xlim([-10,10]);
94 | end
95 | 


--------------------------------------------------------------------------------
/matlab/compareposits2.m:
--------------------------------------------------------------------------------
  1 | addpath halfprecision/
  2 | n8 = arrayfun(@(x) {8,x,sprintf('positint8_t,8,%d,uint16_t,false.bin',x)},0:3,'UniformOutput',false);
  3 | p8 = cellvcat(cellfun(@(x) loadpositdump(['../build/' x{3}],x{1},x{2}),n8,'UniformOutput',false));
  4 | 
  5 | %%
  6 | n10 = arrayfun(@(x) {10,x,sprintf('positint16_t,10,%d,uint16_t,false.bin',x)},0:3,'UniformOutput',false);
  7 |  p10 = cellvcat(cellfun(@(x) loadpositdump(['../build/' x{3}],x{1},x{2}),n10,'UniformOutput',false));
  8 | 
  9 | %%
 10 | n12 = arrayfun(@(x) {12,x,sprintf('positint16_t,12,%d,uint16_t,false.bin',x)},0:4,'UniformOutput',false);
 11 | p12 = cellvcat(cellfun(@(x) loadpositdump(['../build/' x{3}],x{1},x{2}),n12,'UniformOutput',false));
 12 | 
 13 | %%  
 14 | [u,h] = halfinrange(-10,10);      
 15 | tf16=dump2table([-length(u)/2:(length(u)/2-1);cast(u,'double');h]');
 16 | tf16.bits=16;
 17 | tf16.es=0;
 18 | tf16.what=categorical({'float'},{'posit','float','valid'})
 19 | f16=tf16;
 20 | %%
 21 | p12e=[f16;p12];
 22 | p10e=[f16;p10];
 23 | 
 24 | %%
 25 | qss1={[p8;p10;p12;f16]};
 26 | qss1=limittab(qss1,-2,2,'exclusive');
 27 | qss={p8,p10,p12,f16};
 28 | qss=limittab(qss,-2,2,'exclusive');
 29 | %%
 30 | p81=qss{1};
 31 | p8z =p81(1,:);
 32 | %%
 33 |     colors =[[0 0.4470 0.7410];[0.8500 0.3250 0.0980];[0.9290 0.6940 0.1250];[0.4940 0.1840 0.5560];[0,1,1]];
 34 | 
 35 | %%
 36 | for K=1:length(qss)
 37 |     qs=qss{K};
 38 |     figure;
 39 |     s=[];
 40 |     h=[];
 41 |     for I=1:height(qs)
 42 |         h(I)=plot(qs.signed{I},qs.float{I},'Color',colors(mod(I,length(colors))+1,:));
 43 |         hold on
 44 |         plot(qs.signed{I},qs.float{I},['*'],'Color',colors(mod(I,length(colors))+1,:));
 45 |         if qs.what(I)=='float'
 46 |          s{I} =sprintf('float16');
 47 |         else
 48 |          s{end+1} =sprintf('posit%d es=%d',qs.bits(I),qs.es(I));
 49 |         end
 50 |     ylim([-1,1]);
 51 |     end
 52 |     yl =ylim;
 53 | 
 54 |     hold off
 55 |     legend(h,s);
 56 | end
 57 | 
 58 | %%
 59 | 
 60 | % 
 61 | % size(u)
 62 | % plot(h)
 63 | % hold on
 64 | % plot(h,'*');
 65 | % yl =ylim;
 66 | % title(sprintf('float16 in range %f %f',yl(1),yl(2)));
 67 | 
 68 | %%
 69 | qss={p10e,f16};
 70 | qss=limittab(qss,-1,1,'exclusive');
 71 | 
 72 | %%
 73 | for K=1:length(qss)
 74 |     qs=qss{K};
 75 |     figure;
 76 | 
 77 |     s={};
 78 |     h=[];
 79 |     for I=1:height(qs)
 80 |         bits =qs.bits(I);
 81 |         isfloatx = qs.what(I) == 'float';
 82 |         es=qs.es(I);
 83 | %        if es >0
 84 |  %           continue
 85 |   %      end
 86 |         usenan=false;
 87 |         h(I)=plot(qs.float{I},qs.res{I},'Color',colors(I,:));
 88 |         hold on
 89 |         plot(qs.float{I},qs.res{I},['.'],'Color',colors(I,:));
 90 |         if qs.what(I) =='float'
 91 |             s{I} =sprintf('float%d',bits);
 92 |         else
 93 |             s{I} = sprintf('posit%d es %d',bits,es);
 94 |         end
 95 |     end
 96 |     xlabel('Value');
 97 |     ylabel('Resolution');
 98 |     legend(h,s);
 99 |     hold off
100 |     xlim([-1,1]);
101 | end
102 | 


--------------------------------------------------------------------------------
/matlab/describetab.m:
--------------------------------------------------------------------------------
 1 | function t = describetab(t)
 2 | %DESCRIBETAB Add per-row summary statistics to a table of number dumps.
 3 | %   t = DESCRIBETAB(t) reads, for every row I, the vectors stored in the
 4 | %   cells t.float{I} and t.res{I}, drops their NaN and Inf entries, and
 5 | %   stores in that row:
 6 | %     max, min  - largest and smallest remaining value of float
 7 | %     smallest  - smallest non-zero magnitude of float
 8 | %     minres    - smallest remaining value of res
 9 | %     maxres    - largest remaining value of res
10 | %   A statistic that has no sample to reduce is stored as NaN.
11 | 
12 | for I=1:height(t)
13 |     v=t.float{I};
14 |     W=v(isfinite(v));
15 |     v=t.res{I};
16 |     Q=v(isfinite(v));
17 |     t.max(I) = scalarornan(max(W));
18 |     t.min(I) = scalarornan(min(W));
19 |     t.smallest(I) = scalarornan(min(abs(W(W ~= 0))));
20 |     t.minres(I) = scalarornan(min(Q));
21 |     t.maxres(I) = scalarornan(max(Q));
22 | end
23 | 
24 | 
25 | function s = scalarornan(s)
26 | % An empty reduction result cannot be stored in a single table element
27 | % (assigning [] to an indexed element is a deletion), so map it to NaN.
28 | if isempty(s)
29 |     s = NaN;
30 | end
31 | 


--------------------------------------------------------------------------------
/matlab/dump2table.m:
--------------------------------------------------------------------------------
 1 | function q = dump2table(r)
 2 | %DUMP2TABLE Pack a three-column dump matrix into a one-row table.
 3 | %   q = DUMP2TABLE(r) stores r(:,1), r(:,2) and r(:,3) as the cell columns
 4 | %   signed, unsigned and float. The centred difference of the third
 5 | %   column is written to r(:,4) and stored as the cell column res. The
 6 | %   summary columns are then added by describetab.
 7 | 
 8 | r(:,4) = meandiff(r(:,3));
 9 | 
10 | q = table();
11 | names = {'signed','unsigned','float','res'};
12 | for k = 1:numel(names)
13 |     % Each column of r becomes a single cell of the matching variable.
14 |     q.(names{k}) = {r(:,k)};
15 | end
16 | q = describetab(q);
17 | 
18 | 
19 | function r = meandiff(x)
20 | % Centred difference (x(k+1)-x(k-1))/2 of the flattened input; the first
21 | % and last entries have only one neighbour and are left as NaN.
22 | x = x(:);
23 | r = nan(size(x));
24 | last = numel(x) - 1;
25 | r(2:last) = (x(3:end) - x(1:end-2))/2;
26 | 


--------------------------------------------------------------------------------
/matlab/fromindex.m:
--------------------------------------------------------------------------------
 1 | 
 2 | function a = fromindex(zp,mode,ii)
 3 | %FROMINDEX Translate positions in a number table into one representation.
 4 | %   a = FROMINDEX(zp,mode,ii) returns, for the positions ii:
 5 | %     'index'    - ii itself
 6 | %     'signed'   - zp.signed{1}(ii)
 7 | %     'unsigned' - zp.unsigned{1}(ii)
 8 | %     'float'    - zp.float{1}(ii)
 9 | %   Any other mode yields 0. An empty ii is returned unchanged.
10 | 
11 | if isempty(ii)
12 |     a = ii;
13 |     return;
14 | end
15 | 
16 | switch mode
17 |     case 'index'
18 |         a = ii;
19 |     case {'signed','unsigned','float'}
20 |         % The mode doubles as the name of the column to read.
21 |         column = zp.(mode);
22 |         a = column{1}(ii);
23 |     otherwise
24 |         a = 0;
25 | end


--------------------------------------------------------------------------------
/matlab/halfinrange.m:
--------------------------------------------------------------------------------
 1 | function [u,h] = halfinrange(mi,ma)
 2 | %HALFINRANGE Half precision values inside a closed interval.
 3 | %   [u,h] = HALFINRANGE(mi,ma) converts all 65536 uint16 bit patterns with
 4 | %   halfprecision(...,'double') and keeps those whose value v satisfies
 5 | %   mi <= v <= ma. h holds the kept values in ascending order and u the
 6 | %   bit patterns in the matching order.
 7 | 
 8 | patterns = uint16(0:65535);
 9 | values = halfprecision(patterns,'double');
10 | 
11 | inside = (values >= mi) & (values <= ma);
12 | kept = patterns(inside);
13 | [h,order] = sort(values(inside));
14 | u = kept(order);


--------------------------------------------------------------------------------
/matlab/halfprecision/halfprecision.m:
--------------------------------------------------------------------------------
  1 | % halfprecision converts IEEE 754 floating point to half precision IEEE 754r
  2 | %******************************************************************************
  3 | % 
  4 | %  MATLAB (R) is a trademark of The Mathworks (R) Corporation
  5 | % 
  6 | %  Function:    halfprecision
  7 | %  Filename:    halfprecision.c
  8 | %  Programmer:  James Tursa
  9 | %  Version:     1.0
 10 | %  Date:        March 3, 2009
 11 | %  Copyright:   (c) 2009 by James Tursa, All Rights Reserved
 12 | %
 13 | %  This code uses the BSD License:
 14 | %
 15 | %  Redistribution and use in source and binary forms, with or without 
 16 | %  modification, are permitted provided that the following conditions are 
 17 | %  met:
 18 | %
 19 | %     * Redistributions of source code must retain the above copyright 
 20 | %       notice, this list of conditions and the following disclaimer.
 21 | %     * Redistributions in binary form must reproduce the above copyright 
 22 | %       notice, this list of conditions and the following disclaimer in 
 23 | %       the documentation and/or other materials provided with the distribution
 24 | %      
 25 | %  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 26 | %  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 27 | %  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 28 | %  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
 29 | %  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 30 | %  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 31 | %  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 32 | %  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 33 | %  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 34 | %  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
 35 | %  POSSIBILITY OF SUCH DAMAGE.
 36 | % 
 37 | %  halfprecision converts the input argument to/from a half precision floating
 38 | %  point bit pattern corresponding to IEEE 754r. The bit pattern is stored in a
 39 | %  uint16 class variable. Please note that halfprecision is *not* a class. That
 40 | %  is, you cannot do any arithmetic with the half precision bit patterns.
 41 | %  halfprecision is simply a function that converts the IEEE 754r half precision
 42 | %  bit pattern to/from other numeric MATLAB variables. You can, however, take
 43 | %  the half precision bit patterns, convert them to single or double, do the
 44 | %  operation, and then convert the result back manually.
 45 | % 
 46 | %  1 bit sign bit
 47 | %  5 bits exponent, biased by 15
 48 | %  10 bits mantissa, hidden leading bit, normalized to 1.0
 49 | % 
 50 | %  Special floating point bit patterns recognized and supported:
 51 | % 
 52 | %  All exponent bits zero:
 53 | %  - If all mantissa bits are zero, then number is zero (possibly signed)
 54 | %  - Otherwise, number is a denormalized bit pattern
 55 | % 
 56 | %  All exponent bits set to 1:
 57 | %  - If all mantissa bits are zero, then number is +Infinity or -Infinity
 58 | %  - Otherwise, number is NaN (Not a Number)
 59 | % 
 60 | %  Building:
 61 | % 
 62 | %  halfprecision requires that a mex routine be built (one time only). This
 63 | %  process is typically self-building the first time you call the function
 64 | %  as long as you have the files halfprecision.m and halfprecision.c in the
 65 | %  same directory somewhere on the MATLAB path. If you need to manually build
 66 | %  the mex function, here are the commands:
 67 | %
 68 | %  >> mex -setup
 69 | %    (then follow instructions to select a C / C++ compiler of your choice)
 70 | %  >> mex halfprecision.c
 71 | %
 72 | %  If you have an older version of MATLAB, you may need to use this command:
 73 | %
 74 | %  >> mex -DDEFINEMWSIZE halfprecision.c
 75 | % 
 76 | %  Syntax
 77 | % 
 78 | %  B = halfprecision(A)
 79 | %  C = halfprecision(B,S)
 80 | %      halfprecision(B,'disp')
 81 | % 
 82 | %  Description
 83 | % 
 84 | %  A = a MATLAB numeric array, char array, or logical array.
 85 | %
 86 | %  B = the variable A converted into half precision floating point bit pattern.
 87 | %      The bit pattern will be returned as a uint16 class variable. The values
 88 | %      displayed are simply the bit pattern interpreted as if it were an unsigned
 89 | %      16-bit integer. To see the halfprecision values, use the 'disp' option, which
 90 | %      simply converts the bit patterns into a single class and then displays them.
 91 | %
 92 | %  C = the half precision floating point bit pattern in B converted into class S.
 93 | %      B must be a uint16 or int16 class variable.
 94 | %
 95 | %  S = char string naming the desired class (e.g., 'single', 'int32', etc.)
 96 | %      If S = 'disp', then the floating point bit values are simply displayed.
 97 | % 
 98 | %  Examples
 99 | %  
100 | %  >> a = [-inf -1e30 -1.2 NaN 1.2 1e30 inf]
101 | %  a =
102 | %  1.0e+030 *
103 | %      -Inf   -1.0000   -0.0000       NaN    0.0000    1.0000       Inf
104 | % 
105 | %  >> b = halfprecision(a)
106 | %  b =
107 | %  64512  64512  48333  65024  15565  31744  31744
108 | % 
109 | %  >> halfprecision(b,'disp')
110 | %      -Inf      -Inf   -1.2002       NaN    1.2002       Inf       Inf
111 | % 
112 | %  >> halfprecision(b,'double')
113 | %  ans =
114 | %      -Inf      -Inf   -1.2002       NaN    1.2002       Inf       Inf
115 | % 
116 | %  >> 2^(-24)
117 | %  ans =
118 | %  5.9605e-008
119 | % 
120 | %  >> halfprecision(ans)
121 | %  ans =
122 | %      1
123 | % 
124 | %  >> halfprecision(ans,'disp')
125 | %  5.9605e-008
126 | % 
127 | %  >> 2^(-25)
128 | %  ans =
129 | %  2.9802e-008
130 | % 
131 | %  >> halfprecision(ans)
132 | %  ans =
133 | %      1
134 | % 
135 | %  >> halfprecision(ans,'disp')
136 | %  5.9605e-008
137 | % 
138 | %  >> 2^(-26)
139 | %  ans =
140 | %   1.4901e-008
141 | % 
142 | %  >> halfprecision(ans)
143 | %  ans =
144 | %      0
145 | % 
146 | %  >> halfprecision(ans,'disp')
147 | %     0
148 | % 
149 | %  Note that the special cases of -Inf, +Inf, and NaN are handled correctly.
150 | %  Also, note that the -1e30 and 1e30 values overflow the half precision format
151 | %  and are converted into half precision -Inf and +Inf values, and stay that
152 | %  way when they are converted back into doubles.
153 | % 
154 | %  For the denormalized cases, note that 2^(-24) is the smallest number that can
155 | %  be represented in half precision exactly. 2^(-25) will convert to 2^(-24)
156 | %  because of the rounding algorithm used, and 2^(-26) is too small and underflows
157 | %  to zero.
158 | % 
159 | %**************************************************************************
160 | 
161 | function varargout = halfprecision(varargin)
162 | % Build helper that runs when this .m file is called: it compiles
163 | % halfprecision.c, expected next to this file, with mex and retries
164 | % with -DDEFINEMWSIZE when the first attempt fails. An error is raised when
165 | % the source file is missing or both attempts fail. No conversion is
166 | % performed by this call and varargout is left unassigned.
167 | disp(' ');
168 | disp('You must build the mex routine before you can use halfprecision.');
169 | disp('Attempting to do so now ...');
170 | disp(' ');
171 | mname = mfilename('fullpath');
172 | cname = [mname '.c'];
173 | if( isempty(dir(cname)) )
174 |     disp('Cannot find the file halfprecision.c in the same directory as the');
175 |     disp('file halfprecision.m. Please ensure that they are in the same');
176 |     disp('directory and try again. The following file was not found:');
177 |     disp(' ');
178 |     disp(cname);
179 |     disp(' ');
180 |     error('Unable to compile halfprecision.c');
181 | else
182 |     disp(['Found file halfprecision.c in ' cname]);
183 |     disp(' ');
184 |     disp('Now attempting to compile ...');
185 |     disp('(If prompted, please press the Enter key and then select any C/C++');
186 |     disp('compiler that is available, such as lcc.)');
187 |     disp(' ');
188 |     disp(['mex(''' cname ''')']);
189 |     disp(' ');
190 |     try
191 |         mex(cname);
192 |         disp('mex halfprecision.c build completed ... you may now use halfprecision.');
193 |         disp(' ');
194 |     catch
195 |         % First build failed: retry with the mwSize fallback define.
196 |         disp(' ');
197 |         disp('Well, *that* didn''t work ... now trying it with mwSize defined ...');
198 |         disp(' ');
199 |         try
200 |             disp(' ');
201 |             disp(['mex(''-DDEFINEMWSIZE'',''' cname ''')']);
202 |             disp(' ');
203 |             mex('-DDEFINEMWSIZE',cname);
204 |             disp('mex halfprecision.c build completed ... you may now use halfprecision.');
205 |             disp(' ');
206 |         catch
207 |             disp('Hmmm ... That didn''t work either.');
208 |             disp(' ');
209 |             disp('The mex command failed. This may be because you have already run');
210 |             disp('mex -setup and selected a non-C compiler, such as Fortran. If this');
211 |             disp('is the case, then rerun mex -setup and select a C/C++ compiler.');
212 |             disp(' ');
213 |             error('Unable to compile halfprecision.c');
214 |         end
215 |     end
216 | end
217 | if false
218 |     varargout = varargin; % Get rid of the lint message
219 | end
220 | end
221 | 


--------------------------------------------------------------------------------
/matlab/halfprecision/halfprecisionmax.m:
--------------------------------------------------------------------------------
 1 | % halfprecisionmax returns IEEE 754r bit pattern of max half precision value
 2 | %******************************************************************************
 3 | % 
 4 | %  MATLAB (R) is a trademark of The Mathworks (R) Corporation
 5 | % 
 6 | %  Function:    halfprecisionmax
 7 | %  Filename:    halfprecisionmax.m
 8 | %  Programmer:  James Tursa
 9 | %  Version:     1.0
10 | %  Date:        March 3, 2009
11 | %  Copyright:   (c) 2009 by James Tursa, All Rights Reserved
12 | %
13 | %  This code uses the BSD License:
14 | %
15 | %  Redistribution and use in source and binary forms, with or without 
16 | %  modification, are permitted provided that the following conditions are 
17 | %  met:
18 | %
19 | %     * Redistributions of source code must retain the above copyright 
20 | %       notice, this list of conditions and the following disclaimer.
21 | %     * Redistributions in binary form must reproduce the above copyright 
22 | %       notice, this list of conditions and the following disclaimer in 
23 | %       the documentation and/or other materials provided with the distribution
24 | %      
25 | %  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
26 | %  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
27 | %  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
28 | %  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
29 | %  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
30 | %  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
31 | %  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
32 | %  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
33 | %  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
34 | %  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
35 | %  POSSIBILITY OF SUCH DAMAGE.
36 | % 
37 | % Type 'help halfprecision' to get details of this bit pattern
38 | %
39 | %******************************************************************************
40 | 
41 | function h = halfprecisionmax
42 | % Returns the uint16 bit pattern 0x7BFF: sign bit 0, exponent bits 11110,
43 | % all ten mantissa bits set.
44 | % narginchk/nargoutchk replace the deprecated error(nargchk(...)) form.
45 | narginchk(0, 0);
46 | nargoutchk(0, 1);
47 | h = uint16(hex2dec('7BFF'));
48 | end
49 | 


--------------------------------------------------------------------------------
/matlab/halfprecision/halfprecisionmin.m:
--------------------------------------------------------------------------------
 1 | % halfprecisionmin returns IEEE 754r bit pattern of min half precision value
 2 | %******************************************************************************
 3 | % 
 4 | %  MATLAB (R) is a trademark of The Mathworks (R) Corporation
 5 | % 
 6 | %  Function:    halfprecisionmin
 7 | %  Filename:    halfprecisionmin.m
 8 | %  Programmer:  James Tursa
 9 | %  Version:     1.0
10 | %  Date:        March 3, 2009
11 | %  Copyright:   (c) 2009 by James Tursa, All Rights Reserved
12 | %
13 | %  This code uses the BSD License:
14 | %
15 | %  Redistribution and use in source and binary forms, with or without 
16 | %  modification, are permitted provided that the following conditions are 
17 | %  met:
18 | %
19 | %     * Redistributions of source code must retain the above copyright 
20 | %       notice, this list of conditions and the following disclaimer.
21 | %     * Redistributions in binary form must reproduce the above copyright 
22 | %       notice, this list of conditions and the following disclaimer in 
23 | %       the documentation and/or other materials provided with the distribution
24 | %      
25 | %  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
26 | %  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
27 | %  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
28 | %  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
29 | %  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
30 | %  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
31 | %  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
32 | %  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
33 | %  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
34 | %  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
35 | %  POSSIBILITY OF SUCH DAMAGE.
36 | % 
37 | % Type 'help halfprecision' to get details of this bit pattern
38 | %
39 | %******************************************************************************
40 | 
41 | function h = halfprecisionmin
42 | % Returns the uint16 bit pattern 0x0001: sign and exponent bits zero, only
43 | % the lowest mantissa bit set.
44 | % narginchk/nargoutchk replace the deprecated error(nargchk(...)) form.
45 | narginchk(0, 0);
46 | nargoutchk(0, 1);
47 | h = uint16(hex2dec('0001'));
48 | end
49 | 


--------------------------------------------------------------------------------
/matlab/halfprecision/license.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2009, James Tursa
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are
 6 | met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright
 9 |       notice, this list of conditions and the following disclaimer.
10 |     * Redistributions in binary form must reproduce the above copyright
11 |       notice, this list of conditions and the following disclaimer in
12 |       the documentation and/or other materials provided with the distribution
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
18 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 | POSSIBILITY OF SUCH DAMAGE.
25 | 


--------------------------------------------------------------------------------
/matlab/halfprecision/origin.txt:
--------------------------------------------------------------------------------
1 | http://it.mathworks.com/matlabcentral/fileexchange/23173-ieee-754r-half-precision-floating-point-converter


--------------------------------------------------------------------------------
/matlab/limittab.m:
--------------------------------------------------------------------------------
 1 | function t = limittab(t,mi,ma,mode)
 2 | %LIMITTAB Keep only the samples whose value lies between two bounds.
 3 | %   t = LIMITTAB(t,mi,ma,mode) filters, row by row, the vectors stored in
 4 | %   the cell columns signed, unsigned, float and res of table t, using the
 5 | %   float vector as the key. With mode 'exclusive' the bounds themselves
 6 | %   are dropped (mi < v < ma); any other mode keeps them (mi <= v <= ma).
 7 | %   The summary columns are then recomputed with describetab.
 8 | %
 9 | %   When t is a cell array the function is applied to each element.
10 | 
11 | if iscell(t)
12 |     for k=1:length(t)
13 |         t{k} = limittab(t{k},mi,ma,mode);
14 |     end
15 |     return;
16 | end
17 | 
18 | strict = strcmp(mode,'exclusive');
19 | for k=1:height(t)
20 |     v = t.float{k};
21 |     if strict
22 |         keep = (v > mi) & (v < ma);
23 |     else
24 |         keep = (v >= mi) & (v <= ma);
25 |     end
26 |     t.signed{k}   = t.signed{k}(keep);
27 |     t.unsigned{k} = t.unsigned{k}(keep);
28 |     t.float{k}    = v(keep);
29 |     t.res{k}      = t.res{k}(keep);
30 | end
31 | t = describetab(t);
32 | 


--------------------------------------------------------------------------------
/matlab/loadpositdump.m:
--------------------------------------------------------------------------------
 1 | function q = loadpositdump(name,bits,es)
 2 | %LOADPOSITDUMP Load a binary posit dump file into a one-row table.
 3 | %   q = LOADPOSITDUMP(name,bits,es) reads the file as a flat stream of
 4 | %   doubles, groups them three per record and passes the resulting
 5 | %   N-by-3 matrix to dump2table. The columns bits and es are set from the
 6 | %   arguments (each defaults to 0 when omitted) and the column what is set
 7 | %   to the categorical value 'posit'.
 8 | %
 9 | %   NOTE(review): the original comment "iIv" presumably names the three
10 | %   record fields (signed, unsigned, value) - confirm against the writer.
11 | 
12 | % Default the two optional arguments independently so that a call with
13 | % exactly two arguments does not leave es undefined.
14 | if nargin < 2
15 |     bits=0;
16 | end
17 | if nargin < 3
18 |     es=0;
19 | end
20 | 
21 | fid =fopen(name,'rb');
22 | if fid < 0
23 |     error('loadpositdump:open','Cannot open dump file "%s".',name);
24 | end
25 | % Close the file on every exit path, including errors raised below.
26 | closer = onCleanup(@() fclose(fid));
27 | 
28 | r =fread(fid,Inf,'double');
29 | if mod(numel(r),3) ~= 0
30 |     error('loadpositdump:format', ...
31 |         'Dump file "%s" holds %d doubles, not a multiple of 3.',name,numel(r));
32 | end
33 | r =reshape(r,3,[])';
34 | q = dump2table(r);
35 | q.bits=bits;
36 | q.es=es;
37 | q.what=categorical({'posit'},{'posit','float','valid'});
38 | 


--------------------------------------------------------------------------------
/matlab/meandiff.m:
--------------------------------------------------------------------------------
1 | function r = meandiff(x)
2 | x=x(:);
3 | 
4 | r =nan(size(x));
5 | 
6 | % ((next-cur)+(cur-prev))/2 == (next-prev)/2
7 | r(2:end-1) = (x(3:end)-x(1:end-2))/2;
8 | 
9 | 


--------------------------------------------------------------------------------
/matlab/mex_make.m:
--------------------------------------------------------------------------------
1 | FLAGS='--std=c++14 -O3 --march=native -Werror -Wall -Wno-long-long -pedantic';
2 | mex('floatTop8.cpp',['COMPFLAGS="$COMPFLAGS ' FLAGS]);
3 | mex('p8Tofloat.cpp',['COMPFLAGS="$COMPFLAGS ' FLAGS]);
4 | mex('p8binop.cpp',['COMPFLAGS="$COMPFLAGS ' FLAGS]);
5 | mex('p8unop.cpp',['COMPFLAGS="$COMPFLAGS ' FLAGS]);
6 | 


--------------------------------------------------------------------------------
/matlab/morton.m:
--------------------------------------------------------------------------------
 1 |  function ind=morton(n)
 2 |  % MORTON(N) return the morton permutation order for array of size 2^N
 3 |  % e.g.
 4 |  % matrix size is 2^n
 5 |  % n=2;
 6 |  % ind=morton(n);
 7 |  % d=fix(rand(2^n,2^n)*10);
 8 |  % disp(d)
 9 |  % disp(d(ind))
10 |  %
11 |  % ind is a 1-by-4^N row of column-major linear indices into a 2^N-by-2^N
12 |  % array. For k = 0 .. 4^N-1, ind(k+1) addresses the element whose
13 |  % zero-based column is built from bits 1,3,5,... of k and whose
14 |  % zero-based row is built from bits 2,4,6,... of k (bit 1 is the least
15 |  % significant bit).
16 |  validateattributes(n,{'numeric'},{'scalar','integer','nonnegative'});
17 |  n=double(n);
18 |  k=0:4^n-1; %start index count for array at zero
19 |  r=zeros(size(k));
20 |  c=zeros(size(k));
21 |  for j=0:n-1
22 |      % de-interleave: bit 2j+1 feeds the column, bit 2j+2 feeds the row
23 |      c=c+bitget(k,2*j+1)*2^j;
24 |      r=r+bitget(k,2*j+2)*2^j;
25 |  end
26 |  ind=2^n*c+r+1; %make a linear row index into array for easy addressing
27 |  %morton.m ends


--------------------------------------------------------------------------------
/matlab/normr.m:
--------------------------------------------------------------------------------
1 | function X = normr(Y)
2 | % NORMR Divide every row of Y by the square root of the sum of its
3 | % element-wise squares (Y.*Y summed along dimension 2).
4 | 
5 | rowLength = sqrt(sum(Y.*Y, 2));
6 | X = bsxfun(@rdivide, Y, rowLength);


--------------------------------------------------------------------------------
/matlab/p8Tofloat.cpp:
--------------------------------------------------------------------------------
 1 | #include "mex.h"   
 2 | //#include "zposit8.hpp"
 3 | #include "posit.h"
 4 | 
 5 | 
 6 | using zposit_type = Posit<int8_t,8,0,uint16_t,true>;
 7 | 
 8 | /**
 9 |  * MEX gateway: output = p8Tofloat(input, outputTemplate).
10 |  *
11 |  * input           int8 array whose bytes are reinterpreted as zposit_type.
12 |  * outputTemplate  any value of class double, single, int16 or uint16; only
13 |  *                 its class is read, and it selects the class of the
14 |  *                 result (int16/uint16 results are filled with halffloat).
15 |  *
16 |  * On a usage error a message is printed and no output is assigned.
17 |  */
18 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
19 | {
20 |     // Exactly two inputs and at most one output. The two tests are joined
21 |     // with || so that a call with a missing argument is rejected here
22 |     // instead of reaching the prhs[1] accesses below.
23 |     if(nrhs != 2 || nlhs > 1)
24 |     {
25 |         mexPrintf("zp8Tofloat(input,output template) -> output. Use (unsigned) int 16 for half precision. \n");
26 |         return;
27 |     }
28 |     if(mxGetClassID(prhs[0]) != mxINT8_CLASS)
29 |     {
30 |         mexPrintf("zp8Tofloat(input,output template) -> output. Use (unsigned) int 16 for half precision. \n");
31 |         return;
32 |     }
33 |     const mxClassID outclass = mxGetClassID(prhs[1]);
34 |     switch(outclass)
35 |     {
36 |         case mxDOUBLE_CLASS:
37 |         case mxSINGLE_CLASS:
38 |         case mxINT16_CLASS:
39 |         case mxUINT16_CLASS:
40 |             break;
41 |         default:
42 |             mexPrintf("zp8Tofloat unsupported output type: float double half(as int16)\n");
43 |             return;
44 |     }
45 |     const bool complex = mxIsComplex(prhs[0]);
46 |     const auto ndims = mxGetNumberOfDimensions(prhs[0]);
47 |     const mwSize * dimi = mxGetDimensions(prhs[0]);
48 |     plhs[0] = mxCreateUninitNumericArray(ndims,(mwSize*)dimi,outclass,!complex ? mxREAL: mxCOMPLEX);
49 |     zposit_type * src = (zposit_type*)mxGetData(prhs[0]);
50 |     // NOTE(review): for complex input twice the element count is walked
51 |     // through the mxGetData pointers; that presumes real and imaginary
52 |     // parts are stored contiguously - confirm against the MEX API in use.
53 |     // mwSize keeps the full element count instead of truncating it to int.
54 |     const mwSize n = mxGetNumberOfElements(prhs[0])*(complex?2:1);
55 |     switch(outclass)
56 |     {
57 |         case mxDOUBLE_CLASS:
58 |             {
59 |                 double * dst = (double*)mxGetData(plhs[0]);
60 |                 for(mwSize i = 0;i < n; i++)
61 |                 {
62 |                     dst[i] = (double)src[i];
63 |                 }
64 |             }
65 |             break;
66 |         case mxSINGLE_CLASS:
67 |             {
68 |                 float * dst = (float*)mxGetData(plhs[0]);
69 |                 for(mwSize i = 0;i < n; i++)
70 |                 {
71 |                     dst[i] = (float)src[i];
72 |                 }
73 |             }
74 |             break;
75 |         case mxINT16_CLASS:
76 |         case mxUINT16_CLASS:
77 |             {
78 |                 // 16-bit integer outputs carry half precision values.
79 |                 halffloat * dst = (halffloat*)mxGetData(plhs[0]);
80 |                 for(mwSize i = 0;i < n; i++)
81 |                 {
82 |                     dst[i] = (halffloat)src[i].unpack();
83 |                 }
84 |             }
85 |             break;
86 |         default:
87 |             break;
88 |     }
89 | 
90 | }


--------------------------------------------------------------------------------
/matlab/p8binop.cpp:
--------------------------------------------------------------------------------
 1 | #include "mex.h"   
 2 | //#include "zposit8.hpp"
 3 | #include "posit.h"
 4 | #include <stdint.h>
 5 | using zposit_type = Posit<int8_t,8,0,uint16_t,true>;
 6 | 
 7 | 
 8 | /**
 9 |  * MEX gateway: out = p8binop(a, b, op).
10 |  *
11 |  * a, b  int8 arrays with the same number of elements; their bytes are
12 |  *       reinterpreted as zposit_type.
13 |  * op    one-character string: '+', '-', '*' or '/'.
14 |  * out   int8 array with the dimensions of a holding a[i] op b[i].
15 |  *
16 |  * On invalid input a message is printed and no output is assigned.
17 |  */
18 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
19 | {
20 |     // Exactly three inputs and at most one output. The two tests are joined
21 |     // with || so that a call with a missing argument is rejected here
22 |     // instead of reaching the prhs[] accesses below.
23 |     if(nrhs != 3 || nlhs > 1)
24 |     {
25 |         mexPrintf("out = zp8binop(a,b,op);\n");
26 |         return;
27 |     }
28 |     if(mxGetClassID(prhs[0]) != mxGetClassID(prhs[1]) || mxGetClassID(prhs[0]) != mxINT8_CLASS)
29 |     {
30 |         mexPrintf("expected signed int 8bit\n");
31 |         return;
32 |     }
33 |     if(mxGetClassID(prhs[2]) != mxCHAR_CLASS)
34 |     {
35 |         mexPrintf("expected string op\n");
36 |         return;
37 |     }
38 |     const auto ndims = mxGetNumberOfDimensions(prhs[0]);
39 |     const mwSize * dimi = mxGetDimensions(prhs[0]);
40 |     const bool complex1 = mxIsComplex(prhs[0]);
41 |     const bool complex2 = mxIsComplex(prhs[1]);
42 |     // NOTE(review): complex operands are walked as 2*N contiguous values
43 |     // through mxGetData - confirm this matches the MEX API in use.
44 |     const mwSize n1 = mxGetNumberOfElements(prhs[0])*(complex1?2:1);
45 |     const mwSize n2 = mxGetNumberOfElements(prhs[1])*(complex2?2:1);
46 |     if(n1 != n2)
47 |     {
48 |         mexPrintf("mismatch of items count\n");
49 |         return;
50 |     }
51 |     char * opa = mxArrayToString(prhs[2]);
52 |     if(!opa)
53 |     {
54 |         mexPrintf("expected string op\n");
55 |         return;
56 |     }
57 |     // Accept exactly one character; testing opa[0] first keeps the opa[1]
58 |     // read inside the buffer when the string is empty.
59 |     if(opa[0] == 0 || opa[1] != 0)
60 |     {
61 |         mexPrintf("unknown operation %s\n",opa);
62 |         mxFree((void*)opa);
63 |         return;
64 |     }
65 |     const char op = opa[0];
66 |     mxFree((void*)opa);
67 |     // Reject unknown operators before allocating, so that an uninitialised
68 |     // array is never handed back to MATLAB.
69 |     if(op != '+' && op != '-' && op != '*' && op != '/')
70 |     {
71 |         mexPrintf("unknown operation %c\n",op);
72 |         return;
73 |     }
74 | 
75 |     plhs[0] = mxCreateUninitNumericArray(ndims,(mwSize*)dimi,mxINT8_CLASS,!complex2 ? mxREAL: mxCOMPLEX);
76 |     const zposit_type * a = (const zposit_type*)mxGetData(prhs[0]);
77 |     const zposit_type * b = (const zposit_type*)mxGetData(prhs[1]);
78 |     zposit_type * dst = (zposit_type*)mxGetData(plhs[0]);
79 |     const mwSize n = n1;
80 | 
81 |     switch(op)
82 |     {
83 |         case '+':
84 |             for(mwSize i = 0; i < n; i++) { dst[i] = a[i] + b[i]; }
85 |             break;
86 |         case '-':
87 |             for(mwSize i = 0; i < n; i++) { dst[i] = a[i] - b[i]; }
88 |             break;
89 |         case '*':
90 |             for(mwSize i = 0; i < n; i++) { dst[i] = a[i] * b[i]; }
91 |             break;
92 |         default: // '/', the only operator left after the validation above
93 |             for(mwSize i = 0; i < n; i++) { dst[i] = a[i] / b[i]; }
94 |             break;
95 |     }
96 | 
97 | }


--------------------------------------------------------------------------------
/matlab/p8unop.cpp:
--------------------------------------------------------------------------------
 1 | #include "mex.h"   
 2 | //#include "zposit8.hpp"
 3 | #include "posit.h"
 4 | #include <stdint.h>
 5 | using zposit_type = Posit<int8_t,8,0,uint16_t,true>;
 6 | 
 7 | 
 8 | /**
 9 |  * MEX gateway: out = p8unop(a, op).
10 |  *
11 |  * a    int8 array whose bytes are reinterpreted as zposit_type.
12 |  * op   operation name; only "inv" is implemented (out[i] = inv(a[i])).
13 |  * out  int8 array with the dimensions of a.
14 |  *
15 |  * On invalid input a message is printed and no output is assigned.
16 |  */
17 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
18 | {
19 |     // Exactly two inputs and at most one output. The two tests are joined
20 |     // with || so that a call with a missing argument is rejected here
21 |     // instead of reaching the prhs[1] accesses below.
22 |     if(nrhs != 2 || nlhs > 1)
23 |     {
24 |         mexPrintf("out = zp8unop(a,op);\n");
25 |         return;
26 |     }
27 |     if(mxGetClassID(prhs[0]) != mxINT8_CLASS)
28 |     {
29 |         mexPrintf("expected signed int 8bit\n");
30 |         return;
31 |     }
32 |     if(mxGetClassID(prhs[1]) != mxCHAR_CLASS)
33 |     {
34 |         mexPrintf("expected string op\n");
35 |         return;
36 |     }
37 |     char * opa = mxArrayToString(prhs[1]);
38 |     if(!opa)
39 |     {
40 |         mexPrintf("expected string op\n");
41 |         return;
42 |     }
43 |     // Validate the operation before allocating, so that an uninitialised
44 |     // array is never handed back to MATLAB.
45 |     if(strcmp(opa,"inv") != 0)
46 |     {
47 |         mexPrintf("unknown operation <%s>\n",opa);
48 |         mxFree((void*)opa);
49 |         return;
50 |     }
51 |     mxFree((void*)opa);
52 | 
53 |     const auto ndims = mxGetNumberOfDimensions(prhs[0]);
54 |     const mwSize * dimi = mxGetDimensions(prhs[0]);
55 |     const bool complex1 = mxIsComplex(prhs[0]);
56 |     // NOTE(review): complex input is walked as 2*N contiguous values
57 |     // through mxGetData - confirm this matches the MEX API in use.
58 |     const mwSize n = mxGetNumberOfElements(prhs[0])*(complex1?2:1);
59 | 
60 |     plhs[0] = mxCreateUninitNumericArray(ndims,(mwSize*)dimi,mxINT8_CLASS,!complex1 ? mxREAL: mxCOMPLEX);
61 |     const zposit_type * a = (const zposit_type*)mxGetData(prhs[0]);
62 |     zposit_type * dst = (zposit_type*)mxGetData(plhs[0]);
63 | 
64 |     for(mwSize i = 0; i < n; i++)
65 |     {
66 |         dst[i] = inv(a[i]);
67 |     }
68 | 
69 | }


--------------------------------------------------------------------------------
/matlab/test_p8conv.m:
--------------------------------------------------------------------------------
 1 | % Smoke test for the posit8 MEX helpers p8Tofloat, floatTop8, p8binop and
 2 | % p8unop. The x axis of both plots is the int8 bit pattern.
 3 | 
 4 | % Every int8 bit pattern must survive posit -> double -> posit unchanged.
 5 | x_i = cast(-128:127,'int8');
 6 | x_if = p8Tofloat(x_i,double(0));
 7 | x_ifi = floatTop8(x_if);
 8 | assert(all(x_i==x_ifi),'conversion');
 9 | %%
10 | % Decoded value of each bit pattern.
11 | scatter(x_i,x_if)
12 | xlabel('Posit (int8)');
13 | ylabel('float');
14 | %%
15 | % x+x, x*x and inv(x) computed by the MEX helpers, decoded for plotting.
16 | d_i = p8binop(x_i,x_i,'+');
17 | ds_i = p8binop(x_i,x_i,'*');
18 | di_i = p8unop(x_i,'inv');
19 | d_if = p8Tofloat(d_i,double(0));
20 | di_if = p8Tofloat(di_i,double(0));
21 | ds_if = p8Tofloat(ds_i,double(0));
22 | scatter(x_i,x_if,'r')
23 | hold on
24 | scatter(x_i,d_if,'b')
25 | scatter(x_i,ds_if,'g')
26 | scatter(x_i,di_if,'m')
27 | hold off
28 | xlabel('Posit (int8)');
29 | ylabel('float');
30 | legend({'x','2x','x**2','inv(x)'});


--------------------------------------------------------------------------------
/matlab/testfloat11.m:
--------------------------------------------------------------------------------
 1 | function r = testfloat11(zp,mode,a,b,omode)
 2 | %TESTFLOAT11 Add two entries of a number table and locate the nearest entry.
 3 | %   r = TESTFLOAT11(zp,mode,a,b,omode) looks up a and b in zp with
 4 | %   toindex(zp,mode,...), adds their float values and looks the sum up
 5 | %   again with toindex(zp,'float',...).
 6 | %
 7 | %   r is [] when a is not found. Otherwise r is a struct with the fields
 8 | %     a, b, y - index/signed/unsigned/float of both operands and of the
 9 | %               entry found for the sum
10 | %     s       - sum of the two float values
11 | %     diff    - s minus the float value of y
12 | %   Fields after a failed lookup are not set. omode is accepted but unused.
13 | 
14 | r = [];
15 | 
16 | ia = toindex(zp,mode,a);
17 | if isempty(ia)
18 |     return;
19 | end
20 | r.a = entryat(zp,ia);
21 | 
22 | ib = toindex(zp,mode,b);
23 | if isempty(ib)
24 |     return;
25 | end
26 | r.b = entryat(zp,ib);
27 | 
28 | r.s = r.a.float + r.b.float;
29 | iy = toindex(zp,'float',r.s);
30 | if ~isempty(iy)
31 |     r.y = entryat(zp,iy);
32 |     r.diff = r.s - r.y.float;
33 | end
34 | 
35 | 
36 | function e = entryat(zp,idx)
37 | % Collect every representation of the table entry at position idx.
38 | e = struct('index',idx, ...
39 |     'signed',zp.signed{1}(idx), ...
40 |     'unsigned',zp.unsigned{1}(idx), ...
41 |     'float',zp.float{1}(idx));
42 | 


--------------------------------------------------------------------------------
/matlab/toindex.m:
--------------------------------------------------------------------------------
 1 | 
 2 | function ii = toindex(zp,mode,a)
 3 | if isempty(a)
 4 |     ii=a;
 5 |     return;
 6 | end
 7 | 
 8 | switch mode
 9 |     case 'index'
10 |         ii =a;
11 |     case 'signed'
12 |         ii = find(zp.signed{1}==a,1,'first');
13 |     case 'unsigned'
14 |         ii = find(zp.unsigned{1}==a,1,'first');
15 |     case 'float'
16 |         %ii = find(zp.float{1}==a,1,'first');
17 |         [~,ii] = min((zp.float{1}-a).^2);
18 |     otherwise
19 |         ii = 0;
20 |         
21 | end
22 | 


--------------------------------------------------------------------------------
/preamble.txt:
--------------------------------------------------------------------------------
1 | Copyright (C) 2017-2019 Emanuele Ruffaldi
2 | Distributed under the terms of the BSD 3-Clause License.  
3 | 
4 | (See accompanying file LICENSE)
5 | 


--------------------------------------------------------------------------------
/scripts/float2bin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import binascii
 3 | 
 4 | 
 5 | types = (np.float16,np.float32,np.float64,np.float128);
 6 | 
 7 | for t in types:
 8 | 	inf = np.array(np.inf,dtype=t)
 9 | 	nan = np.array(np.nan,dtype=t)
10 | 	zero = np.zeros(1,dtype=t)
11 | 	one = np.ones(1,dtype=t)
12 | 	for x,y in zip((-inf,inf,nan,one,2*one),("-inf","inf","nan","one","two")):
13 | 		bb = x.byteswap().tobytes()
14 | 		print "%30s %10s" % (t,y),"0x" + binascii.hexlify(bb)


--------------------------------------------------------------------------------
/scripts/listpositany.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Runs src/listposits.cpp through cling for a single posit type.
#
# Usage: listpositany.sh POSIT_TYPE [extra cling arguments...]
#   POSIT_TYPE is handed to the preprocessor as -DPOSIT_TYPE=<value>; all
#   remaining arguments are forwarded to cling unchanged.

# Absolute directory of this script, robust to spaces in the path.
SCRIPT_HOME=$(cd "$(dirname "$0")" && pwd)

# Aliases are enabled and the profile is sourced before cling is invoked -
# presumably so that a cling alias defined there is usable (confirm).
shopt -s expand_aliases
if [ -f ~/.bash_profile ]; then
	. ~/.bash_profile
fi

if [ $# -lt 1 ]; then
	echo "usage: $0 POSIT_TYPE [cling options...]" >&2
	exit 1
fi
T=$1
shift

# 12-column tab stops for the tab-separated listing
tabs -12
# "$@" (not $*) keeps each forwarded argument intact
cling "-DPOSIT_TYPE=$T" "-I$SCRIPT_HOME/../src" "-I$SCRIPT_HOME/../extern" "-I$SCRIPT_HOME/../include" "$SCRIPT_HOME/../src/listposits.cpp" "$@"


--------------------------------------------------------------------------------
/scripts/listposits8_16.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | SCRIPT_HOME=`dirname $0 | while read a; do cd $a && pwd && break; done`
 3 | $SCRIPT_HOME/listpositany.sh int16_t,12,3,uint16_t,true -DHEADONLY
 4 | $SCRIPT_HOME/listpositany.sh int8_t,8,0,uint16_t,false -DHEADONLY
 5 | $SCRIPT_HOME/listpositany.sh int8_t,8,0,uint16_t,true -DHEADONLY
 6 | $SCRIPT_HOME/listpositany.sh int8_t,8,1,uint16_t,false -DHEADONLY
 7 | $SCRIPT_HOME/listpositany.sh int8_t,8,1,uint16_t,true -DHEADONLY
 8 | $SCRIPT_HOME/listpositany.sh int8_t,8,2,uint16_t,false -DHEADONLY
 9 | $SCRIPT_HOME/listpositany.sh int8_t,8,2,uint16_t,true -DHEADONLY
10 | $SCRIPT_HOME/listpositany.sh int8_t,8,3,uint16_t,false -DHEADONLY
11 | $SCRIPT_HOME/listpositany.sh int8_t,8,3,uint16_t,true -DHEADONLY
12 | $SCRIPT_HOME/listpositany.sh int16_t,12,0,uint16_t,false -DHEADONLY
13 | $SCRIPT_HOME/listpositany.sh int16_t,12,0,uint16_t,true -DHEADONLY
14 | $SCRIPT_HOME/listpositany.sh int16_t,10,0,uint16_t,false -DHEADONLY
15 | $SCRIPT_HOME/listpositany.sh int16_t,10,0,uint16_t,true -DHEADONLY
16 | $SCRIPT_HOME/listpositany.sh int16_t,10,2,uint16_t,false -DHEADONLY
17 | $SCRIPT_HOME/listpositany.sh int16_t,10,2,uint16_t,true -DHEADONLY
18 | $SCRIPT_HOME/listpositany.sh int16_t,16,0,uint16_t,false -DHEADONLY
19 | $SCRIPT_HOME/listpositany.sh int16_t,16,0,uint16_t,true -DHEADONLY


--------------------------------------------------------------------------------
/scripts/listposits8_16bin.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | SCRIPT_HOME=`dirname $0 | while read a; do cd $a && pwd && break; done`
 3 | XX="-DLISTFORMAT "
 4 | listme() {
 5 | 	$SCRIPT_HOME/listpositany.sh $1 -DLISTFORMAT=Biov
 6 | 	mv tmp "posit$1.bin"
 7 | }
 8 | 
 9 | listme int8_t,8,0,uint16_t,false
10 | listme int8_t,8,0,uint16_t,true 
11 | listme int8_t,8,1,uint16_t,false 
12 | listme int8_t,8,1,uint16_t,true 
13 | listme int8_t,8,2,uint16_t,false 
14 | listme int8_t,8,2,uint16_t,true 
15 | listme int8_t,8,3,uint16_t,false 
16 | listme int8_t,8,3,uint16_t,true 
17 | listme int16_t,12,0,uint16_t,false 
18 | listme int16_t,12,0,uint16_t,true 
19 | listme int16_t,12,1,uint16_t,false 
20 | listme int16_t,12,1,uint16_t,true 
21 | listme int16_t,12,2,uint16_t,false 
22 | listme int16_t,12,2,uint16_t,true 
23 | listme int16_t,12,3,uint16_t,false 
24 | listme int16_t,12,3,uint16_t,true 
25 | listme int16_t,12,4,uint16_t,false 
26 | listme int16_t,12,4,uint16_t,true 
27 | listme int16_t,10,0,uint16_t,false 
28 | listme int16_t,10,0,uint16_t,true 
29 | listme int16_t,10,2,uint16_t,false 
30 | listme int16_t,10,1,uint16_t,true 
31 | listme int16_t,10,1,uint16_t,false 
32 | listme int16_t,10,2,uint16_t,true 
33 | listme int16_t,10,3,uint16_t,true 
34 | listme int16_t,10,3,uint16_t,false 
35 | listme int16_t,16,0,uint16_t,false 
36 | listme int16_t,16,0,uint16_t,true 


--------------------------------------------------------------------------------
/scripts/valid2tvalid.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import punum
 3 | import argparse
 4 | import fractions
 5 | import operator
 6 | import tabulate
 7 | 
 8 | 
 9 | def main():
10 |     parser = argparse.ArgumentParser(description='Table to C code')
11 |     parser.add_argument('--id',default=1,help="tvalid identifier")
12 |     parser.add_argument('--tvalid',help="emits structure for tvalid class")
13 | 	pass
14 | 
15 | if __name__ == '__main__':
16 | 	main()


--------------------------------------------------------------------------------
/scripts/validsgentable.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # Properties of summation table
  3 | #
  4 | # Given only points on lattice (a,b) >= 1
  5 | #
# lattice size n --> unum 3+n bits
  7 | #
  8 | # (a,-a,1/a,-1/a) + (b,-b,1/b,-1/b) 
  9 | #
 10 | # [   a + b,     a - b,   a + 1/b,     a - 1/b]
 11 | # [   b - a,   - a - b,   1/b - a,   - a - 1/b]
 12 | # [ b + 1/a,   1/a - b, 1/a + 1/b,   1/a - 1/b]
 13 | # [ b - 1/a, - b - 1/a, 1/b - 1/a, - 1/a - 1/b]
 14 | #
 15 | # [ 1, 2, 3, 4; -2, -1, -4, -3; 5, 6, 7, 8; -6, -5, -8, -7]
 16 | #
 17 | # a+b
 18 | # a-b
 19 | # a+1/b = (ab+1)/b
 20 | # a-1/b = (ab-1)/b
 21 | # b+1/a = (ab+1)/a
 22 | # b-1/a = (ab-1)/a
# 1/a+1/b = (a+b)/ab derived
 24 | # 1/a-1/b = (b-a)/ab derived
 25 | #
# But then we can consider our magic properties so that
 27 | #
 28 | # 
 29 | #
 30 | # Taking all pairs (a,b) with a <= b we have n(n+1)/2 pairs each of 8 cases => 8n (n+1)/2
 31 | #
 32 | # e.g. 16bit as n=13 we have 13*8 (13+1)/2 instead of 
 33 | #
 34 | # Emanuele Ruffaldi 2017
 35 | 
 36 | 
 37 | import punum
 38 | import argparse
 39 | import fractions
 40 | import operator
 41 | import tabulate
 42 | 
 43 | 
 44 | def main():
 45 |     t1 =["half","twice","square","exp2","float"]
 46 |     t2 =["times","plus"]
 47 |     parser = argparse.ArgumentParser(description='Table generator')
 48 |     parser.add_argument('--points',nargs="+",type=int,help="lattice points")
 49 |     parser.add_argument('--op1',choices=t1,nargs="+",default=t1)
 50 |     parser.add_argument('--op2',choices=t2,nargs="+",default=t2)
 51 |     parser.add_argument('--p3',action="store_true")
 52 |     parser.add_argument('--p4',action="store_true")
 53 |     parser.add_argument('--p5',action="store_true")
 54 |     parser.add_argument('--p8',action="store_true")
 55 |     parser.add_argument('--p8a',action="store_true")
 56 |     parser.add_argument('--p16a',action="store_true")
 57 |     parser.add_argument('--verbose',action="store_true")
 58 |     parser.add_argument('--sparsetab')
 59 |     parser.add_argument('--pickle')
 60 |     args = parser.parse_args()
 61 |     if args.points is not None:
 62 |         if args.points[0] != 1:
 63 |             args.points = [1] + args.points
 64 |         args.points.sort()
 65 |         alpha = punum.Alphabet(args.points)
 66 |     else:
 67 |         if args.p3:
 68 |             alpha = punum.Alphabet.p3()
 69 |         elif args.p4:
 70 |             alpha = punum.Alphabet.p4()
 71 |         elif args.p5:
 72 |             alpha = punum.Alphabet.p5()
 73 |         elif args.p8:
 74 |             alpha = punum.Alphabet.p8()
 75 |         elif args.p8a:
 76 |             alpha = punum.Alphabet.p8a()
 77 |         elif args.p16a:
 78 |             alpha = punum.Alphabet.p16a()
 79 | 
 80 | 
 81 |     print ("with",len(alpha.eexacts),"obtain",alpha.n," and ",alpha.n2)
 82 |     print (alpha.eexacts)
 83 | 
 84 | 
 85 |     # from arguments to operation groups
 86 |     ops1=[]
 87 |     ops2=[]
 88 |     asfloat=False
 89 |     for aop in args.op2:
 90 |         if aop == "plus":
 91 |             op = operator.add
 92 |         elif aop == "times":
 93 |             op = operator.mul
 94 |         else:
 95 |             print ("skipping op",aop)
 96 |             continue
 97 |         ops2.append((aop,op))
 98 | 
 99 |     two = alpha.convert(2)
100 |     for aop in args.op1:
101 |         if aop == "half":
102 |             op = lambda x: x/two
103 |         elif aop == "twice":
104 |             op = lambda x: x*two
105 |         elif aop == "square":
106 |             op = lambda x: x*x
107 |         elif aop == "exp2":
108 |             op = lambda x: math.exp2(x)
109 |         elif aop == "float":
110 |             asfloat=True
111 |             continue
112 |         else:
113 |             print ("unknown op",aop)
114 |             continue
115 |         ops1.append((aop,op))
116 | 
117 |     # prepare the outputs with flag and array
118 |     outputs = {}
119 |     for aop,op in ops2:
120 |         outputs[aop] = (2,[])
121 |     for aop,op in ops1:
122 |         outputs[aop] = (1,[])
123 |     if asfloat:
124 |         outputs["float"] = (-1,[])
125 |     # also listing
126 |     outputs["exacts"] = (0,alpha.eexacts)
127 | 
128 |     # enumerate all the exacts (i is correctly orderer 1/4)
129 |     for i,ae in enumerate(alpha.eexacts):
130 |         a = alpha.fromexactsindex(i) # fraction
131 |         ia = ~a
132 | 
133 |         # direct conversion table
134 |         if asfloat:
135 |             outputs["float"][1].append(dict(a=a,ai=a.v,y=a.exactvalue()))
136 |             outputs["float"][1].append(dict(a=ia,ai=ia.v,y=ia.exactvalue()))
137 | 
138 |         # unaries for the 4 cases: +- 1/x -1/x
139 |         for aop,op in ops1:
140 |             q = outputs[aop][1]
141 |             ma =-a;
142 |             mia=-ia;
143 |             y = op(a)
144 |             iy = op(ia)
145 |             my = op(ma)
146 |             miy = op(mia)
147 |             # 4 variants
148 |             q.append(dict(a=a,ai=a.v,yi=y.v,y=y.exactvalue()))
149 |             q.append(dict(a=ma,ai=ma.v,yi=my.v,y=my.exactvalue()))
150 |             q.append(dict(a=ia,ai=ia.v,yi=iy.v,y=iy.exactvalue()))
151 |             q.append(dict(a=mia,ai=mia.v,yi=miy.v,y=miy.exactvalue()))
152 | 
153 |         for j,be in enumerate(alpha.eexacts):
154 |             if j < i:
155 |                 continue
156 |             b = alpha.fromexactsindex(j)
157 |             # 8 cases 
158 |             ib = ~b
159 |             if i == 0:
160 |                 if j == 0:
161 |                     # exactly 1+1 or 1-1 
162 |                     # keep first v < second
163 |                     whats = [(a,b),(-a,b)]
164 |                 else:
165 |                     # a is 1, j > 1
166 |                     # keep first v < second
167 |                     whats = [(a,b),(a,-b),(-a,-ib),(a,-ib)]
168 |             elif i == j:
169 |                 whats = [(a,a),(a,-a),(ia,a),(ia,ia),(a,-ia)]
170 |             else:
171 |                 # both > 1
172 |                 whats = [(a,b),(a,-b),(ia,b),(-a,-ib),(-ia,-ib),(ia,-ib),(ia,-b),(a,-ib)]
173 |             for k,(xa,xb) in enumerate(whats):
174 |                 print (xa.v,xb.v,ae,be,k,len(whats))
175 |                 x1 = xa.exactvalue()
176 |                 x2 = xb.exactvalue()
177 |                 for aop,op in ops2:
178 |                     rr = outputs[aop][1]
179 |                     y = op(x1,x2)
180 |                     uy = alpha.convert(y)
181 |                     rr.append(dict(ai=xa.v,bi=xb.v,a=x1,b=x2,y=y,yi=uy.v))
182 |     if args.pickle:
183 |         outputs["two"] = (-2,[two.v,inv(two).v])
184 |         pickle.dump(outputs,open(args.pickle,"wb"),protocol=pickle.HIGHEST_PROTOCOL)
185 |     elif args.sparsetab:
186 |         for x,y in outputs.items():
187 |             o = open(args.sparsetab+x,"w",encoding="utf8")
188 |             mode,content = y
189 |             if mode == 2:
190 |                 # binop
191 |                 for x in content:
192 |                     o.write("%d %d %d\n" % (x["ai"],x["bi"],x["yi"]))
193 |             elif mode == 1:
194 |                 # unaryop
195 |                 for x in content:
196 |                     o.write("%d %d\n" % (x["ai"],x["yi"]))
197 |             elif mode == -1:
198 |                 # float
199 |                 for x in content:
200 |                     o.write("%d %f\n" % (x["ai"],x["y"]))
201 |             elif mode == 0:
202 |                 # exact list
203 |                 for x in y[1]:
204 |                     o.write("%d\n" % (x["ai"]))
205 |     else:
206 |         for x,y in outputs.items():
207 |             mode,content = y
208 |             print ("\n",x,mode,"\n")
209 |             print (tabulate.tabulate(content))
210 | 
211 | 
212 | 
213 | 
214 | if __name__ == '__main__':
215 |     main()


--------------------------------------------------------------------------------
/src/binary8.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Zbinary8 library
 3 |  *
 4 |  * Emanuele Ruffaldi 2017
 5 |  */
 6 | #include "binary8.hpp"
 7 | 
 8 | binary8tab::binary8tab(int a)
 9 | {
10 | }
11 | 
12 | binary8tab::binary8tab(float a)
13 | {
14 | }
15 | 
16 | binary8tab::binary8tab(double a) 
17 | {
18 | }
19 | 


--------------------------------------------------------------------------------
/src/binary8_gen.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Emanuele Ruffaldi (C) 2017
  3 |  * Templated C++ Posit
  4 |  */
  5 | #include <typeinfo>
  6 | #include <fstream>
  7 | #include <iostream>
  8 | #include "binary8.hpp"
  9 | #include "anyfloat.hpp"
 10 | 
 11 | #define SIGNEX(v, sb) ((v) | (((v) & (1 << (sb))) ? ~((1 << (sb))-1) : 0))
 12 | 
 13 | int main(int argc, char const *argv[])
 14 | {
 15 | 	// template <int expbits, int fractionbits, class value_t, class holder_t, class impl_t>
 16 | 	// match the posit8 from posit8.hpp
 17 | 	using X=anyfloat_emu<BINARY8_EXP,BINARY8_MAN,int8_t,uint8_t, float>;
 18 | 	union Q {
 19 | 		float f;
 20 | 		uint32_t i;
 21 | 	} ;
 22 | 	int8_t opadd[256*256],opmul[256*256],opdiv[256*256],opinv[256],opsquare[256],opexp2[256],ophalf[256];
 23 | 	uint32_t op2float[256];
 24 | 
 25 | 	for(int16_t s = -128 ; s < 128; s++)
 26 | 	{
 27 | 		X x;
 28 | 		x.v = s;
 29 | 		int32_t i = ((unsigned int)(uint16_t)s) & 0xFF;
 30 | 		float fx(x);
 31 | 		Q fxq;
 32 |         fxq.f = fx;
 33 | 
 34 |         opinv[i] = x.inv().v;
 35 |         opsquare[i] = X(fx*fx).v;
 36 |         op2float[i] = fxq.i; 
 37 |         ophalf[i] = X(fx/2).v;
 38 |         opexp2[i] = X(exp2(fx)).v;
 39 |         //std::cout << "twice " << (X)(fx*2) << " half " << (X)(fx/2) << std::endl;
 40 |     }
 41 | 
 42 | 	for(int16_t s1 = -128 ; s1 < 128; s1++)
 43 | 	{
 44 | 		X x1;
 45 | 		x1.v = s1;
 46 | 		int32_t i1 = ((unsigned int)(uint8_t)s1) & 0xFF;
 47 | 		float f1 = uint32_to_float(op2float[i1]); 
 48 | 
 49 | 		for(int16_t s2 = -128 ; s2 < 128; s2++)
 50 | 		{
 51 | 			X x2;
 52 | 			x2.v = s2;
 53 | 			int32_t i2 = ((unsigned int)(uint8_t)s2) & 0xFF;
 54 | 			float f2 = uint32_to_float(op2float[i2]); 
 55 | 
 56 | 	        opadd[i1*256+i2] = X(f1+f2).v;
 57 | 	        opmul[i1*256+i2] = X(f1*f2).v;
 58 | 	        opdiv[i1*256+i2] = X(f1/f2).v;
 59 | 	        //opadd[i*256+j] = (x*y).v;
 60 | 		}
 61 | 	}
 62 | 
 63 | 	std::ofstream onf(argc == 1 ? "binary8_tbl.cpp" : argv[1]);
 64 | 	onf << "#include <stdint.h>\nnamespace binary8ns {\n";
 65 | 	onf << "// type is " << typeid(X).name() << std::endl;
 66 | 	onf << "int8_t opadd[] = {\n";
 67 | 	for(int i  = 0; i < 256*256; i++)
 68 | 		onf << (int)(opadd[i]) << ",";
 69 | 	onf << "}; " << std::endl;
 70 | 	// emit the numbers as C file
 71 | 	onf << "int8_t opmul[] = {\n";
 72 | 	for(int i  = 0; i < 256*256; i++)
 73 | 		onf << (int)(opmul[i]) << ",";
 74 | 	onf << "}; " << std::endl;
 75 | 	onf << "int8_t opdiv[] = {\n";
 76 | 	for(int i  = 0; i < 256*256; i++)
 77 | 		onf << (int)(opdiv[i]) << ",";
 78 | 	onf << "}; " << std::endl;
 79 | 	onf << "int8_t opinv[] = {\n";
 80 | 	for(int i  = 0; i < 256; i++)
 81 | 		onf << (int)(opinv[i]) << ",";
 82 | 	onf << "}; " << std::endl;
 83 | 	onf << "int8_t opsquare[] = {\n";
 84 | 	for(int i  = 0; i < 256; i++)
 85 | 		onf << (int)(opsquare[i]) << ",";
 86 | 	onf << "}; " << std::endl;
 87 | 	onf << "int8_t opexp2[] = {\n";
 88 | 	for(int i  = 0; i < 256; i++)
 89 | 		onf << (int)(opexp2[i]) << ",";
 90 | 	onf << "}; " << std::endl;
 91 | 
 92 | 	onf << "int8_t ophalf[] = {\n";
 93 | 	for(int i  = 0; i < 256; i++)
 94 | 		onf << (int)(ophalf[i]) << ",";
 95 | 	onf << "}; " << std::endl;
 96 | 
 97 | 	onf << "uint32_t op2float[] = {\n";
 98 | 	for(int i  = 0; i < 256; i++)
 99 | 		onf << (op2float[i]) << ",";
100 | 	onf << "};} " << std::endl;
101 | 	return  0;
102 | }
103 | 
104 | 


--------------------------------------------------------------------------------
/src/floatTop8.cpp:
--------------------------------------------------------------------------------
 1 | #include "mex.h"   
 2 | //#include "zposit8.hpp"
 3 | #include "posit.h"
 4 | #include <stdint.h>
 5 | using zposit_type = Posit<int8_t,8,0,uint16_t,true>;
 6 | 
 7 | 
 8 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
 9 | {
10 |     if(nrhs != 1 && nlhs != 1)
11 |     {
12 |         mexPrintf("floatTozp8(input) -> output\n");
13 |         return;
14 |     }
15 |     bool complex = mxIsComplex(prhs[0]);
16 | 	const auto ndims = mxGetNumberOfDimensions(prhs[0]);
17 | 	const mwSize * dimi = mxGetDimensions(prhs[0]);
18 | 	plhs[0] = mxCreateUninitNumericArray(ndims,(mwSize*)dimi,mxINT8_CLASS,!complex ? mxREAL: mxCOMPLEX);
19 |     zposit_type * dst = (zposit_type*)mxGetData(plhs[0]);
20 |     int n = mxGetNumberOfElements(prhs[0])*(complex?2:1);
21 |     switch(mxGetClassID(prhs[0]))
22 |     {
23 |     	case mxDOUBLE_CLASS: 
24 |         	// double precision
25 |             {
26 |                 double * src = (double*)mxGetData(prhs[0]);
27 |                 for(int i = 0;i < n; i++)
28 |                 {
29 |                   dst[i] = zposit_type(src[i]);
30 |                 }
31 |             }
32 |     		break;
33 |         case mxSINGLE_CLASS: 
34 |             {
35 |                 float * src = (float*)mxGetData(prhs[0]);
36 |                 for(int i = 0;i < n; i++)
37 |                 {
38 |                   dst[i] = zposit_type(src[i]);
39 |                 }
40 |             }
41 |     		break;
42 |         case mxINT16_CLASS: 
43 |         case mxUINT16_CLASS:
44 |             {
45 |                 halffloat * src = (halffloat*)mxGetData(prhs[0]);
46 |                 for(int i = 0;i < n; i++)
47 |                 {
48 |                   dst[i] = zposit_type(typename zposit_type::UnpackedT(halffloat(src[i]))); // wrap cast to unpacked then to posit
49 |                 }
50 |             }
51 |     		break;
52 |         default:
53 |             mexErrMsgTxt("Unsupported type of output: only double, single or 16-bit has half float\n");
54 |             break;
55 |     }
56 | 
57 | }


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Emanuele Ruffaldi (C) 2017
  3 |  * Templated C++ Posit
  4 |  */
  5 |  #include "posit.h"
  6 | 
  7 | using X=Posit<int32_t,32,0,uint64_t,PositSpec::WithNan> ; // es, total
  8 | #include <iostream>
  9 | 
 10 | template <class T,class W>
 11 | void checkfloat(T f)
 12 | {
 13 | union {
 14 |    T f;
 15 |    W i;
 16 | } tmp;
 17 |    tmp.f = f;
 18 |     W x[1] = {tmp.i};
 19 |     std::cout << "--\n";
 20 |     X::UnpackedT fu(f);
 21 |     std::cout << "f:float   " << f << std::endl;
 22 |     std::cout << "f:float hex " << std::hex << x[0] << std::dec << std::endl;
 23 |     std::cout << "fu:unpacked" << fu << std::endl;
 24 |     T fuf = (float)fu;
 25 |    tmp.f = fu;
 26 |     x[0] = {tmp.i};
 27 |     std::cout << "fuf: float " << fuf << std::endl;
 28 |     std::cout << "fuf:float hex " << std::hex << x[0] << std::dec << std::endl;
 29 |     auto fup = X(fu);
 30 |     std::cout << "fup is " << std::hex << fup << std::endl;
 31 |     //X q;
 32 |     //q.setBits(0x10);
 33 |     //up = q.v;
 34 |     X::UnpackedT fupu(fup.unpack());
 35 |     std::cout << "fupu   " << fupu << std::endl;
 36 |     T fupuf = fupu.pack_float<T>();
 37 |     std::cout << "fupuf  " << fupuf  << std::endl;
 38 | 
 39 |     if(f != fuf)
 40 |     {
 41 |         std::cout << "ERROR fuf vs f\n";
 42 |     }
 43 | 
 44 |     if(fupu != fu)
 45 |     {
 46 |         std::cout << "ERROR fupu != fu\n";
 47 |     }
 48 |     else if(fupuf != f)
 49 |     {
 50 |         std::cout << "ERROR fupuf vs fup\n";
 51 |     }
 52 | }
 53 | 
 54 | template <class T,class W>
 55 | void checkup(X::UnpackedT u)
 56 | {
 57 |     W x[1];
 58 |     std::cout << "--\n";
 59 |     std::cout << "u:unpacked" << u << std::endl;
 60 |     T uf = u.pack_float<T>();
 61 | union {
 62 |    T f;
 63 |    W i;
 64 | } tmp;
 65 |    tmp.f = uf;
 66 |     x[0] = {tmp.i};
 67 |     std::cout << "uf: float " << uf << std::endl;
 68 |     std::cout << "uf:float hex " << std::hex << x[0] << std::dec << std::endl;
 69 |     X::UnpackedT ufu(uf);
 70 |     std::cout << "ufu " << ufu << std::endl;
 71 | 
 72 |     auto up = X(u);
 73 |     std::cout << "fup is " << std::hex << up << std::endl;
 74 |     //X q;
 75 |     //q.setBits(0x10);
 76 |     //up = q.v;
 77 |     X::UnpackedT upu(up.unpack());
 78 |     std::cout << "fupu   " << upu << std::endl;
 79 |     T upuf = upu.pack_float<T>();
 80 |     std::cout << "fupuf  " << upuf  << std::endl;
 81 | 
 82 |     if(ufu != u)
 83 |     {
 84 |         std::cout << "ERROR ufu != u\n";
 85 |     }
 86 | 
 87 | }
 88 | 
// Demo driver for the posit type X: prints the compile-time configuration and
// then a few conversions and the "one minus" operation on 0.3.
int main(int argc, char const *argv[]) {
    std::cout << "N " << X::PT::POSIT_SIZE << " " << X::PT::POSIT_ESP_SIZE << std::endl;
    // std::hex stays in effect for every integer field of this statement
    // until std::dec at its end; the values cast to double are not affected.
    std::cout << "Configuration " << std::hex << " MSB = " << X::PT::POSIT_MSB << " Mask=" << X::PT::POSIT_MASK
              << " Sign=" << X::PT::POSIT_SIGNBIT << " one=" << X::PT::POSIT_ONE << " inf=" << X::PT::POSIT_PINF
              << " half=" << (double)X(X::PT::POSIT_HALF) << " twice=" <<  (double)X(X::PT::POSIT_TWO) 
              << " min=" << (double)X(X::PT::POSIT_MINNEG) << " max=" << X(X::PT::POSIT_MAXPOS) 
              << " maxexponent " << X::PT::maxexponent() << std::dec << std::endl;
    // Disabled self-test: the condition is the literal false, so this block
    // is compiled but never executed.
    if(false)
    {
            float values[] = {1, 2, INFINITY, NAN, 0, 4.0, 10.0, -1.0, -2.0, -4.0, -10.0, 1 / 2.0, 1 / 4.0};

            // the checks run at double precision with 64-bit hex dumps
            using TQ = double;
            using TI = uint64_t;
            // TODO: bugs with fractional 1/2 and 1/4
            for (unsigned int i = 0; i < sizeof(values) / sizeof(values[0]); i++) {
                checkfloat<TQ, TI>(values[i]);
            }
            std::cout << "Zero is " << X::zero().v << " unpacked " << X::zero().unpack() << std::endl;
            X x(2.0);
            std::cout << "Inverse " << x << " " << x.inv() << " " << X(0.5) << std::endl;
            //static_assert(X(2.0).inv() == X(0.5),"test"); // PROBLEM DUE to UNION
        std::cout << "---------\n";
        std::cout << "TESTING UNPACKED\n";
        // now check the unpack
        checkup<TQ,TI>(X::UnpackedT(40,0,true));
        checkup<TQ,TI>(X::UnpackedT(X::PT::maxexponent(),0,true));
        checkup<TQ,TI>(X::UnpackedT(-X::PT::maxexponent(),0,true));
        checkup<TQ,TI>(X::UnpackedT(X::PT::maxexponent(),0,false));
        checkup<TQ,TI>(X::UnpackedT(-X::PT::maxexponent(),0,false));
        }

    // 0.3 unpacked from a float literal, read back as float and as double
    X::UnpackedT yu(0.3f);
    std::cout << "YUff " << (float)yu << std::endl;
    std::cout << "YUfd " << (double)yu << std::endl;

    // the same starting from a double literal
    X::UnpackedT yud(0.3);
    std::cout << "YUdf " << (float)yud << std::endl;
    std::cout << "YUdd " << (double)yud << std::endl;
    X y(0.3);
    std::cout << "One " << (float)X::one() << std::endl;
    std::cout << "Is in normalized range: " << (float)X::one() << " " << X::one().isUnitRange() << std::endl;
    std::cout << "Is in normalized range: " << (float)y << " " << y.isUnitRange() << std::endl;
    std::cout << "Is !negative " << y.isnegative() << std::endl;
    // Compares 1 - y computed with the subtraction operator against
    // y.urOneMinus(); std::hex is not reset after this statement.
    std::cout << "One Minus " << (float)y << "(hex " << std::hex << y.v << ") " << (float)(X::one()-y) << "(hex " << (X::one()-y).v <<  ") and fast " << (float)(y.urOneMinus()) << " (hex " << (y.urOneMinus()).v  <<  ")"<< std::endl;

    /*
    int bits[3];
    int rs,es;
    uint64_t fs;
    std::cout << "y unpacked is " << y.unpack() << std::endl;
    y.analy(bits[0],bits[1],bits[2],rs,es,fs);
    std::cout << "analyzing " << std::hex << y.v << " bits:" << bits[0] << " " << bits[1] << " " << bits[2] << " fields (rs,es,fs) " << rs << " " << es << " "  << fs << std::endl;
    */
	return 0;
}
144 | 


--------------------------------------------------------------------------------
/src/maineigen.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Emanuele Ruffaldi (C) 2017
 3 |  * Templated C++ Posit
 4 |  */
 5 |  #include "positeigen.h"
 6 | 
 7 | // https://github.com/libigl/eigen/blob/master/unsupported/Eigen/MPRealSupport
 8 | int main(int argc, char const *argv[])
 9 | {
10 | 	// TODO VERIFY MORE
11 | 	using P=Posit<int64_t,64,1,uint64_t,true>;
12 | 	using UT=typename P::UnpackedT;
13 |     using MatrixP=Eigen::Matrix<P,Eigen::Dynamic,Eigen::Dynamic>  ;
14 |   	using VectorP=Eigen::Matrix<P,Eigen::Dynamic,1>        ;
15 | 
16 |   	MatrixP A = Eigen::MatrixXf::Random(10,10).cast<P>();
17 |   	MatrixP B = MatrixP::Ones(1,1);
18 |   	MatrixP C = MatrixP::Ones(1,1);
19 | 
20 | //  	std::cout << "Resulting A is " << (B.cwiseProduct(A)).cast<float>() << std::endl;
21 |   	//std::cout << "Resulting B+C is " << (B+C).cast<float>() << std::endl;
22 |   	//P A = MatrixP::Reandom(10,10);
23 | 	Eigen::VectorXd af(4),bf(4);
24 | 	af<< 3.2e8, 1, -1, 8.0e7;
25 | 	bf << 4.0e7, 1, -1, -1.6e8;
26 | 	//af<< 3.2e8, 8.0e7,1,-1;
27 | 	//bf << 4.0e7,  -1.6e8,1,-1;
28 | 	VectorP ap,bp;
29 | 	ap= af.cast<P>();
30 | 	bp= bf.cast<P>();
31 | 	std::cout << "3.2e8 ==> unpacked " << UT(af(0)) << std::endl;
32 | 	std::cout << "3.2e8 ==> unpacked ==> double " << (double)P(UT(af(0))) << std::endl;
33 | 	std::cout << "af0 is: " << ap(0).v << " " << ap(0) << std::endl;
34 | 	std::cout << "af1 is: " << ap(1).v << " " << ap(1) << std::endl;
35 | 	VectorP abp = ap.cwiseProduct(bp);
36 | 
37 | 	std::cout << "af " << af.transpose() << std::endl;
38 | 	//std::cout << "ap " << ap.transpose() << std::endl;
39 | 	std::cout << "ap " << ap.cast<double>().transpose() << std::endl;
40 | 	std::cout << "----"<< std::endl;
41 | 	std::cout << "bf " << bf.transpose() << std::endl;
42 | 	//std::cout << "bp " << bp.transpose() << std::endl;
43 | 	std::cout << "bp " << bp.cast<double>().transpose() << std::endl;
44 | 	
45 | 	std::cout << "----"<< std::endl;
46 | 	std::cout << "af prod bf " << af.cwiseProduct(bf).transpose() << std::endl;
47 | 	std::cout << "af dot bf: " << af.dot(bf) << std::endl;
48 | 	std::cout << "----"<< std::endl;
49 | 
50 | 	std::cout << "ap prod bp (double)" << abp.cast<double>().transpose() << std::endl;
51 | 	std::cout << "one is " << std::hex << P::PT::POSIT_ONE << std::endl;
52 | 	std::cout << "ap prod bp (posit)" << posit_formatter<P>(abp[0]) << " "<< posit_formatter<P>(abp[1]) << " " << posit_formatter<P>(abp[2])  << " " << posit_formatter<P>(abp[3]) << std::endl;
53 | 	//std::cout << "ap prod bp " << abp.cast<P::UnpackedT>().transpose() << std::endl;
54 | 	std::cout << "sum(ap .* bp) " << abp.sum() << " " << abp.sum().unpack() << " " << (double)abp.sum() << std::endl;
55 | 	std::cout << "ap dot bp: " << ap.dot(bp).unpack() << " " << (double)(ap.dot(bp)) << std::endl; // SHOULD PRODUCE 2 and not ZERO
56 | 
57 | 	return 0;
58 | }


--------------------------------------------------------------------------------
/src/posit10.cpp:
--------------------------------------------------------------------------------
  1 | /**
 * posit10 library
  3 |  *
  4 |  * Emanuele Ruffaldi 2017
  5 |  */
  6 | #include "posit10.hpp"
  7 | 
  8 | 
  9 | std::ostream & operator << (std::ostream & ons, const posit10 & p)
 10 | {
 11 | 	ons << "posit(" << p.uu() << ")" ;
 12 | 	return ons;
 13 | }
 14 | posit10::posit10(int a)
 15 | {
 16 | 	if(a == 0)
 17 | 		v = 0;
 18 | 	else if(a == 1)
 19 | 		v = 0x100;
 20 | 	else 
 21 | 		v = FPT(a).v;
 22 | }
 23 | 
 24 | posit10::posit10(float a)
 25 | {
 26 | 	if(a == 0)
 27 | 		v = 0;
 28 | 	else if(a == 1)
 29 | 		v = 0x100;
 30 | 	else 
 31 | 		v = FPT(a).v;
 32 | }
 33 | 
 34 | posit10::posit10(double a)
 35 | {
 36 | 	if(a == 0)
 37 | 		v = 0;
 38 | 	else if(a == 1)
 39 | 		v = 0x100;
 40 | 	else 
 41 | 		v = FPT(a).v;
 42 | 
 43 | }
 44 | 
 45 | /**
 Positive number families
 47 | 
 48 |  00000000
 49 |  ..
 50 |  00001100 = .1875
 51 |  ..
 52 |  00010000 = .25
 53 |  ..
 54 |  00011000 = 0.375
 55 |  ..
 00100000 = 0.5
 57 |  ..
 58 |  00110000 = 0.75
 59 |  ..
 60 |  01000000 = 1
 61 |  ..
 62 |  01010000 = 1.5
 63 |  ..
 64 |  01100000 = 2
 65 |  ...
 66 |  01110000 = 4
 67 |  ...
 68 |  01111111 = max
 69 |  10000000 = infinity
 70 |  */
 71 | 
 72 | 	/*
 73 | posit10 posit10::half() const
 74 | {
 75 | 	int8_t aa = v < 0 ? -v : v;
 76 | 	if(has_neg_exponent()) // [0..1)
 77 | 		aa = aa >> 1; // down to 0
 78 | 	else if aa < 2 // [1..2)
 79 | 		aa = ((aa & 0x3F)|0x20);
 80 | 	else
 81 | 		aa = ((aa << 1) & 0x7F) | 0x40;  // down to 01000000 == 0x4  
 82 | 	return v < 0 ? -aa: aa;
 83 | 	*/
 84 | 
 85 | 
 86 | posit10 posit10::twice() const
 87 | {
 88 | 	/*
 89 | 	int8_t aa = v < 0 ? -v : v;
 90 | 	if(has_neg_exponent()) // [0..1)
 91 | 		aa = (aa << 1) & 0x3F; // up to [1..] 
 92 | 	else // [2...]
 93 | 		aa = (aa >> 1) | 0x40; // up to 011111111 without overflow
 94 | 	return v < 0 ? -aa: aa;
 95 | 	*/
 96 | 	return (posit10)(as_posit()*(FPT)2);
 97 | }
 98 | 
 99 | 
100 | 
101 | 


--------------------------------------------------------------------------------
/src/posit10_gen.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Emanuele Ruffaldi (C) 2017
  3 |  * Templated C++ Posit
  4 |  */
  5 | #include <typeinfo>
  6 | #include <fstream>
  7 | #include <iostream>
  8 | #include "posit.h"
  9 | #include "posit10.hpp"
 10 | 
// Sign-extends v from bit position sb: when bit sb is set, every bit from sb
// upwards is set as well.
#define SIGNEX(v, sb) ((v) | (((v) & (1 << (sb))) ? ~((1 << (sb))-1) : 0))

// Unary lookup tables, one entry per 10-bit pattern (1024 entries each).
int16_t opinv[1024],opsquare[1024],opexp2[1024],ophalf[1024];
// Float value of every pattern, stored as the raw 32-bit representation.
uint32_t op2float[1024];
// Binary lookup tables indexed by i1*1024+i2; at 2 MiB each they are defined
// at file scope.
int16_t opadd[1024*1024],opmul[1024*1024],opdiv[1024*1024];
 16 | 
 17 | 
 18 | int main(int argc, char const *argv[])
 19 | {
 20 | 	// match the posit16 from posit16.hpp
 21 | 	using X=Posit<typename posit10::PT::POSIT_STYPE,10,posit10::PT::POSIT_ESP_SIZE,typename posit10::FT,posit10::PT::positspec>;
 22 | 	union Q {
 23 | 		float f;
 24 | 		uint32_t i;
 25 | 	} ;
 26 | 	for(int16_t s = -512 ; s < 512; s++)
 27 | 	{
 28 | 		X x;
 29 | 		x.v = s;
 30 | 		int32_t i = ((unsigned int)(uint16_t)s) & 0x03FF;
 31 | 		float fx(x);
 32 | 		Q fxq;
 33 |         fxq.f = fx;
 34 |         opinv[i] = x.inv().v;
 35 |         opsquare[i] = X(fx*fx).v;
 36 |         op2float[i] = fxq.i; 
 37 |         ophalf[i] = X(fx/2).v;
 38 |         opexp2[i] = X(exp2(fx)).v;
 39 |     }
 40 | 
 41 | 	for(int16_t s1 = -512 ; s1 < 512; s1++)
 42 | 	{
 43 | 		X x1;
 44 | 		x1.v = s1;
 45 | 		int32_t i1 = ((unsigned int)(uint16_t)s1) & 0x03FF;
 46 | 		float f1 = uint32_to_float(op2float[i1]); 
 47 | 
 48 | 		for(int16_t s2 = -512 ; s2 < 512; s2++)
 49 | 		{
 50 | 			X x2;
 51 | 			x2.v = s2;
 52 | 			int32_t i2 = ((unsigned int)(uint16_t)s2) & 0x03FF;
 53 | 			float f2 = uint32_to_float(op2float[i2]); 
 54 | 
 55 | 	        opadd[i1*1024+i2] = X(f1+f2).v;
 56 | 	        opmul[i1*1024+i2] = X(f1*f2).v;
 57 | 	        opdiv[i1*1024+i2] = X(f1/f2).v;
 58 | 	        //opadd[i*1024+j] = (x*y).v;
 59 | 		}
 60 | 		
 61 | 	}
 62 | 
 63 | 	std::ofstream onf(argc == 1 ? "posit10_tbl.cpp": argv[1]);
 64 | 	onf << "#include <stdint.h>\nnamespace posit10ns {\n";
 65 | 	onf << "// type is " << typeid(X).name() << std::endl;
 66 | 	onf << "int16_t opmul[] = {\n";
 67 | 	for(int i  = 0; i < 1024*1024; i++)
 68 | 		onf << (int)(opmul[i]) << ",";
 69 | 	onf << "}; " << std::endl;
 70 | 	onf << "int16_t opadd[] = {\n";
 71 | 	for(int i  = 0; i < 1024*1024; i++)
 72 | 		onf << (int)(opadd[i]) << ",";
 73 | 	onf << "}; " << std::endl;
 74 | 	onf << "int16_t opdiv[] = {\n";
 75 | 	for(int i  = 0; i < 1024*1024; i++)
 76 | 		onf << (int)(opdiv[i]) << ",";
 77 | 	onf << "}; " << std::endl;
 78 | 	onf << "int16_t opinv[] = {\n";
 79 | 	for(int i  = 0; i < 1024; i++)
 80 | 		onf << (int)(opinv[i]) << ",";
 81 | 	onf << "}; " << std::endl;
 82 | 
 83 | 	onf << "int16_t ophalf[] = {\n";
 84 | 	for(int i  = 0; i < 1024; i++)
 85 | 		onf << (int)(ophalf[i]) << ",";
 86 | 	onf << "}; " << std::endl;
 87 | 
 88 | 	onf << "int16_t opsquare[] = {\n";
 89 | 	for(int i  = 0; i < 1024; i++)
 90 | 		onf << (int)(opsquare[i]) << ",";
 91 | 	onf << "}; " << std::endl;
 92 | 	onf << "int16_t opexp2[] = {\n";
 93 | 	for(int i  = 0; i < 1024; i++)
 94 | 		onf << (int)(opexp2[i]) << ",";
 95 | 	onf << "}; " << std::endl;
 96 | 	onf << "uint32_t op2float[] = {\n";
 97 | 	for(int i  = 0; i < 1024; i++)
 98 | 		onf << (op2float[i]) << ",";
 99 | 	onf << "};} " << std::endl;
100 | 	return  0;
101 | }
102 | 


--------------------------------------------------------------------------------
/src/posit12.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * ZPosit8 library
 3 |  *
 4 |  * Emanuele Ruffaldi 2017
 5 |  */
 6 | #include "posit12.hpp"
 7 | 
 8 | 
 9 | std::ostream & operator << (std::ostream & ons, const posit12 & p)
10 | {
11 | 	ons << "posit(" << p.uu() << ")" ;
12 | 	return ons;
13 | }
14 | posit12::posit12(int a): v((FPT(a)).v)
15 | {
16 | }
17 | 
18 | posit12::posit12(float a): v((FPT(a)).v)
19 | {
20 | }
21 | 
22 | posit12::posit12(double a) : v((FPT(a)).v)
23 | {
24 | }
25 | 
26 | /**
27 |  Positive Number families
28 | 
29 |  00000000
30 |  ..
31 |  00001100 = .1875
32 |  ..
33 |  00010000 = .25
34 |  ..
35 |  00011000 = 0.375
36 |  ..
37 |  00100000 = 0.5
38 |  ..
39 |  00110000 = 0.75
40 |  ..
41 |  01000000 = 1
42 |  ..
43 |  01010000 = 1.5
44 |  ..
45 |  01100000 = 2
46 |  ...
47 |  01110000 = 4
48 |  ...
49 |  01111111 = max
50 |  10000000 = infinity
51 |  */
52 | 
53 | posit12 posit12::half() const
54 | {
55 | 	/*
56 | 	int8_t aa = v < 0 ? -v : v;
57 | 	if(has_neg_exponent()) // [0..1)
58 | 		aa = aa >> 1; // down to 0
59 | 	else if aa < 2 // [1..2)
60 | 		aa = ((aa & 0x3F)|0x20);
61 | 	else
62 | 		aa = ((aa << 1) & 0x7F) | 0x40;  // down to 01000000 == 0x4  
63 | 	return v < 0 ? -aa: aa;
64 | 	*/
65 | 	return (posit12)(as_posit()/(FPT)2);
66 | }
67 | 
68 | 
69 | posit12 posit12::twice() const
70 | {
71 | 	/*
72 | 	int8_t aa = v < 0 ? -v : v;
73 | 	if(has_neg_exponent()) // [0..1)
74 | 		aa = (aa << 1) & 0x3F; // up to [1..] 
75 | 	else // [2...]
76 | 		aa = (aa >> 1) | 0x40; // up to 011111111 without overflow
77 | 	return v < 0 ? -aa: aa;
78 | 	*/
79 | 	return (posit12)(as_posit()*(FPT)2);
80 | }
81 | 


--------------------------------------------------------------------------------
/src/posit12_gen.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Emanuele Ruffaldi (C) 2017
 3 |  * Templated C++ Posit
 4 |  */
 5 | #include <typeinfo>
 6 | #include <fstream>
 7 | #include <iostream>
 8 | #include "posit.h"
 9 | #include "posit12.hpp"
10 | 
11 | #define SIGNEX(v, sb) ((v) | (((v) & (1 << (sb))) ? ~((1 << (sb))-1) : 0))
12 | 
13 | int16_t opinv[4096],opsquare[4096],opexp2[4096];
14 | uint32_t op2float[4096];
15 | 
16 | int main(int argc, char const *argv[])
17 | {
18 | 	// match the posit16 from posit16.hpp
19 | 	using X=Posit<typename posit12::PT::POSIT_STYPE,12,posit12::PT::POSIT_ESP_SIZE,typename posit12::FT,posit12::PT::positspec>;
20 | 	union Q {
21 | 		float f;
22 | 		uint32_t i;
23 | 	} ;
24 | 	for(int16_t s = -2048 ; s < 2048; s++)
25 | 	{
26 | 		X x;
27 | 		x.v = s;
28 | 		int32_t i = ((unsigned int)(uint16_t)s) & 0x0FFF;
29 |         float fx = (float)x;
30 |         //std::cout << s << " " << i << " " << fx << std::endl;
31 |         Q fxq;
32 |         fxq.f = fx;
33 |         opinv[i] = x.inv().v;
34 |         opsquare[i] = X(fx*fx).v;
35 |         op2float[i] = fxq.i; 
36 |         opexp2[i] = X(exp2(fx)).v;
37 |         //std::cout << "twice " << (X)(fx*2) << " half " << (X)(fx/2) << std::endl;
38 |         /*
39 | 		for(int j = 0; j < 4096; j++)
40 | 		{
41 | 	        X y(SIGNEX(j,X::vtotalbits-1));
42 | 	        float fy = (float)y;
43 | 	        opadd[i*4096+j] = X(fx+fy).v;
44 | 	        opmul[i*4096+j] = X(fx*fy).v;
45 | 	        opdiv[i*4096+j] = X(fx/fy).v;
46 | 	        //opadd[i*4096+j] = (x*y).v;
47 | 		}
48 | 		*/
49 | 	}
50 | 
51 | 	std::ofstream onf(argc == 1 ? "posit12_tbl.cpp": argv[1]);
52 | 	onf << "#include <stdint.h>\nnamespace posit12ns {\n";
53 | 	onf << "// type is " << typeid(X).name() << std::endl;
54 | 	onf << "int16_t opinv[] = {\n";
55 | 	for(int i  = 0; i < 4096; i++)
56 | 		onf << (int)(opinv[i]) << ",";
57 | 	onf << "}; " << std::endl;
58 | 	onf << "int16_t opsquare[] = {\n";
59 | 	for(int i  = 0; i < 4096; i++)
60 | 		onf << (int)(opsquare[i]) << ",";
61 | 	onf << "}; " << std::endl;
62 | 	onf << "int16_t opexp2[] = {\n";
63 | 	for(int i  = 0; i < 4096; i++)
64 | 		onf << (int)(opexp2[i]) << ",";
65 | 	onf << "}; " << std::endl;
66 | 	onf << "uint32_t op2float[] = {\n";
67 | 	for(int i  = 0; i < 4096; i++)
68 | 		onf << (op2float[i]) << ",";
69 | 	onf << "};} " << std::endl;
70 | 	return  0;
71 | }
72 | 


--------------------------------------------------------------------------------
/src/posit8.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * ZPosit8 library
  3 |  *
  4 |  * Emanuele Ruffaldi 2017
  5 |  */
  6 | #include "posit8.hpp"
  7 | 
  8 | 
  9 | std::ostream & operator << (std::ostream & ons, const posit8 & p)
 10 | {
 11 | 	ons << "posit(" << p.uu() << ")" ;
 12 | 	return ons;
 13 | }
 14 | posit8::posit8(int a)
 15 | {
 16 | 	if(a == 0)
 17 | 		v = 0;
 18 | 	else if(a == 1)
 19 | 		v = 64;
 20 | 	else 
 21 | 		v = FPT(a).v;
 22 | 
 23 | }
 24 | 
 25 | posit8::posit8(float a)
 26 | {
 27 | 		if(a == 0)
 28 | 		v = 0;
 29 | 	else if(a == 1)
 30 | 		v = 64;
 31 | 	else 
 32 | 		v = FPT(a).v;
 33 | 
 34 | }
 35 | 
 36 | posit8::posit8(double a) 
 37 | {
 38 | 	if(a == 0)
 39 | 		v = 0;
 40 | 	else if(a == 1)
 41 | 		v = 64;
 42 | 	else 
 43 | 		v = FPT(a).v;
 44 | 
 45 | }
 46 | 
 47 | 
 48 | /**
 49 |  Positive Number families
 50 | 
 51 |  00000000
 52 |  ..
 53 |  00001100 = .1875
 54 |  ..
 55 |  00010000 = .25
 56 |  ..
 57 |  00011000 = 0.375
 58 |  ..
 59 |  00100000 = 0.5
 60 |  ..
 61 |  00110000 = 0.75
 62 |  ..
 63 |  01000000 = 1
 64 |  ..
 65 |  01010000 = 1.5
 66 |  ..
 67 |  01100000 = 2
 68 |  ...
 69 |  01110000 = 4
 70 |  ...
 71 |  01111111 = max
 72 |  10000000 = infinity
 73 |  */
 74 | 	/*
 75 | 	int8_t aa = v < 0 ? -v : v;
 76 | 	if(has_neg_exponent()) // [0..1)
 77 | 		aa = aa >> 1; // down to 0
 78 | 	else if aa < 2 // [1..2)
 79 | 		aa = ((aa & 0x3F)|0x20);
 80 | 	else
 81 | 		aa = ((aa << 1) & 0x7F) | 0x40;  // down to 01000000 == 0x4  
 82 | 	return v < 0 ? -aa: aa;
 83 | 	*/
 84 | 
 85 | 
 86 | 
 87 | posit8 posit8::twice() const
 88 | {
 89 | 	/*
 90 | 	int8_t aa = v < 0 ? -v : v;
 91 | 	if(has_neg_exponent()) // [0..1)
 92 | 		aa = (aa << 1) & 0x3F; // up to [1..] 
 93 | 	else // [2...]
 94 | 		aa = (aa >> 1) | 0x40; // up to 011111111 without overflow
 95 | 	return v < 0 ? -aa: aa;
 96 | 	*/
 97 | 	return (posit8)(as_posit()*(FPT)2);
 98 | }
 99 | 
100 | 
101 | 


--------------------------------------------------------------------------------
/src/posit8_gen.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Emanuele Ruffaldi (C) 2017
  3 |  * Templated C++ Posit
  4 |  */
  5 | #include <typeinfo>
  6 | #include <fstream>
  7 | #include <iostream>
  8 | #include "posit.h"
  9 | #include "posit8.hpp"
 10 | 
 11 | #define SIGNEX(v, sb) ((v) | (((v) & (1 << (sb))) ? ~((1 << (sb))-1) : 0))
 12 | 
 13 | int main(int argc, char const *argv[])
 14 | {
 15 | 	// match the posit8 from posit8.hpp
 16 | 	using X=Posit<typename posit8::PT::POSIT_STYPE,8,posit8::PT::POSIT_ESP_SIZE,typename posit8::FT,posit8::PT::positspec>;
 17 | 	union Q {
 18 | 		float f;
 19 | 		uint32_t i;
 20 | 	} ;
 21 | 	int8_t opadd[256*256],opmul[256*256],opdiv[256*256],opinv[256],opsquare[256],opexp2[256],ophalf[256];
 22 | 	uint32_t op2float[256];
 23 | 
 24 | 	for(int16_t s = -128 ; s < 128; s++)
 25 | 	{
 26 | 		X x;
 27 | 		x.v = s;
 28 | 		int32_t i = ((unsigned int)(uint16_t)s) & 0xFF;
 29 | 		float fx(x);
 30 | 		Q fxq;
 31 |         fxq.f = fx;
 32 | 
 33 |         opinv[i] = x.inv().v;
 34 |         opsquare[i] = X(fx*fx).v;
 35 |         op2float[i] = fxq.i; 
 36 |         ophalf[i] = X(fx/2).v;
 37 |         opexp2[i] = X(exp2(fx)).v;
 38 |         //std::cout << "twice " << (X)(fx*2) << " half " << (X)(fx/2) << std::endl;
 39 |     }
 40 | 
 41 | 	for(int16_t s1 = -128 ; s1 < 128; s1++)
 42 | 	{
 43 | 		X x1;
 44 | 		x1.v = s1;
 45 | 		int32_t i1 = ((unsigned int)(uint8_t)s1) & 0xFF;
 46 | 		float f1 = uint32_to_float(op2float[i1]); 
 47 | 
 48 | 		for(int16_t s2 = -128 ; s2 < 128; s2++)
 49 | 		{
 50 | 			X x2;
 51 | 			x2.v = s2;
 52 | 			int32_t i2 = ((unsigned int)(uint8_t)s2) & 0xFF;
 53 | 			float f2 = uint32_to_float(op2float[i2]); 
 54 | 
 55 | 	        opadd[i1*256+i2] = X(f1+f2).v;
 56 | 	        opmul[i1*256+i2] = X(f1*f2).v;
 57 | 	        opdiv[i1*256+i2] = X(f1/f2).v;
 58 | 	        //opadd[i*256+j] = (x*y).v;
 59 | 		}
 60 | 	}
 61 | 
 62 | 	std::ofstream onf(argc == 1 ? "posit8_tbl.cpp" : argv[1]);
 63 | 	onf << "#include <stdint.h>\nnamespace posit8ns {\n";
 64 | 	onf << "// type is " << typeid(X).name() << std::endl;
 65 | 	onf << "int8_t opadd[] = {\n";
 66 | 	for(int i  = 0; i < 256*256; i++)
 67 | 		onf << (int)(opadd[i]) << ",";
 68 | 	onf << "}; " << std::endl;
 69 | 	// emit the numbers as C file
 70 | 	onf << "int8_t opmul[] = {\n";
 71 | 	for(int i  = 0; i < 256*256; i++)
 72 | 		onf << (int)(opmul[i]) << ",";
 73 | 	onf << "}; " << std::endl;
 74 | 	onf << "int8_t opdiv[] = {\n";
 75 | 	for(int i  = 0; i < 256*256; i++)
 76 | 		onf << (int)(opdiv[i]) << ",";
 77 | 	onf << "}; " << std::endl;
 78 | 	onf << "int8_t opinv[] = {\n";
 79 | 	for(int i  = 0; i < 256; i++)
 80 | 		onf << (int)(opinv[i]) << ",";
 81 | 	onf << "}; " << std::endl;
 82 | 	onf << "int8_t opsquare[] = {\n";
 83 | 	for(int i  = 0; i < 256; i++)
 84 | 		onf << (int)(opsquare[i]) << ",";
 85 | 	onf << "}; " << std::endl;
 86 | 	onf << "int8_t opexp2[] = {\n";
 87 | 	for(int i  = 0; i < 256; i++)
 88 | 		onf << (int)(opexp2[i]) << ",";
 89 | 	onf << "}; " << std::endl;
 90 | 
 91 | 	onf << "int8_t ophalf[] = {\n";
 92 | 	for(int i  = 0; i < 256; i++)
 93 | 		onf << (int)(ophalf[i]) << ",";
 94 | 	onf << "}; " << std::endl;
 95 | 
 96 | 	onf << "uint32_t op2float[] = {\n";
 97 | 	for(int i  = 0; i < 256; i++)
 98 | 		onf << (op2float[i]) << ",";
 99 | 	onf << "};} " << std::endl;
100 | 	return  0;
101 | }
102 | 


--------------------------------------------------------------------------------
/src/simdposit8.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * ZPosit8 library
 3 |  *
 4 |  * Emanuele Ruffaldi 2017
 5 |  */
 6 | #include "simdposit8.hpp"
 7 | 
 8 | 
 9 | 
10 | 
11 | std::ostream & operator << (std::ostream & ons, const simd_posit8_32 & p)
12 | {
13 | 	ons << (std::array<float,32>)p;
14 | 	return ons;
15 | }
16 | simd_posit8_32::simd_posit8_32(int a)
17 | {
18 | 	auto q = FPT(a).v;
19 | 	std::cout << "initing from integer " << a << " back as " << (float)(FPT(FPT::DeepInit(),q)) << " as dec " << std::dec << (int)q << std::dec <<std::endl;
20 | 	v = _mm256_set1_epi8(q);
21 | }
22 | 
23 | simd_posit8_32::simd_posit8_32(float a)
24 | {
25 | 	auto q = FPT(a).v;
26 | 	std::cout << "initing from float " << a << " back as  " << (float)(FPT(FPT::DeepInit(),q)) << " as dec " << std::dec << (int)q << std::dec <<std::endl;
27 | 	v = _mm256_set1_epi8(q);
28 | }
29 | /**
30 |  Positive Number families
31 | 
32 |  00000000
33 |  ..
34 |  00001100 = .1875
35 |  ..
36 |  00010000 = .25
37 |  ..
38 |  00011000 = 0.375
39 |  ..
40 |  00100000 = 0.5
41 |  ..
42 |  00110000 = 0.75
43 |  ..
44 |  01000000 = 1
45 |  ..
46 |  01010000 = 1.5
47 |  ..
48 |  01100000 = 2
49 |  ...
50 |  01110000 = 4
51 |  ...
52 |  01111111 = max
53 |  10000000 = infinity
54 |  */
55 | 
56 | simd_posit8_32::operator std::array<float,32>() const
57 | {
58 | 	std::array<float,32> r;
59 | #if 0
60 | 	union Q {
61 | 		float f;
62 | 		uint32_t i;
63 | 	};
64 | 	alignas(__m256i) int8_t idata[32]; // uint8 if using table
65 | 	_mm256_store_si256((__m256i*)idata,v);
66 | 	uint32_t * pp = (uint32_t*)&r[0];
67 | 	for(int i = 0; i < 32; i++)
68 | 	{
69 | 		Q xyz;
70 | 		xyz.f = (float)FPT(FPT::DeepInit(),idata[i]);
71 | 		pp[i] = xyz.i;
72 | 	}
73 | #else
74 | 	alignas(__m256i) uint8_t idata[32]; // uint8 if using table
75 | 	_mm256_store_si256((__m256i*)idata,v);
76 | 	uint32_t * pp = (uint32_t*)&r[0];
77 | 	for(int i = 0; i < 32; i++)
78 | 	{
79 | 		pp[i] = posit8ns::op2float[idata[i]];
80 | 	}
81 | #endif
82 | 	return r;
83 | }
84 | 
85 | 


--------------------------------------------------------------------------------
/src/softfloat_gen.cpp:
--------------------------------------------------------------------------------
 1 | #include "softfloat.hpp"
 2 | #include <math.h>
 3 | #include <fstream>
 4 | 
 5 | int main(int argc, char const *argv[])
 6 | {
 7 | 	{
 8 | 		uint16_t opexp2[65536]; 
 9 | 		for(int i = 0; i < 65536; i++)
10 | 		{
11 | 			float16_t x;
12 | 			x.v = i;
13 | 			softfloat16 u(x);
14 | 			float uf = (float)u;
15 | 			float ef = exp2(uf);
16 | 			softfloat16 efu(ef);
17 | 			opexp2[i] = ((float16_t)efu).v;
18 | 		}
19 | 		std::ofstream onf(argc == 1 ? "float16_tbl.cpp":argv[1]);
20 | 		onf << "#include <stdint.h>\nuint16_t float16_opexp2[] = {\n";
21 | 		for(int i  = 0; i < 256*256; i++)
22 | 			onf << (int)(opexp2[i]) << ",";
23 | 		onf << "}; " << std::endl;
24 | 	}
25 | }


--------------------------------------------------------------------------------
/tests/itest_anyfloat.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <iostream>
 3 | #include "anyfloat.hpp"
 4 | 
 5 | #ifndef XFT
 6 | #define XFT binary8_emu
 7 | #warning "Using XFT type " XFT
 8 | #endif
 9 | void itest_anyfloat() {
10 |   XFT a(3.52);
11 |   XFT b(-1.0);
12 |   XFT c;
13 |   XFT ra = a + b;
14 |   XFT rm = a * b;
15 |   XFT rs = a - b;
16 |   XFT rd = a / b;
17 |   std::cout << "c=" << c << std::endl;
18 |   std::cout << "a=" << a << std::endl;
19 |   std::cout << "b=" << b << std::endl;
20 |   std::cout << "1/a=" << a.inv() << std::endl;
21 |   std::cout << "1/b=" << b.inv() << std::endl;
22 |   std::cout << "1/1/a=" << a.inv().inv() << std::endl;
23 |   std::cout << "1/1/b=" << b.inv().inv() << std::endl;
24 |   std::cout << "a<b =" << (a < b) << std::endl;
25 |   std::cout << "a>=b =" << (a >= b) << std::endl;
26 |   std::cout << "|a| " << a.abs() << " -a " << a.neg() << std::endl;
27 |   std::cout << "|b| " << b.abs() << " -b " << b.neg() << std::endl;
28 |   std::cout << "a+b " << ra << std::endl;
29 |   std::cout << "a*b " << rm << std::endl;
30 |   std::cout << "a-b " << rs << std::endl;
31 |   std::cout << "a/b " << rd << std::endl;
32 |   std::cout << "inf " << XFT::pinf() << " to " << XFT::ninf() << std::endl;
33 |   std::cout << "nan " << XFT::nan() << std::endl;
34 |   std::cout << "max-min " << XFT::max() << " to " << XFT::min() << std::endl;
35 |   if(sizeof(XFT)==1)
36 |   {
37 |   	//for(int i = -128; i < 127; i++)
38 |   	//	std::cout << i << ": " << XFT(XFT::DeepInit(),*(int*)(unsigned *)&i) << std::endl;
39 |   	for(int i = 0 ; i < 256; i++)
40 |   	{
41 |   		XFT x(XFT::DeepInit(),i);
42 |   		double d = x;
43 |   		double dd  = XFT(d);
44 |   		unsigned int rawexp = (i >> XFT::vfractionbits) & 0x1F; //bitmask<int>(XFT::vexpbits);
45 | 
46 |   		/*
47 |   		TODO fix conversion from that double to binary8:: unpack to denormalized 
48 |   		1: 1.52588e-05 -> 0 0
49 | 		2: 3.05176e-05 -> 0 0
50 |   		*/
51 |   		if(d  != dd)
52 | 	  		std::cout << i << ": " << d << " -> " << dd << " " << (d==dd) << " exp " << rawexp << " hex " << std::hex << i << std::dec << std::endl;
53 | 	  	else
54 | 	  		std::cout << i << ": " << d << " exp " << rawexp << std::endl;
55 | 
56 |   	}
57 |   }
58 | }


--------------------------------------------------------------------------------
/tests/itest_anyvsreallimit.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "floattraits.hpp"
 3 | #include <iostream>
 4 | 
 5 | /*
 6 | template <int exp_bits, int frac_bits, class value_T, class holder_T>
 7 | struct any_floattrait
 8 | 
 9 | template <>
10 | struct float2trait<double>
11 | {
12 | 	using type = double;
13 | 	using trait = double_trait;
14 | };
15 | */
16 | #ifndef FT
17 | #error "Needs macro FT"
18 | #endif
19 | void itest_anyvsreallimit()
20 | {
21 | 	using Trait=float2trait<FT>::trait;
22 | 	using AT=any_floattrait<Trait::exponent_bits,Trait::fraction_bits,Trait::value_t,Trait::holder_t>;
23 | 	static_assert(AT::one_h == Trait::one_h,"same one");
24 | 	static_assert(AT::two_h == Trait::two_h,"same two_h");
25 | 	//static_assert(AT::afterone_h == Trait::afterone_h,"same afterone_h");
26 | 	static_assert(AT::min_h == Trait::min_h,"same min_h");
27 | 	static_assert(AT::max_h == Trait::max_h,"same max_h");
28 | 	static_assert(AT::ninfinity_h == Trait::ninfinity_h,"same ninfinity_h");
29 | 	static_assert(AT::pinfinity_h == Trait::pinfinity_h,"same pinfinity_h");
30 | 	static_assert(AT::nan_h == Trait::nan_h,"same nan_h");
31 | }


--------------------------------------------------------------------------------
/tests/itest_constexpr.cpp:
--------------------------------------------------------------------------------
 1 | #include "posit.h"
 2 | 
 3 | template <class P>
 4 | void pp(P po)
 5 | {
 6 | 	std::cout << std::hex << po.v << " " << po << " == " << (float)po << std::endl;
 7 | }
 8 | 
 9 | int main()
10 | {
11 | 	using X1=Posit<int32_t,32,1,uint64_t,true>;
12 | 	static_assert(X1::one() == X1::one()*X1::one(),"same 1==1*1");
13 | 	static_assert(X1::zero() == X1::one()-X1::one(),"same 0==1-1");
14 | 	static_assert(X1::one() == X1::one()/X1::one(),"same 1==1/1");
15 | 	static_assert(X1::one() == X1::one()+X1::zero(),"same 1==1+0");
16 | 
17 | 	static_assert(X1::two() == X1::one()+X1::one(),"same 2==1+1");
18 | 	static_assert(X1::one() == X1::two().half(),"same 1==half(2)");
19 | 	static_assert(X1::onehalf() == X1::one()/X1::two(),"same 0.5==1/2");
20 | 	static_assert(X1::one().twice() == X1::two(),"same 2==1*1+1 using fma");
21 | 	static_assert(X1::two() == X1::one()*X1::one()+X1::one(),"same 2==1*1+1 using fma");
22 | 
23 | 	static_assert(X1::PT::minexponent() < X1::PT::maxexponent() ,"exponent ordering");
24 | 
25 | 	static_assert(X1(1) == X1::one(),"same 1==1");
26 | 
27 | 	pp(X1(2));
28 | 	pp(X1::two());
29 | 	pp(X1::one()+X1::one());
30 | 	return 1;
31 | }
32 | 
33 | void itest_constexpr()
34 | {
35 | 	return main();
36 | }


--------------------------------------------------------------------------------
/tests/itest_float16native32.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "float16native32.hpp"
 3 | #include <iostream>
 4 | 
 5 | #define FT float16n32;
 6 | 
 7 | #ifdef __AVX__
 8 | #warning Using HW acceleration for F16 conversion
 9 | #else
10 | #warning NOT Using HW acceleration for F16 conversion
11 | #endif
12 | 
13 | void itest_anyfloat()
14 | {
15 | 	#ifdef __AVX__
16 | 	std::cout  <<"Using HW acceleration for F16 conversion\n";
17 | 	#else
18 | 	std::cout  <<"NOT Using HW acceleration for F16 conversion\n";
19 | 	#endif
20 | 
21 | 	FT a(1.0);
22 | 	FT b(1.0);
23 | 	FT r = a+b;
24 | 
25 | 	std::cout << r << " " << (double)r << std::endl;
26 | }


--------------------------------------------------------------------------------
/tests/itest_lowlimits.cpp:
--------------------------------------------------------------------------------
 1 | // cling -I../include itest_lowlimits.cpp -DQQ=5,10,void,uint16_t
 2 | // cling -I../include itest_lowlimits.cpp -DQQ=8,7,void,uint16_t
 3 | // cling -I../include itest_lowlimits.cpp -DQQ=8,23,float,uint32_t
 4 | // cling -I../include itest_lowlimits.cpp -DQQ=11,52,double,uint64_t
 5 | // cout issue: cling -I../include itest_lowlimits.cpp -DQQ=15,112,void,__uint128_t
 6 | #include "floattraits.hpp"
 7 | #include <iostream>
 8 | 
 9 | void itest_lowlimits()
10 | {
11 | 	std::cout << "one: " << std::hex << (uint64_t)any_floattrait<QQ>::one_h << std::endl;
12 | 	std::cout << "two: " << std::hex << (uint64_t)any_floattrait<QQ>::two_h << std::endl;
13 | 	std::cout << "max: " << std::hex << (uint64_t)any_floattrait<QQ>::max_h << std::endl;
14 | 	std::cout << "min: " << std::hex << (uint64_t)any_floattrait<QQ>::min_h << std::endl;
15 | 	std::cout << "nan: " << std::hex << (uint64_t)any_floattrait<QQ>::nan_h << std::endl;
16 | 	std::cout << "pinfinity: " << std::hex <<(uint64_t) any_floattrait<QQ>::pinfinity_h << std::endl;
17 | }


--------------------------------------------------------------------------------
/tests/itest_positf.cpp:
--------------------------------------------------------------------------------
 1 | #include "positf.h"
 2 | 
 3 | 
 4 | int main()
 5 | {
 6 | 	using XF=Posit<int32_t,8,3,double,true>;
 7 | 	using X=XF::PPT;
 8 | 
 9 | 	XF af0(XF::DeepInit(),50);
10 | 	X ai0(X::DeepInit(),50);
11 | 	for(int32_t i = -128; i < 127; i++)
12 | 	{
13 | 		XF af(XF::DeepInit(),i);
14 | 		X ai(X::DeepInit(),i);
15 | 		auto rf = af+af0;
16 | 		auto ri = ai+ai0;
17 | 		if(rf.v != ri.v)
18 | 		{
19 | 			std::cout << "!" << rf << " != " << ri << " from " << af << " " << ai << " + " << (double)ai0 << std::endl;
20 | 		}
21 | 		else
22 | 			std::cout << (double)af << "+" << (double)af0 << " == " << (double)rf <<  std::endl;
23 | 	}
24 | }
25 | 
26 | void itest_positf()
27 | {
28 | 	main();
29 | }
30 | 


--------------------------------------------------------------------------------
/tests/itest_unpackfixed.cpp:
--------------------------------------------------------------------------------
 1 | #include "unpacked.h"
 2 | #include <iostream>
 3 | 
 4 | void itest_unpackfixed()
 5 | {
 6 | 	static_assert(Unpacked<>(1) == Unpacked<>::one(),"one int is one");
 7 | 	std::cout << (float)Unpacked<>(0) << std::endl;
 8 | 	std::cout << Unpacked<>(1) << " == " << Unpacked<>::one()<< std::endl;
 9 | 	std::cout << (float)Unpacked<>(121) << " " << Unpacked<>(121) << std::endl;
10 | 	std::cout << (float)Unpacked<>::make_fixed<fixedtrait<int,32,1> > (121*2+1) << std::endl;
11 | 	std::cout << (float)Unpacked<>(1.0) << std::endl;
12 | 	std::cout << (float)Unpacked<>(1.0f) << std::endl;
13 | 	std::cout << (int)Unpacked<>(10.5) << std::endl;
14 | 	std::cout << Unpacked<>(10.75).pack_xfixed<fixedtrait<int,32,0>> () << std::endl;
15 | 	std::cout << Unpacked<>(10.75).pack_xfixed<fixedtrait<int,32,1>> () << std::endl;
16 | 	std::cout << Unpacked<>(10.75).pack_xfixed<fixedtrait<int,32,2>> () << std::endl;
17 | }


--------------------------------------------------------------------------------
/tests/itest_unpackfloat.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "unpacked.h"
 3 | #include <iostream>
 4 | 
 5 | void itest_anyfloat()
 6 | {
 7 | 
 8 | 	double x = 3.0;
 9 | 	{
10 | 		Unpacked<uint64_t, int> u;
11 | 		std::cout << "study " << x << std::endl;
12 | 		u.unpack_xfloat<double_trait>((double)x);
13 | 		std::cout << "unpacking as double:" << u << std::endl;
14 | 		std::cout << "repacking as double:" << u.pack_xfloat<double_trait>() << std::endl;
15 | 		u.unpack_xfloat<double_trait>((float)x);
16 | 		std::cout << "unpacking as float:" << u << std::endl;
17 | 		std::cout << "repacking as float:" << u.pack_xfloat<single_trait>() << std::endl;
18 | 		std::cout << "repacking as double:" << u.pack_xfloat<double_trait>() << std::endl;
19 | 	}
20 | 	{
21 | 		Unpacked<uint32_t, int> u;
22 | 		std::cout << "study " << x << std::endl;
23 | 		u.unpack_xfloat<double_trait>((double)x);
24 | 		std::cout << "unpacking as double:" << u << std::endl;
25 | 		std::cout << "repacking as double:" << u.pack_xfloat<double_trait>() << std::endl;
26 | 		u.unpack_xfloat<double_trait>((float)x);
27 | 		std::cout << "unpacking as float:" << u << std::endl;
28 | 		std::cout << "repacking as float:" << u.pack_xfloat<single_trait>() << std::endl;
29 | 		std::cout << "repacking as double:" << u.pack_xfloat<double_trait>() << std::endl;
30 | 	}
31 | }


--------------------------------------------------------------------------------
/tests/makelimits.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | export cling='/Applications/cling_2017-10-26_mac1012/bin/cling --std=c++14 -march=native'
 3 | $cling "-DXBT=Posit<int32_t,32,2,uint64_t,true>" --std=c++14 -I../include test_numeric_limits.cpp
 4 | $cling "-DXBT=Posit<int32_t,16,1,uint64_t,true>" --std=c++14 -I../include test_numeric_limits.cpp
 5 | $cling "-DXBT=Posit<int32_t,8,0,uint64_t,true>" --std=c++14 -I../include test_numeric_limits.cpp
 6 | $cling "-DXBT=Posit<int32_t,32,2,uint64_t,false>" --std=c++14 -I../include test_numeric_limits.cpp
 7 | $cling "-DXBT=Posit<int32_t,16,1,uint64_t,false>" --std=c++14 -I../include test_numeric_limits.cpp
 8 | $cling "-DXBT=Posit<int32_t,8,0,uint64_t,false>" --std=c++14 -I../include test_numeric_limits.cpp
 9 | $cling -DXBT=float --std=c++14 -I../include test_numeric_limits.cpp
10 | $cling -DXBT=double --std=c++14 -I../include test_numeric_limits.cpp
11 | #$cling -DXBT=halffloat --std=c++14 -I../include test_numeric_limits.cpp
12 | #$cling -DXBT=halffloatalt --std=c++14 -I../include test_numeric_limits.cpp
13 | 


--------------------------------------------------------------------------------
/tests/minimal.cpp:
--------------------------------------------------------------------------------
1 | #include "include/posit.h"
2 | #include <iostream>
3 | #include <tuple>
4 | 
5 | using P=Posit<int8_t,8,3,uint16_t,true>;
6 | P a(10.0f);
7 | P b(20.0f);
8 | std::cout << std::make_tuple((float)a.inv(),(float)(a-b),(float)a*b,(float)a.abs()) << std::endl;
9 | 


--------------------------------------------------------------------------------
/tests/test_dot.cpp:
--------------------------------------------------------------------------------
 1 | #include "positf.h"     // Posit
 2 | #include <iostream>     // std::cout
 3 | #include <limits>       // std::numeric_limits
 4 | 
 5 | #define xstr(s) str(s)
 6 | #define str(s) #s
 7 | #ifndef XBT
 8 | 	#define XBT Posit<int32_t,32,1,uint64_t,PositSpec::WithNan>
 9 | #endif
10 | 
11 | int main()
12 | {
13 | 	  using T=XBT;
14 | 	  XBT a[3] = {3,4,5};
15 | 	  XBT b[3] = {1,2,3};
16 | 	  std::cout << XBT::dot(a,b,3) << std::endl;
17 | 	  return 0;
18 | }


--------------------------------------------------------------------------------
/tests/test_numeric_limits.cpp:
--------------------------------------------------------------------------------
 1 | // cling -DXBT=float --std=c++14 -I../include test_numeric_limits.cpp
 2 | // cling "-DXBT=PositF<int32_t,32,1,double,true>" --std=c++14 -I../include test_numeric_limits.cpp
 3 | #include "positf.h"     // std::cout
 4 | #include <iostream>     // std::cout
 5 | #include <limits>       // std::numeric_limits
 6 | 
 7 | #define xstr(s) str(s)
 8 | #define str(s) #s
 9 | #ifndef XBT
10 | 	#define XBT Posit<int32_t,32,1,uint64_t,true>
11 | #endif
12 | 
13 | void test_numeric_limits () {
14 |   using T=XBT;
15 |   std::cout << std::boolalpha;
16 |   std::cout << "\n\nMacro: " << xstr((XBT)) << std::endl;
17 |   std::cout << "Signature: " << typeid(T).name() << std::endl;
18 |   std::cout << "Minimum value for T: " << std::numeric_limits<T>::min() <<" "<< (double)  std::numeric_limits<T>::min()  << '\n';
19 |   std::cout << "Maximum value for T: " << std::numeric_limits<T>::max() <<" "<< (double) std::numeric_limits<T>::max()<< '\n';
20 |   std::cout << "Lowest value for T: " << std::numeric_limits<T>::lowest() << " "<<(double) std::numeric_limits<T>::lowest()<< '\n';
21 |   std::cout << "Epsilon for T: " << std::numeric_limits<T>::epsilon() << " "<<(double) std::numeric_limits<T>::epsilon()<< '\n';
22 | 
23 |   std::cout << "T is signed: " << std::numeric_limits<T>::is_signed << '\n';
24 |   //std::cout << std::numeric_limits<T>::epsilon().v << std::endl;
25 |   //std::cout << "then " << (T::one()+std::numeric_limits<T>::epsilon() ==  T::one().next()) << std::endl;
26 |   //std::cout << (double)(T::one().next())-(double)T::one() << std::endl;
27 |   //std::cout << "Non-sign bits in T: " << std::numeric_limits<T>::digits << '\n';
28 |   std::cout << "T has infinity: " << std::numeric_limits<T>::has_infinity << '\n';
29 |   std::cout << "Infinity value for T: " << std::numeric_limits<T>::infinity() << " "<< (double)std::numeric_limits<T>::infinity() <<'\n';
30 |   std::cout << "T has nan: " << std::numeric_limits<T>::has_quiet_NaN << '\n';
31 |   std::cout << "NaN " << std::numeric_limits<T>::quiet_NaN() << " " << (double) std::numeric_limits<T>::quiet_NaN() << '\n';
32 |   std::cout << "Exponent Range " << std::numeric_limits<T>::min_exponent << " to " << std::numeric_limits<T>::max_exponent << '\n';
33 |   return ;
34 | }


--------------------------------------------------------------------------------
/tests/testbit.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Emanuele Ruffaldi (C) 2017
 3 |  * Templated C++ Posit
 4 |  */
 5 | #include <climits>
 6 | #include <iostream>
 7 | #include "bithippop.hpp"
 8 | 
 9 | int main(int argc, char const *argv[])
10 | {
11 | 		enum X1 { w1 = bitset_get((uint32_t)0x0A,2,3) };
12 | 		enum X2 { w2 = bitset_getT((uint32_t)0x0A,2,3) };
13 | 		std::cout << "constant " << w1 << " " << w2 << std::endl;
14 | 
15 | 	std::cout << "mask " << bitset_all(bitmask<uint16_t>(5)) << std::endl;
16 | 	uint16_t a = 0x0110;
17 | 	std::cout << bitset_msb<uint16_t,5>(a) << std::endl;
18 | 	std::cout << bitset_lsb<uint16_t,5>(a) << std::endl;
19 | 	std::cout << "input: " << bitset_all(a) << std::endl;
20 | 	auto c = bitset_part(a,0xFF,2,5);
21 | 	std::cout << bitset_msb<uint16_t,5>(c) << std::endl;
22 | 	std::cout << bitset_lsb<uint16_t,5>(c) << std::endl;
23 | 	std::cout << "afterpart " << bitset_all(bitset_get(c,2,5)) << std::endl;
24 | 	std::cout << "beforpart " << bitset_all(bitset_get(a,2,5)) << std::endl;
25 | 	std::cout << "afterparthw " << bitset_all(bitset_gethw(c,2,5)) << std::endl;
26 | 
27 | 	std::cout << "after: " << bitset_all(c) << std::endl;
28 | 
29 | 	auto b = bitset_part<uint16_t,uint16_t,2,5>(a,0xFF);
30 | 	std::cout << bitset_msb<uint16_t,5>(b) << std::endl;
31 | 	std::cout << bitset_lsb<uint16_t,5>(b) << std::endl;
32 | 	std::cout << "after: " << bitset_all(b) << std::endl;
33 | 	std::cout << "afterpart " << bitset_all(bitset_get<uint16_t,2,5>(c)) << std::endl;
34 | 	std::cout << "beforpart " << bitset_all(bitset_get<uint16_t,2,5>(a)) << std::endl;
35 | 
36 | 
37 | 	return 0;
38 | }


--------------------------------------------------------------------------------
/tests/testeps.cpp:
--------------------------------------------------------------------------------
 1 | .I../include
 2 | #include "posit.h"
 3 | #include "posit12.hpp"
 4 | #include "posit10.hpp"
 5 | #include <iostream>
 6 | #include <typeinfo>
 7 | 
 8 | #include "posit12_tbl.cpp"
 9 | #include "posit12.cpp"
10 | #include "posit10_tbl.cpp"
11 | #include "posit10.cpp"
12 | 
13 | using PT=posit10;
14 | auto a = PT(0.00001);
15 | auto a1 = PT(1.0);
16 | auto adiff=a1-a;
17 | auto adiff_f = PT(1.0-0.0001);
18 | auto asum = a1+a;
19 | auto asum_f = PT(1.0+0.0001);
20 | (float)a
21 | (float)a1
22 | (float)adiff
23 | (float)adiffe
24 | (float)asum
25 | (float)asume


--------------------------------------------------------------------------------
/tests/testfind.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Emanuele Ruffaldi (C) 2017
 3 |  * Templated C++ Posit
 4 |  */
 5 | #include "bithippop.hpp"
 6 | #include <iostream>
 7 | 
 8 | int main(int argc, char * argv[])
 9 | {
10 | 	//enum X { w = findbitleftmostC((uint64_t)0x0A00000000000000) };
11 | 	enum X2 { w2 = findbitleftmostC((uint32_t)0x0A000000) };
12 | 	enum X3 { w3 = findbitleftmostC((uint16_t)0x0A00) };
13 | 	enum X4 { w4 = findbitleftmostC((uint8_t)0xA) };
14 | 
15 | 	std::cout << "Constants: " << " " << w2 << " " << w3 << " " << w4 << std::endl;
16 | 
17 | 	std::cout << findbitleftmost((uint64_t)0x0A00000000000000) << std::endl;
18 | 	std::cout << findbitleftmost((uint32_t)0x0A000000) << std::endl;
19 | 	std::cout << findbitleftmost((uint16_t)0x0A00) << std::endl;
20 | 	std::cout << findbitleftmost((uint8_t)0x0A) << std::endl;
21 | 	std::cout << findbitleftmost((uint16_t)0) << std::endl;
22 | 	return 0;
23 | }


--------------------------------------------------------------------------------
/tests/testnextprev.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>     // std::cout
2 | #include "posit.h"
3 | 
4 | int main () {
5 | 	using X1=Posit<int32_t,32,1,uint64_t,true>;
6 | 


--------------------------------------------------------------------------------
/tests/testposit.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Emanuele Ruffaldi (C) 2017
 3 |  * Templated C++ Posit
 4 |  */
 5 | #define CATCH_CONFIG_MAIN  // This tells Catch to provide a main() - only do this in one cpp file
 6 | #include "catch.hpp"
 7 | #include "posit.h"
 8 | #include <iostream>
 9 | 
10 | #define STR_EXPAND(tok) #tok
11 | #define STR(tok) STR_EXPAND(tok)
12 | 
13 | 
14 | using X1=Posit<int32_t,28,5,uint32_t,PositSpec::WithNan>;
15 | #define X X1
16 | #define XA "T28_E5 "
17 | #define SPOSIT "Posit<int32_t,28,5,uint32_t,false>"
18 | #include "testsposit.hpp"
19 | #undef X
20 | #undef XA
21 | #undef SPOSIT
22 | 
23 | /*using X2=Posit<int16_t,16,0,uint32_t>;
24 | #define X X2
25 | #define XA "T16_E0 "
26 | #define SPOSIT "Posit<int16_t,16,0,uint32_t>"
27 | #include "testsposit.hpp"
28 | #undef X
29 | #undef SPOSIT
30 | #undef XA
31 | */
32 | 
33 | //	af<< 3.2e8, 1, -1, 8.0e7;
34 | 
35 | TEST_CASE ("Special Conversion", "")
36 | {
37 | 	// IEEE32 All integers with 6 or fewer significant decimal digits can be converted to an IEEE 754 floating-point value without loss of precision
38 | 	//using Y1=Posit<int32_t,32,3,uint32_t>;
39 | 	using Y=Posit<int32_t,32,3,uint32_t,PositSpec::WithNan>;
40 | 
41 | 	double values1[] = { 3.2e8, 100, 10000, 20000, 1,-1,8.0e7, 4.0e7, 1, -1, -1.6e8};
42 | 	double values[] = { -1.6e8};
43 | 	for(unsigned int i = 0; i < sizeof(values)/sizeof(values[0]); i++)
44 | 	{
45 | 		char w[128];
46 | 		sprintf(w,"Special SECTION %f",values[i]);
47 | 		SECTION(w)
48 | 		{
49 | 			double d = values[i];
50 | 			Y::UnpackedT du(d);
51 | 			Y dp(values[i]);
52 | 			Y::UnpackedT dpu = dp;
53 | 			Y dup = Y(du);
54 | 			double dupd = dup;
55 | 			double dud = du;
56 | 			double dpd = dp;
57 | 			/*
58 | 			std::cout << "d   is " << d << std::endl;
59 | 			std::cout << "du  is " << du << std::endl;
60 |             std::cout << "dp  is " << posit_formatter<Y>(dp) << std::endl;
61 |             std::cout << "dup is "  << posit_formatter<Y>(dup) << std::endl;
62 |  	         std::cout << "dpu is " << dpu << std::endl;
63 | 	         std::cout << "dpd is " << dpd << std::endl;
64 | 	         */
65 | 			REQUIRE(dud == d);
66 | 			REQUIRE(dup == dp);
67 | 			REQUIRE(dpu == du);
68 | 			REQUIRE(dpd == d);
69 | 			REQUIRE(dupd == d);
70 | 		}
71 | 	}
72 | }
73 | 


--------------------------------------------------------------------------------
/tests/testposit10.cpp:
--------------------------------------------------------------------------------
 1 | #include "posit10.hpp"
 2 | #include <iostream>
 3 | 
 4 | template <class T>
 5 | struct pof_
 6 | {
 7 | public:
 8 | 	pof_(T p): posit(p) {}
 9 | 
10 | 	friend std::ostream & operator << (std::ostream & ons, const pof_ & x)
11 | 	{
12 | 		ons << "(" << " unsigned " << x.posit.uu() << " signed " << (int)x.posit.v << " float " << (float)x.posit<<")";
13 | 		return ons;
14 | 	}
15 | 
16 | 	T posit;
17 | };
18 | 
19 | template<class T>
20 | pof_<T> pof(T x)
21 | {
22 | 	return pof_<T>(x);
23 | }
24 | 
25 | int main(int argc, char const *argv[])
26 | {
27 | 	posit10 a(20.0);
28 | 	posit10 b(-10.0);
29 | 	std::cout << "a:   " << pof(a) <<   "\nb:   " <<  pof(b) << std::endl;
30 | 	std::cout << "a+b: " << pof(a+b) << "\na-b: " << pof(a-b) << "\na*b: " << pof(a*b)  << "\na/b: " << pof(a/b) << "\ninv a:"  << pof(a.inv()) << "\ninv b:" << pof(b.inv()) << std::endl;
31 | 	return 0;
32 | }


--------------------------------------------------------------------------------
/tests/testposit12.cpp:
--------------------------------------------------------------------------------
 1 | #include "posit12.hpp"
 2 | #include <iostream>
 3 | 
 4 | template <class T>
 5 | struct pof_
 6 | {
 7 | public:
 8 | 	pof_(T p): posit(p) {}
 9 | 
10 | 	friend std::ostream & operator << (std::ostream & ons, const pof_ & x)
11 | 	{
12 | 		ons << "(" << " unsigned " << x.posit.uu() << " signed " << (int)x.posit.v << " float " << (float)x.posit<<")";
13 | 		return ons;
14 | 	}
15 | 
16 | 	T posit;
17 | };
18 | 
19 | template<class T>
20 | pof_<T> pof(T x)
21 | {
22 | 	return pof_<T>(x);
23 | }
24 | 
25 | int main(int argc, char const *argv[])
26 | {
27 | 	posit12 a(20.0);
28 | 	posit12 b(-10.0);
29 | 	std::cout << "a:   " << pof(a) <<   "\nb:   " <<  pof(b) << std::endl;
30 | 	std::cout << "a+b: " << pof(a+b) << "\na-b: " << pof(a-b) << "\na*b: " << pof(a*b)  << "\na/b: " << pof(a/b) << "\ninv a:"  << pof(a.inv()) << "\ninv b:" << pof(b.inv()) << std::endl;
31 | 	return 0;
32 | }


--------------------------------------------------------------------------------
/tests/testposit8.cpp:
--------------------------------------------------------------------------------
 1 | #include "posit8.hpp"
 2 | #include <iostream>
 3 | 
 4 | template <class T>
 5 | struct pof_
 6 | {
 7 | public:
 8 | 	pof_(T p): posit(p) {}
 9 | 
10 | 	friend std::ostream & operator << (std::ostream & ons, const pof_ & x)
11 | 	{
12 | 		ons << "(" << " unsigned " << x.posit.uu() << " signed " << (int)x.posit.v << " float " << (float)x.posit<<")";
13 | 		return ons;
14 | 	}
15 | 
16 | 	T posit;
17 | };
18 | 
19 | template<class T>
20 | pof_<T> pof(T x)
21 | {
22 | 	return pof_<T>(x);
23 | }
24 | 
25 | int main(int argc, char const *argv[])
26 | {
27 | 	posit8 a(20.0);
28 | 	posit8 b(-10.0);
29 | 	std::cout << "a:   " << pof(a) <<   "\nb:   " <<  pof(b) << std::endl;
30 | 	std::cout << "a+b: " << pof(a+b) << "\na-b: " << pof(a-b) << "\na*b: " << pof(a*b)  << "\na/b: " << pof(a/b) << "\ninv a:"  << pof(a.inv()) << "\ninv b:" << pof(b.inv()) << std::endl;
31 | 	return 0;
32 | }


--------------------------------------------------------------------------------
/tests/testregime.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Emanuele Ruffaldi (C) 2017
 3 |  * Templated C++ Posit
 4 |  */
 5 | // g++ -march=native -I. --std=c++14 testregime.cpp  && ./a.out 
 6 | #include "bithippop.hpp"
 7 | #include <bitset>
 8 | #include <algorithm>
 9 | #include <iostream>
10 | #include <cstdint>
11 | 	using POSIT_UTYPE = uint32_t;
12 | namespace PT
13 | {
14 | 	enum {POSIT_SIZE = 32, POSIT_INVERTBIT = 1<<(sizeof(POSIT_UTYPE)*8)-1};
15 | }
16 | 
17 | 
18 | std::pair<int,int> extract(int pars)
19 | 
20 | 
21 | {	
22 | 	const bool x = (pars & PT::POSIT_INVERTBIT) != 0;
23 |     int aindex = x ? (~pars == 0 ? PT::POSIT_SIZE : findbitleftmostC((POSIT_UTYPE)~pars)) : (pars == 0 ? PT::POSIT_SIZE : findbitleftmostC((POSIT_UTYPE)pars)); // where is stop 0... 1 beyond
24 |     int index = aindex > PT::POSIT_SIZE  ? PT::POSIT_SIZE : aindex;
25 |     int reg = x ? index-1 : -index;
26 |     int rs =  index == PT::POSIT_SIZE ? PT::POSIT_SIZE: index+1;
27 |     return {reg,rs};
28 | }
29 | 
30 | 
31 | 
32 | int main(int argc , char * argv[])
33 | {
34 | 	if(0)
35 | 	{
36 | 		for(int i = 0; i < 1<< PT::POSIT_SIZE; i++)
37 | 		{
38 | 			POSIT_UTYPE a = (i <<4 )|0x0F;
39 | 			auto q = extract(a); 
40 | 			std::cout << (std::bitset<sizeof(a)*8>(a)) << " reg=" << q.first << " rs=" << q.second  << std::endl;
41 | 		}
42 | 	}
43 | 	else
44 | 	{
45 | 		uint32_t a = 0xe0000000;
46 | 			auto q = extract(a); 
47 | 			std::cout << (std::bitset<sizeof(a)*8>(a)) << " reg=" << q.first << " rs=" << q.second  << std::endl;
48 | 	}
49 | 	return 0;
50 | }
51 | 


--------------------------------------------------------------------------------
/tests/testsimdposit8.cpp:
--------------------------------------------------------------------------------
 1 | #include "simdposit8.hpp"
 2 | #include <iostream>
 3 | 
 4 | /**
 5 |  * TODO:
 6 |  	load from array
 7 |  	verification speed test
 8 |  */
 9 | 
10 | int main(int argc, char const *argv[])
11 | {
12 | 	simd_posit8_32 a(0.5f);
13 | 	simd_posit8_32 b(1.0f);
14 | 
15 | 	std::cout << "as integers:\n\t" << a.uu() << "\n\t" << a << "\n\t" << b.uu() << "\n\t" << b <<  std::endl;
16 |  	std::cout << "then product:\n\t" << a*b << "\ndifference:\n\t" << (a-b) << "\ndivision:\n\t" << (a/b) << "\ninversion:\n\t" << a.inv() << "\ninversionb:\n\t" << b.inv() << std::endl;
17 | 	return 0;
18 | }


--------------------------------------------------------------------------------
/tests/testsoftfloat.cpp:
--------------------------------------------------------------------------------
 1 | #include "softfloat.hpp"
 2 | #include "exp2t.hpp"
 3 | 
 4 | 
 5 | extern uint16_t float16_opexp2[];
 6 | 
 7 | template <>
 8 | struct exp2t_<softfloat16>
 9 | {
10 | 	softfloat16 operator() (softfloat16 a)
11 | 	{
12 | 		float16_t w;
13 | 		w.v = float16_opexp2[((float16_t)a).v];
14 | 		return softfloat16(w);
15 | 	}
16 | };
17 | 
18 | int main(int argc, char * argv[])
19 | {
20 | 	std::cout << softfloat16(3)+softfloat16(2) << std::endl;
21 | 	std::cout << ((float)exp2t(softfloat16(5.2))) << " vs " << exp2t(5.2f) << std::endl;
22 | 	return 0;
23 | }


--------------------------------------------------------------------------------
/tests/testsposit.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (C) 2017-2019 Emanuele Ruffaldi
 3 |  * Distributed under the terms of the BSD 3-Clause License.  
 4 |  * 
 5 |  * (See accompanying file LICENSE)
 6 |  * 
 7 |  * --
 8 |  */
 9 | /**
10 |  * Emanuele Ruffaldi (C) 2017
11 |  * Templated C++ Posit
12 |  */
13 |  //
14 | // Created by Emanuele Ruffaldi on 20/10/2017.
15 | //
16 | 
17 | #include <iomanip>
18 | 
19 | TEST_CASE( XA  "Unpacked Tests", SPOSIT ) {
20 |     REQUIRE(X::UnpackedT(2.0).inv().inv() == X::UnpackedT(2.0));
21 | }
22 | 
23 | TEST_CASE( XA "Posits Unpacked Tests", SPOSIT ) {
24 |     REQUIRE(X(2.0).unpack() == X::UnpackedT(2.0));
25 |     REQUIRE(X((X(2.0).unpack())) == X(2.0));
26 | }
27 | 
28 | TEST_CASE( XA "Other Tests",SPOSIT) {
29 | 
30 |     std::cout << X(2.0) << "\n\n" << X(2.0).inv() << "\n\n" << X(2.0).inv().inv() << std::endl;
31 |     REQUIRE(X(2.0).inv().inv() == X(2.0));
32 | 
33 | }
34 | 
35 | TEST_CASE (XA "Conversions", SPOSIT)
36 | {
37 |     float values[] = {1, 2, INFINITY, 4.0, 10.0, -1.0, -2.0, -4.0, -10.0, 1 / 2.0, 1 / 4.0};
38 | 
39 |     using TQ = double;
40 |     //using TI = uint64_t;
41 |     // TODO: bugs with fractional 1/2 and 1/4
42 |     for (unsigned int i = 0; i < sizeof(values) / sizeof(values[0]); i++) {
43 | 
44 |     	TQ f = values[i];
45 | 	    X::UnpackedT fu(f);
46 |         X fp(f);
47 | 	    TQ fuf = fu.pack_float<TQ>();
48 | 	    X fup(fu);
49 |         X::UnpackedT fupu(fup.unpack());
50 | 	    TQ fupuf = fupu.pack_float<TQ>();
51 | 
52 |         std::cout << "step " << values[i] << std::endl;
53 |         if(fupu != fu)
54 |         {
55 |             std::cout  << "d   is " << f << std::endl;
56 |             std::cout  << "du  is " << fu << std::endl;
57 |             std::cout  << "dp  is " << std::oct << fp << std::endl;
58 |             std::cout  << "dup is "  << std::oct << fup << std::endl;
59 |              std::cout << "dupu is " << fupu << std::endl;
60 |             // std::cout << "dpu is " << fpu << std::endl;
61 |              std::cout << "dpud is " << fupuf << std::endl;
62 | 
63 |         }
64 | 	    REQUIRE(f == fuf);   // unpack bug
65 |         REQUIRE(fp == fup);  // impossible bug due to using fu to go to posit
66 | 	    REQUIRE(fupu == fu); // bug in unpack posit
67 | 	    REQUIRE(fupuf == f); // impossible after the above
68 | 
69 |     }
70 | 
71 | }
72 | 


--------------------------------------------------------------------------------
/tests/testunpacked.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Emanuele Ruffaldi (C) 2017
  3 |  * Templated C++ Posit
  4 |  */
  5 | #define CATCH_CONFIG_MAIN  // This tells Catch to provide a main() - only do this in one cpp file
  6 | #include "catch.hpp"
  7 | #include "posit.h"
  8 | #include <iostream>
  9 | #include <cmath>
 10 | 
// Sample inputs shared by the "Unpacked Conversions" test cases below: small
// integers, infinity, zero, negatives, fractions, and large/small magnitudes.
float values[] = {1, 2, INFINITY, 0, 4.0, 10.0, -1.0, -2.0, -4.0, -10.0, 1 / 2.0, 1 / 4.0, 0.3, 1200, 1/1200.0, 1e20};


// Absolute tolerance used by the float test case when comparing a zero input
// with its round-tripped value.
const double epscheck = 1e-5;
 15 | TEST_CASE ("Unpacked Conversions double")
 16 | {
 17 |     using TQ = double;
 18 |     using TI = uint64_t;
 19 | 
 20 |     for (unsigned int i = 0; i < sizeof(values) / sizeof(values[0]); i++) 
 21 |     {
 22 |     	char what[128];
 23 |     	sprintf(what,"Testing TQ=%s TI=%s value=%f case=%d",typeid(TQ).name(),typeid(TI).name(),(double)values[i],i);
 24 |     	SECTION( what) 
 25 |     	{
 26 | 	    	TQ f = values[i];
 27 | 	    	TQ fi = ((TQ)1.0)/f;
 28 | 		    Unpacked<TI> fu(f);
 29 | 		    auto fui = fu.inv();
 30 | 		    auto fuii = fui.inv();
 31 | 		    auto fu_by_fui = fu*fui;
 32 | 		    auto fu2 = Unpacked<TI>((TQ)2)*fu;
 33 | 		    auto fu2_div_fu = fu2/fu;
 34 | 		    auto fu_plus_fu = fu+fu;
 35 | 	    	switch(std::fpclassify(f)) {
 36 | 		        case FP_INFINITE:  REQUIRE(fu.type == Unpacked<TI>::Infinity); break;
 37 | 		        case FP_NAN:       REQUIRE(fu.type == Unpacked<TI>::NaN); break;
 38 | 		        case FP_ZERO:
 39 | 			        {
 40 | 			        	REQUIRE(fu.type == Unpacked<TI>::Zero);
 41 | 					    int i;
 42 | 				    	//std::cout << " " << f << " becomes " << fu << std::endl;
 43 | 				    	//REQUIRE(f2 == fu.fraction);
 44 | 				    	//REQUIRE(i == fu.exponent+1);
 45 | 				    	REQUIRE(std::signbit(f) == fu.negativeSign);
 46 | 				    	REQUIRE(f == (double)fu);
 47 | 				    	break;
 48 | 			        }
 49 | 			        break;
 50 | 		        default:
 51 | 		        {
 52 | 		        	REQUIRE(fu.type == Unpacked<TI>::Regular);
 53 | 				    int i;
 54 | 			    	auto f2 = std::frexp(f, &i);
 55 | 			    	//std::cout << " " << f << " becomes " << fu << std::endl;
 56 | 			    	//REQUIRE(f2 == fu.fraction);
 57 | 			    	REQUIRE(i == fu.exponent+1);
 58 | 			    	REQUIRE(std::signbit(f) == fu.negativeSign);
 59 | 			    	REQUIRE((double)fu == Approx(f));
 60 | 			    	REQUIRE((double)fuii == Approx(f));
 61 | 			    	REQUIRE((double)fui == Approx(fi));
 62 | 			    	REQUIRE((double)fu_by_fui == Approx((TQ)1.0));
 63 | 			    	REQUIRE((double)fu2_div_fu == Approx((TQ)2.0));
 64 | 			    	REQUIRE(fu_plus_fu == fu2);
 65 | 			    	REQUIRE((double)fu_plus_fu == Approx((TQ)2*f));
 66 | 			    	break;
 67 | 		        }
 68 | 		    }
 69 | 		}
 70 |     }
 71 | }
 72 | 
 73 | TEST_CASE ("Unpacked Conversions float")
 74 | {
 75 |     using TQ = float;
 76 |     using TI = uint32_t;
 77 | 
 78 |     for (unsigned int i = 0; i < sizeof(values) / sizeof(values[0]); i++) 
 79 |     {
 80 |     	char what[128];
 81 |     	sprintf(what,"Testing TQ=%s TI=%s value=%f case=%d",typeid(TQ).name(),typeid(TI).name(),(double)values[i],i);
 82 |     	SECTION( what) 
 83 |     	{
 84 | 	    	TQ f = values[i];
 85 | 		    Unpacked<TI> fu(f);
 86 | 		    auto fui = fu.inv();
 87 | 		    auto fuii = fui.inv();
 88 | 	    	TQ fi = ((TQ)1.0)/f;
 89 | 		    auto fu_by_fui = fu*fui;
 90 | 		    auto fu2 = Unpacked<TI>((TQ)2)*fu;
 91 | 		    auto fu2_div_fu = fu2/fu;
 92 | 		    auto fu_plus_fu = fu+fu;
 93 | 
 94 | 	    	switch(std::fpclassify(f)) {
 95 | 		        case FP_INFINITE:  REQUIRE(fu.type == Unpacked<TI>::Infinity); break;
 96 | 		        case FP_NAN:       REQUIRE(fu.type == Unpacked<TI>::NaN); break;
 97 | 		        case FP_ZERO:
 98 | 			        {
 99 | 			        	REQUIRE(fu.type == Unpacked<TI>::Zero);
100 | 					    int i;
101 | 				    	//std::cout << " " << f << " becomes " << fu << std::endl;
102 | 				    	//REQUIRE(f2 == fu.fraction);
103 | 				    	//REQUIRE(i == fu.exponent+1);
104 | 				    	REQUIRE(std::signbit(f) == fu.negativeSign);
105 | 				    	REQUIRE(fabs(f - fu.pack_float<TQ>()) < epscheck);
106 | 				    	break;
107 | 			        }
108 | 			        break;
109 | 		        default:
110 | 		        {
111 | 		        	REQUIRE(fu.type == Unpacked<TI>::Regular);
112 | 				    int i;
113 | 			    	auto f2 = std::frexp(f, &i);
114 | 			    	//std::cout << " " << f << " becomes " << fu << std::endl;
115 | 			    	//REQUIRE(f2 == fu.fraction);
116 | 			    	REQUIRE(i == fu.exponent+1);
117 | 			    	REQUIRE(std::signbit(f) == fu.negativeSign);
118 | 			    	REQUIRE(fu.pack_float<TQ>() == Approx(f));
119 | 			    	REQUIRE(fuii.pack_float<TQ>() == Approx(f));
120 | 			    	REQUIRE(fui.pack_float<TQ>() == Approx(fi));
121 | 			    	REQUIRE(fu_by_fui.pack_float<TQ>() == Approx((TQ)1.0));
122 | 			    	REQUIRE(fu2_div_fu.pack_float<TQ>() == Approx((TQ)2.0));
123 | 			    	REQUIRE(fu_plus_fu == fu2);
124 | 			    	REQUIRE(fu_plus_fu.pack_float<TQ>() == Approx((TQ)2*f));
125 | 			    	break;
126 | 		        }
127 | 		    }
128 | 		}
129 |     }
130 | }
131 | 
132 | TEST_CASE("Sum")
133 | {
134 | 	using TI=double;
135 |     using TQ = uint64_t;
136 |     double values[] = { 3200.0, 2.0, 1500000.0 };
137 |     for(unsigned int i = 0; i < sizeof(values)/sizeof(values[0]); i++)
138 |     {
139 |     	for(unsigned int j=i; j < sizeof(values)/sizeof(values[0]); j++)
140 | 		{
141 | 	    	char what[128];
142 | 	    	sprintf(what,"Sum TQ=%s TI=%s value1=%f value2=%f",typeid(TQ).name(),typeid(TI).name(),(double)values[i],(double)values[j]);
143 | 	    	Unpacked<TQ> a(values[i]);
144 | 	    	Unpacked<TQ> b(values[j]);
145 | 			Unpacked<TQ> ab(values[i]+values[j]);
146 | 			Unpacked<TQ> amb(values[i]-values[j]);
147 | 			Unpacked<TQ> mab(-values[i]+values[j]);
148 | 	    	
149 | 	    	sprintf(what,"Sum TQ=%s TI=%s value1=%f value2=%f f(a+b)=%f u(a+b)=%f",typeid(TQ).name(),typeid(TI).name(),(double)values[i],(double)values[j],(double)ab,(double)(a+b));
150 | 	    	SECTION( what) 
151 | 	    	{
152 | 	    			REQUIRE((a+b) == ab);
153 | 	    			REQUIRE((a-b) == amb);
154 | 					REQUIRE((-a+b) == mab);
155 | 			}
156 | 
157 | 			/*
158 | 	    	sprintf(what,"fSum TQ=%s TI=%s value1=%f value2=%f",typeid(TQ).name(),typeid(TI).name(),(double)values[i],(double)values[j]);
159 | 	    	SECTION( what) 
160 | 	    	{
161 | 					REQUIRE((a+b).pack_float<double>() == Approx((double)(values[i]+values[j])));
162 | 					REQUIRE((a-b).pack_float<double>() == Approx((double)(values[i]-values[j])));
163 | 					REQUIRE((-a+b).pack_float<double>() == Approx((double)(-values[i]+values[j])));
164 | 			}
165 | 			*/
166 |     	}	
167 |     	
168 |     }
169 | 
170 | }
171 | 
172 | // https://www.h-schmidt.net/FloatConverter/IEEE754.html
173 | // Matlab Symbolic Toolbox says: sum(sym([3.2e8, 1, -1, 8.0e7]).*sym([4.0e7, 1, -1, -1.6e8])) == 2
174 | //	each [ 12800000000000000, 1, 1, -12800000000000000]
175 | // Matlab Says: sum([3.2e8, 1, -1, 8.0e7].*[ 4.0e7, 1, -1, -1.6e8]) == 0
176 | //	each 1E16 [ 1.280000000000000   0.000000000000000   0.000000000000000  -1.280000000000000 ]
177 | TEST_CASE ("High Precision Test")
178 | {
179 |     using TI = uint64_t;
180 | 	double a[] = { 3.2e8, 1, -1, 8.0e7};
181 | 	double b[] = { 4.0e7, 1, -1, -1.6e8};
182 | 	Unpacked<TI> r = Unpacked<TI>::zero(); // ZERO is NOT DETECTED HERE
183 | 	for(int i = 0; i < 4 ; i++)
184 | 	{
185 | 		auto z =  (Unpacked<TI>(a[i])*Unpacked<TI>(b[i]));
186 | 		r = r + z;
187 | 		//std::cout << " " << i << " " << z << " " << z << "\n\ttmp " << r << " " << r << " left is " << Unpacked<TI>(a[i]) <<  std::endl;
188 | 	}
189 | 	REQUIRE((double)r == 2);
190 | }
191 | 


--------------------------------------------------------------------------------