├── .editorconfig ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── lib ├── op1.c ├── op1.h ├── op2.c ├── op2.h ├── pack.c ├── pack.h ├── posit.cpp ├── posit.h ├── posit_types.h ├── util.c └── util.h ├── main.cpp └── test ├── CuTest.cpp ├── CuTest.h ├── ieee_test.cpp ├── p2_test.cpp ├── p3_test.cpp ├── test.cpp └── test.h /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*.{c,h,cpp,hpp}] 4 | indent_style = space 5 | indent_size = 4 6 | end_of_line = lf 7 | insert_final_newline = true 8 | trim_trailing_whitespace = true 9 | 10 | [Makefile] 11 | indent_style = tab 12 | 13 | [test/CuTest.*] 14 | indent_style = tab 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.a 3 | *~ 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Clément Guérin 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CXX = g++ 3 | FLAGS = -Ilib -Itest -O2 -Wall -g 4 | CFLAGS = -std=c99 $(FLAGS) 5 | CXXFLAGS = -std=c++11 $(FLAGS) 6 | 7 | LIB_TARGET = lib/libbfp.a 8 | LIB_OBJ = lib/posit.o lib/pack.o lib/util.o lib/op1.o lib/op2.o 9 | 10 | TEST_TARGET = test/bfptest 11 | TEST_OBJ = test/test.o test/p2_test.o test/p3_test.o test/ieee_test.o \ 12 | test/CuTest.o $(LIB_TARGET) 13 | 14 | TARGET = bfp 15 | OBJ = main.o $(LIB_TARGET) 16 | .PHONY: all test clean # test/ is a real directory, so the test target must be phony 17 | all: $(TARGET) $(TEST_TARGET) 18 | 19 | test: $(TEST_TARGET) 20 | ./test/bfptest 21 | 22 | clean: 23 | rm -f lib/*.o $(LIB_TARGET) 24 | rm -f test/*.o $(TEST_TARGET) 25 | rm -f *.o $(TARGET) 26 | 27 | $(LIB_TARGET): $(LIB_OBJ) 28 | ar rcs $@ $^ 29 | 30 | $(TEST_TARGET): $(TEST_OBJ) 31 | $(CXX) -o $@ $^ 32 | 33 | $(TARGET): $(OBJ) 34 | $(CXX) -o $@ $^ 35 | 36 | %.o: %.cpp 37 | $(CXX) -o $@ $(CXXFLAGS) -c $^ 38 | 39 | %.o: %.c 40 | $(CC) -o $@ $(CFLAGS) -c $^ 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bfp - Beyond Floating Point 2 | 3 | bfp is a C/C++ posit floating point format implementation. 4 | 5 | From Dr. John L. Gustafson's intro: 6 | 7 | "A new data type called a "posit" is designed for direct drop-in replacement for 8 | IEEE Standard 754 floats. Unlike unum arithmetic, posits do not require 9 | interval-type mathematics or variable size operands, and they round if an answer 10 | is inexact, much the way floats do.
However, they provide compelling advantages 11 | over floats, including simpler hardware implementation that scales from as few 12 | as two-bit operands to thousands of bits. For any bit width, they have a larger 13 | dynamic range, higher accuracy, better closure under arithmetic operations, and 14 | simpler exception-handling. For example, posits never overflow to infinity or 15 | underflow to zero, and there is no "Not-a-Number" (NaN) value. Posits should 16 | take up less space to implement in silicon than an IEEE float of the same size. 17 | With fewer gate delays per operation as well as lower silicon footprint, the 18 | posit operations per second (POPS) supported by a chip can be significantly 19 | higher than the FLOPs using similar hardware resources. GPU accelerators, in 20 | particular, could do more arithmetic per watt and per dollar yet deliver 21 | superior answer quality." 22 | 23 | ### Goals 24 | 25 | - Create a human readable posit reference implementation 26 | - Can be used as a library 27 | - Can be used in embedded projects 28 | 29 | ### References 30 | 31 | - [Abstract](http://web.stanford.edu/class/ee380/Abstracts/170201.html) 32 | - [Paper](http://www.johngustafson.net/pdfs/BeatingFloatingPoint.pdf) 33 | - [Video presentation](https://www.youtube.com/watch?v=aP0Y1uAA-2Y) 34 | - [Posit slides](http://supercomputingfrontiers.com/2017/wp-content/uploads/2017/03/2_1100_John-Gustafson.pdf) ([previous slides](http://ee380.stanford.edu/Abstracts/170201-slides.pdf)) 35 | - [Unum slides](http://www.johngustafson.net/presentations/Unums2.0.pdf) 36 | -------------------------------------------------------------------------------- /lib/op1.c: -------------------------------------------------------------------------------- 1 | #include "op1.h" 2 | #include "op2.h" 3 | #include "util.h" 4 | 5 | static struct unpacked_t half(struct unpacked_t a) 6 | { 7 | struct unpacked_t r = a; 8 | 9 | r.exp--; 10 | 11 | return r; 12 | } 13 | 14 | struct unpacked_t 
op1_sqrt(struct unpacked_t a) 15 | { 16 | struct unpacked_t r = a; 17 | 18 | // initial guess: half exponent is the sqrt if we ignore fraction bits 19 | r.exp /= 2; 20 | 21 | for (int i = 0; i < 100; i++) { 22 | struct unpacked_t rn; 23 | 24 | // Newton-Raphson: rn = r - (r^2 - a) / (2 * r) = (r + a / r) / 2 25 | rn = half(op2_add(r, op2_div(a, r))); 26 | 27 | if (rn.exp == r.exp && rn.frac == r.frac) { 28 | break; 29 | } 30 | 31 | r = rn; 32 | } 33 | 34 | return r; 35 | } 36 | -------------------------------------------------------------------------------- /lib/op1.h: -------------------------------------------------------------------------------- 1 | #ifndef __POSIT_OP1_H 2 | #define __POSIT_OP1_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "pack.h" 9 | 10 | struct unpacked_t op1_sqrt(struct unpacked_t a); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /lib/op2.c: -------------------------------------------------------------------------------- 1 | #include "op2.h" 2 | #include "util.h" 3 | 4 | static struct unpacked_t add(struct unpacked_t a, struct unpacked_t b, bool neg) 5 | { 6 | struct unpacked_t r; 7 | 8 | POSIT_LUTYPE afrac = HIDDEN_BIT(a.frac); 9 | POSIT_LUTYPE bfrac = HIDDEN_BIT(b.frac); 10 | POSIT_LUTYPE frac; 11 | 12 | if (a.exp > b.exp) { 13 | r.exp = a.exp; 14 | bfrac = RSHIFT(bfrac, a.exp - b.exp); 15 | } else { 16 | r.exp = b.exp; 17 | afrac = RSHIFT(afrac, b.exp - a.exp); 18 | } 19 | 20 | frac = afrac + bfrac; 21 | if (RSHIFT(frac, POSIT_WIDTH) != 0) { 22 | r.exp++; 23 | frac = RSHIFT(frac, 1); 24 | } 25 | 26 | r.neg = neg; 27 | r.frac = LSHIFT(frac, 1); 28 | 29 | return r; 30 | } 31 | 32 | static struct unpacked_t sub(struct unpacked_t a, struct unpacked_t b, bool neg) 33 | { 34 | struct unpacked_t r; 35 | 36 | POSIT_UTYPE afrac = HIDDEN_BIT(a.frac); 37 | POSIT_UTYPE bfrac = HIDDEN_BIT(b.frac); 38 | POSIT_UTYPE frac; 
39 | 40 | if (a.exp > b.exp || (a.exp == b.exp && a.frac > b.frac)) { 41 | r.exp = a.exp; 42 | bfrac = RSHIFT(bfrac, a.exp - b.exp); 43 | frac = afrac - bfrac; 44 | } else { 45 | neg = !neg; 46 | r.exp = b.exp; 47 | afrac = RSHIFT(afrac, b.exp - a.exp); 48 | frac = bfrac - afrac; 49 | } 50 | 51 | r.neg = neg; 52 | r.exp -= CLZ(frac); 53 | r.frac = LSHIFT(frac, CLZ(frac) + 1); 54 | 55 | return r; 56 | } 57 | 58 | struct unpacked_t op2_mul(struct unpacked_t a, struct unpacked_t b) 59 | { 60 | struct unpacked_t r; 61 | 62 | POSIT_LUTYPE afrac = HIDDEN_BIT(a.frac); 63 | POSIT_LUTYPE bfrac = HIDDEN_BIT(b.frac); 64 | POSIT_UTYPE frac = RSHIFT(afrac * bfrac, POSIT_WIDTH); 65 | POSIT_STYPE exp = a.exp + b.exp + 1; 66 | 67 | if ((frac & POSIT_MSB) == 0) { 68 | exp--; 69 | frac = LSHIFT(frac, 1); 70 | } 71 | 72 | r.neg = a.neg ^ b.neg; 73 | r.exp = exp; 74 | r.frac = LSHIFT(frac, 1); 75 | 76 | return r; 77 | } 78 | 79 | struct unpacked_t op2_div(struct unpacked_t a, struct unpacked_t b) 80 | { 81 | struct unpacked_t r; 82 | 83 | POSIT_LUTYPE afrac = HIDDEN_BIT(a.frac); 84 | POSIT_LUTYPE bfrac = HIDDEN_BIT(b.frac); 85 | POSIT_STYPE exp = a.exp - b.exp; 86 | 87 | if (afrac < bfrac) { 88 | exp--; 89 | bfrac = RSHIFT(bfrac, 1); 90 | } 91 | 92 | r.neg = a.neg ^ b.neg; 93 | r.exp = exp; 94 | r.frac = LSHIFT(afrac, POSIT_WIDTH) / bfrac; 95 | 96 | return r; 97 | } 98 | 99 | struct unpacked_t op2_add(struct unpacked_t a, struct unpacked_t b) 100 | { 101 | if (a.neg == b.neg) { 102 | return add(a, b, a.neg); 103 | } else { 104 | return sub(a, b, a.neg); 105 | } 106 | } 107 | 108 | struct unpacked_t op2_sub(struct unpacked_t a, struct unpacked_t b) 109 | { 110 | if (a.neg == b.neg) { 111 | return sub(a, b, a.neg); 112 | } else { 113 | return add(a, b, a.neg); 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /lib/op2.h: -------------------------------------------------------------------------------- 1 | #ifndef __POSIT_OP2_H 2 | 
#define __POSIT_OP2_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "pack.h" 9 | 10 | struct unpacked_t op2_mul(struct unpacked_t a, struct unpacked_t b); 11 | struct unpacked_t op2_div(struct unpacked_t a, struct unpacked_t b); 12 | struct unpacked_t op2_add(struct unpacked_t a, struct unpacked_t b); 13 | struct unpacked_t op2_sub(struct unpacked_t a, struct unpacked_t b); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /lib/pack.c: -------------------------------------------------------------------------------- 1 | #include "pack.h" 2 | #include "util.h" 3 | 4 | POSIT_UTYPE pack_posit(struct unpacked_t up, int nbits, int es) 5 | { 6 | POSIT_UTYPE p; 7 | POSIT_UTYPE regbits; 8 | POSIT_UTYPE expbits; 9 | 10 | // handle underflow and overflow. 11 | // in either case, exponent and fraction bits will disappear. 12 | int maxexp = POW2(es) * (nbits - 2); 13 | if (up.exp < -maxexp) { 14 | up.exp = -maxexp; 15 | } else if (up.exp > maxexp) { 16 | up.exp = maxexp; 17 | } 18 | 19 | int reg = FLOORDIV(up.exp, POW2(es)); 20 | int ss = util_ss(); 21 | int rs = MAX(-reg + 1, reg + 2); 22 | 23 | // FIXME: round exponent up if needed 24 | if (ss + rs + es >= nbits && up.frac >= POSIT_MSB) { 25 | up.exp++; 26 | reg = FLOORDIV(up.exp, POW2(es)); 27 | rs = MAX(-reg + 1, reg + 2); 28 | } 29 | 30 | POSIT_UTYPE exp = up.exp - POW2(es) * reg; 31 | 32 | if (reg < 0) { 33 | regbits = RSHIFT(POSIT_MSB, -reg); 34 | } else { 35 | regbits = LMASK(POSIT_MASK, reg + 1); 36 | } 37 | expbits = LMASK(LSHIFT(exp, POSIT_WIDTH - es), es); 38 | 39 | p = up.frac; 40 | p = expbits | RSHIFT(p, es); 41 | p = regbits | RSHIFT(p, rs); 42 | p = RSHIFT(p, ss); 43 | 44 | if (up.neg) { 45 | return util_neg(p, nbits); 46 | } else { 47 | return LMASK(p, nbits); 48 | } 49 | } 50 | 51 | float pack_float(struct unpacked_t up) 52 | { 53 | int fexp = up.exp + 127; 54 | 55 | // left aligned 
56 | uint32_t fexpbits; 57 | uint32_t ffracbits; 58 | 59 | if (fexp > 254) { 60 | // overflow, set maximum value (cast keeps the shift unsigned: 254 << 24 does not fit in int) 61 | fexpbits = LSHIFT((uint32_t)254, 24); 62 | ffracbits = -1; 63 | } else if (fexp < 1) { 64 | // underflow, pack as denormal 65 | fexpbits = 0; 66 | #if POSIT_WIDTH <= 32 67 | ffracbits = LSHIFT((uint32_t)(POSIT_MSB | RSHIFT(up.frac, 1)), 32 - POSIT_WIDTH); 68 | #else 69 | ffracbits = RSHIFT(POSIT_MSB | RSHIFT(up.frac, 1), POSIT_WIDTH - 32); 70 | #endif 71 | ffracbits = RSHIFT(ffracbits, -fexp); 72 | } else { 73 | fexpbits = LSHIFT((uint32_t)(fexp & 0xFF), 24); // unsigned shift, as in pack_double 74 | #if POSIT_WIDTH <= 32 75 | ffracbits = LSHIFT((uint32_t)up.frac, 32 - POSIT_WIDTH); 76 | #else 77 | ffracbits = RSHIFT(up.frac, POSIT_WIDTH - 32); 78 | #endif 79 | } 80 | 81 | union { 82 | float f; 83 | uint32_t u; 84 | } un; 85 | 86 | un.u = ffracbits; 87 | un.u = fexpbits | RSHIFT(un.u, 8); 88 | un.u = LSHIFT((uint32_t)up.neg, 31) | RSHIFT(un.u, 1); 89 | 90 | // don't underflow to zero 91 | if (LSHIFT(un.u, 1) == 0) { 92 | un.u++; 93 | } 94 | 95 | return un.f; 96 | } 97 | 98 | double pack_double(struct unpacked_t up) 99 | { 100 | int fexp = up.exp + 1023; 101 | 102 | // left aligned 103 | uint64_t fexpbits; 104 | uint64_t ffracbits; 105 | 106 | if (fexp > 2046) { 107 | // overflow, set maximum value 108 | fexpbits = LSHIFT((uint64_t)2046, 53); 109 | ffracbits = -1; 110 | } else if (fexp < 1) { 111 | // underflow, pack as denormal 112 | fexpbits = 0; 113 | #if POSIT_WIDTH <= 64 114 | ffracbits = LSHIFT((uint64_t)(POSIT_MSB | RSHIFT(up.frac, 1)), 64 - POSIT_WIDTH); 115 | #else 116 | ffracbits = RSHIFT(POSIT_MSB | RSHIFT(up.frac, 1), POSIT_WIDTH - 64); 117 | #endif 118 | ffracbits = RSHIFT(ffracbits, -fexp); 119 | } else { 120 | fexpbits = LSHIFT((uint64_t)(fexp & 0x7FF), 53); 121 | #if POSIT_WIDTH <= 64 122 | ffracbits = LSHIFT((uint64_t)up.frac, 64 - POSIT_WIDTH); 123 | #else 124 | ffracbits = RSHIFT(up.frac, POSIT_WIDTH - 64); 125 | #endif 126 | } 127 | 128 | union { 129 | double f; 130 | uint64_t u; 131 | }
un; 132 | 133 | un.u = ffracbits; 134 | un.u = fexpbits | RSHIFT(un.u, 11); 135 | un.u = LSHIFT((uint64_t)up.neg, 63) | RSHIFT(un.u, 1); 136 | 137 | // don't underflow to zero 138 | if (LSHIFT(un.u, 1) == 0) { 139 | un.u++; 140 | } 141 | 142 | return un.f; 143 | } 144 | 145 | struct unpacked_t unpack_posit(POSIT_UTYPE p, int nbits, int es) 146 | { 147 | struct unpacked_t up; 148 | 149 | bool neg = util_is_neg(p); 150 | if (neg) { 151 | p = util_neg(p, nbits); 152 | } 153 | 154 | int ss = util_ss(); 155 | int rs = util_rs(p, nbits); 156 | 157 | int lz = CLZ(LSHIFT(p, ss)); 158 | int lo = CLZ(LSHIFT(~p, ss) | 1); // add LSB to compensate for sign bit 159 | 160 | int reg = (lz == 0 ? lo - 1 : -lz); 161 | POSIT_UTYPE exp = RSHIFT(LSHIFT(p, ss + rs), POSIT_WIDTH - es); 162 | 163 | up.neg = neg; 164 | up.exp = POW2(es) * reg + exp; 165 | up.frac = LSHIFT(p, ss + rs + es); 166 | 167 | return up; 168 | } 169 | 170 | struct unpacked_t unpack_float(float f) 171 | { 172 | struct unpacked_t up; 173 | int bias = 127; 174 | 175 | union { 176 | float f; 177 | uint32_t u; 178 | } un; 179 | 180 | un.f = f; 181 | 182 | up.neg = RSHIFT(un.u, 31); 183 | up.exp = (RSHIFT(un.u, 23) & 0xFF) - bias; 184 | #if POSIT_WIDTH <= 32 185 | up.frac = RSHIFT(LSHIFT(un.u, 9), 32 - POSIT_WIDTH); 186 | #else 187 | up.frac = LSHIFT((POSIT_UTYPE)un.u, POSIT_WIDTH - 32 + 9); 188 | #endif 189 | 190 | if (up.exp == -bias) { 191 | // normalize 192 | // FIXME: some precision is lost if frac was downcasted 193 | up.exp -= CLZ(up.frac); 194 | up.frac = LSHIFT(up.frac, CLZ(up.frac) + 1); 195 | } 196 | 197 | return up; 198 | } 199 | 200 | struct unpacked_t unpack_double(double f) 201 | { 202 | struct unpacked_t up; 203 | int bias = 1023; 204 | 205 | union { 206 | double f; 207 | uint64_t u; 208 | } un; 209 | 210 | un.f = f; 211 | 212 | up.neg = RSHIFT(un.u, 63); 213 | up.exp = (RSHIFT(un.u, 52) & 0x7FF) - bias; 214 | #if POSIT_WIDTH <= 64 215 | up.frac = RSHIFT(LSHIFT(un.u, 12), 64 - POSIT_WIDTH); 216 | #else 
217 | up.frac = LSHIFT((POSIT_UTYPE)un.u, POSIT_WIDTH - 64 + 12); 218 | #endif 219 | 220 | if (up.exp == -bias) { 221 | // normalize 222 | // FIXME: some precision is lost if frac was downcasted 223 | up.exp -= CLZ(up.frac); 224 | up.frac = LSHIFT(up.frac, CLZ(up.frac) + 1); 225 | } 226 | 227 | return up; 228 | } 229 | -------------------------------------------------------------------------------- /lib/pack.h: -------------------------------------------------------------------------------- 1 | #ifndef __POSIT_PACK_H 2 | #define __POSIT_PACK_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "posit_types.h" 9 | 10 | struct unpacked_t 11 | { 12 | bool neg; 13 | POSIT_STYPE exp; 14 | POSIT_UTYPE frac; 15 | }; 16 | 17 | POSIT_UTYPE pack_posit(struct unpacked_t up, int nbits, int es); 18 | float pack_float(struct unpacked_t up); 19 | double pack_double(struct unpacked_t up); 20 | 21 | struct unpacked_t unpack_posit(POSIT_UTYPE p, int nbits, int es); 22 | struct unpacked_t unpack_float(float f); 23 | struct unpacked_t unpack_double(double f); 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /lib/posit.cpp: -------------------------------------------------------------------------------- 1 | #include "posit.h" 2 | #include "util.h" 3 | #include "pack.h" 4 | #include "op1.h" 5 | #include "op2.h" 6 | 7 | #include <cstdio> 8 | #include <cmath> 9 | 10 | using namespace std; 11 | 12 | Posit::Posit(POSIT_UTYPE bits, int nbits, int es) : 13 | mBits(bits), 14 | mNbits(nbits), 15 | mEs(es) 16 | { 17 | } 18 | 19 | Posit::Posit(int nbits, int es) : 20 | Posit(POSIT_ZERO, nbits, es) 21 | { 22 | } 23 | 24 | bool Posit::isZero() const 25 | { 26 | return util_is_zero(mBits); 27 | } 28 | 29 | bool Posit::isNar() const 30 | { 31 | return util_is_nar(mBits); 32 | } 33 | 34 | bool Posit::isNeg() const 35 | { 36 | return util_is_neg(mBits); 37 | } 38 | 39 | int Posit::nbits() const 40
| { 41 | return mNbits; 42 | } 43 | 44 | int Posit::ss() const 45 | { 46 | return util_ss(); 47 | } 48 | 49 | int Posit::rs() const 50 | { 51 | return util_rs(mBits, mNbits); 52 | } 53 | 54 | int Posit::es() const 55 | { 56 | return util_es(mBits, mNbits, mEs); 57 | } 58 | 59 | int Posit::fs() const 60 | { 61 | return util_fs(mBits, mNbits, mEs); 62 | } 63 | 64 | Posit Posit::zero() const 65 | { 66 | return Posit(POSIT_ZERO, mNbits, mEs); 67 | } 68 | 69 | Posit Posit::one() const 70 | { 71 | return Posit(POSIT_ONE, mNbits, mEs); 72 | } 73 | 74 | Posit Posit::nar() const 75 | { 76 | return Posit(POSIT_NAR, mNbits, mEs); 77 | } 78 | 79 | Posit Posit::neg() const 80 | { 81 | if (isNar()) { 82 | return nar(); 83 | } 84 | 85 | return Posit(util_neg(mBits, mNbits), mNbits, mEs); 86 | } 87 | 88 | Posit Posit::rec() const 89 | { 90 | if (isNar() || isZero()) { 91 | return nar(); 92 | } 93 | 94 | return one().div(*this); 95 | } 96 | 97 | Posit Posit::sqrt() const 98 | { 99 | if (isNar() || isNeg()) { 100 | return nar(); 101 | } else if (isZero()) { 102 | return zero(); 103 | } 104 | 105 | unpacked_t aup = unpack_posit(mBits, mNbits, mEs); 106 | unpacked_t up = op1_sqrt(aup); 107 | 108 | return Posit(pack_posit(up, mNbits, mEs), mNbits, mEs); 109 | } 110 | 111 | Posit Posit::add(const Posit& p) const 112 | { 113 | if (isNar() || p.isNar()) { 114 | return nar(); 115 | } else if (isZero()) { 116 | return p; 117 | } else if (p.isZero()) { 118 | return *this; 119 | } else if (neg().eq(p)) { 120 | return zero(); 121 | } 122 | 123 | unpacked_t aup = unpack_posit(mBits, mNbits, mEs); 124 | unpacked_t bup = unpack_posit(p.mBits, p.mNbits, p.mEs); 125 | unpacked_t up = op2_add(aup, bup); 126 | 127 | return Posit(pack_posit(up, mNbits, mEs), mNbits, mEs); 128 | } 129 | 130 | Posit Posit::sub(const Posit& p) const 131 | { 132 | if (isNar() || p.isNar()) { 133 | return nar(); 134 | } else if (isZero()) { 135 | return p.neg(); 136 | } else if (p.isZero()) { 137 | return *this; 138 | } 
else if (eq(p)) { 139 | return zero(); 140 | } 141 | 142 | unpacked_t aup = unpack_posit(mBits, mNbits, mEs); 143 | unpacked_t bup = unpack_posit(p.mBits, p.mNbits, p.mEs); 144 | unpacked_t up = op2_sub(aup, bup); 145 | 146 | return Posit(pack_posit(up, mNbits, mEs), mNbits, mEs); 147 | } 148 | 149 | Posit Posit::mul(const Posit& p) const 150 | { 151 | if (isNar() || p.isNar()) { 152 | return nar(); 153 | } else if (isZero() || p.isZero()) { 154 | return zero(); 155 | } 156 | 157 | unpacked_t aup = unpack_posit(mBits, mNbits, mEs); 158 | unpacked_t bup = unpack_posit(p.mBits, p.mNbits, p.mEs); 159 | unpacked_t up = op2_mul(aup, bup); 160 | 161 | return Posit(pack_posit(up, mNbits, mEs), mNbits, mEs); 162 | } 163 | 164 | Posit Posit::div(const Posit& p) const 165 | { 166 | if (isNar() || p.isNar() || p.isZero()) { 167 | return nar(); 168 | } else if (isZero()) { 169 | return zero(); 170 | } 171 | 172 | unpacked_t aup = unpack_posit(mBits, mNbits, mEs); 173 | unpacked_t bup = unpack_posit(p.mBits, p.mNbits, p.mEs); 174 | unpacked_t up = op2_div(aup, bup); 175 | 176 | return Posit(pack_posit(up, mNbits, mEs), mNbits, mEs); 177 | } 178 | 179 | bool Posit::eq(const Posit& p) const 180 | { 181 | return mBits == p.mBits; 182 | } 183 | 184 | bool Posit::gt(const Posit& p) const 185 | { 186 | return (POSIT_STYPE)mBits > (POSIT_STYPE)p.mBits; 187 | } 188 | 189 | bool Posit::ge(const Posit& p) const 190 | { 191 | return gt(p) || eq(p); 192 | } 193 | 194 | bool Posit::lt(const Posit& p) const 195 | { 196 | return !gt(p) && !eq(p); 197 | } 198 | 199 | bool Posit::le(const Posit& p) const 200 | { 201 | return !gt(p); 202 | } 203 | 204 | void Posit::set(Posit p) 205 | { 206 | mBits = (p.isZero() || p.isNar()) ? p.mBits : pack_posit(unpack_posit(p.mBits, p.mNbits, p.mEs), mNbits, mEs); // zero and NaR are special bit patterns with no unpacked form: copy them unchanged 207 | } 208 | 209 | void Posit::set(float n) 210 | { 211 | switch (fpclassify(n)) { 212 | case FP_INFINITE: 213 | case FP_NAN: 214 | mBits = POSIT_NAR; 215 | break; 216 | case FP_ZERO: 217 | mBits = POSIT_ZERO; 218 | break; 219 |
default: 220 | mBits = pack_posit(unpack_float(n), mNbits, mEs); 221 | break; 222 | } 223 | } 224 | 225 | void Posit::set(double n) 226 | { 227 | switch (fpclassify(n)) { 228 | case FP_INFINITE: 229 | case FP_NAN: 230 | mBits = POSIT_NAR; 231 | break; 232 | case FP_ZERO: 233 | mBits = POSIT_ZERO; 234 | break; 235 | default: 236 | mBits = pack_posit(unpack_double(n), mNbits, mEs); 237 | break; 238 | } 239 | } 240 | 241 | float Posit::getFloat() const 242 | { 243 | if (isZero()) { 244 | return 0.f; 245 | } else if (isNar()) { 246 | return 0.f / 0.f; 247 | } 248 | 249 | return pack_float(unpack_posit(mBits, mNbits, mEs)); 250 | } 251 | 252 | double Posit::getDouble() const 253 | { 254 | if (isZero()) { 255 | return 0.0; 256 | } else if (isNar()) { 257 | return 0.0 / 0.0; 258 | } 259 | 260 | return pack_double(unpack_posit(mBits, mNbits, mEs)); 261 | } 262 | 263 | void Posit::setBits(POSIT_UTYPE bits) 264 | { 265 | mBits = LSHIFT(bits, POSIT_WIDTH - mNbits); 266 | } 267 | 268 | POSIT_UTYPE Posit::getBits() 269 | { 270 | return RSHIFT(mBits, POSIT_WIDTH - mNbits); 271 | } 272 | 273 | void Posit::print() 274 | { 275 | Posit p = (isNeg() ? neg() : *this); 276 | 277 | printf("{%d, %d} ", mNbits, mEs); 278 | 279 | if (isNar()) { 280 | printf("NaR\n"); 281 | return; 282 | } 283 | 284 | for (int i = POSIT_WIDTH - 1; i >= POSIT_WIDTH - mNbits; i--) { 285 | printf("%d", RSHIFT(mBits, i) & 1); 286 | } 287 | 288 | printf(" -> "); 289 | printf(isNeg() ? 
"-" : "+"); 290 | 291 | for (int i = POSIT_WIDTH - ss() - 1; i >= POSIT_WIDTH - mNbits; i--) { 292 | printf("%d", RSHIFT(p.mBits, i) & 1); 293 | 294 | if (i != POSIT_WIDTH - mNbits && 295 | ((i == POSIT_WIDTH - ss() - p.rs()) || 296 | (i == POSIT_WIDTH - ss() - p.rs() - mEs))) { 297 | printf(" "); 298 | } 299 | } 300 | 301 | printf(" = %lg\n", getDouble()); 302 | } 303 | 304 | Posit8::Posit8() : 305 | Posit(8, 0) 306 | { 307 | 308 | } 309 | 310 | Posit8::Posit8(Posit v) : 311 | Posit8() 312 | { 313 | set(v); 314 | } 315 | 316 | Posit8::Posit8(float v) : 317 | Posit8() 318 | { 319 | set(v); 320 | } 321 | 322 | Posit8::Posit8(double v) : 323 | Posit8() 324 | { 325 | set(v); 326 | } 327 | 328 | Posit16::Posit16() : 329 | Posit(16, 1) 330 | { 331 | 332 | } 333 | 334 | Posit16::Posit16(Posit v) : 335 | Posit16() 336 | { 337 | set(v); 338 | } 339 | 340 | Posit16::Posit16(float v) : 341 | Posit16() 342 | { 343 | set(v); 344 | } 345 | 346 | Posit16::Posit16(double v) : 347 | Posit16() 348 | { 349 | set(v); 350 | } 351 | 352 | Posit32::Posit32() : 353 | Posit(32, 2) 354 | { 355 | 356 | } 357 | 358 | Posit32::Posit32(Posit v) : 359 | Posit32() 360 | { 361 | set(v); 362 | } 363 | 364 | Posit32::Posit32(float v) : 365 | Posit32() 366 | { 367 | set(v); 368 | } 369 | 370 | Posit32::Posit32(double v) : 371 | Posit32() 372 | { 373 | set(v); 374 | } 375 | 376 | Posit operator+(const Posit& a, const Posit& b) 377 | { 378 | return a.add(b); 379 | } 380 | 381 | Posit operator-(const Posit& a, const Posit& b) 382 | { 383 | return a.sub(b); 384 | } 385 | 386 | Posit operator*(const Posit& a, const Posit& b) 387 | { 388 | return a.mul(b); 389 | } 390 | 391 | Posit operator/(const Posit& a, const Posit& b) 392 | { 393 | return a.div(b); 394 | } 395 | 396 | Posit operator-(const Posit& a) 397 | { 398 | return a.neg(); 399 | } 400 | 401 | bool operator<(const Posit&a , const Posit& b) 402 | { 403 | return a.lt(b); 404 | } 405 | 406 | bool operator<=(const Posit&a , const Posit& b) 407 | { 
408 | return a.le(b); 409 | } 410 | 411 | bool operator>(const Posit&a , const Posit& b) 412 | { 413 | return a.gt(b); 414 | } 415 | 416 | bool operator>=(const Posit&a , const Posit& b) 417 | { 418 | return a.ge(b); 419 | } 420 | bool operator==(const Posit&a , const Posit& b) 421 | { 422 | return a.eq(b); 423 | } 424 | 425 | bool operator!=(const Posit&a , const Posit& b) 426 | { 427 | return !a.eq(b); 428 | } 429 | -------------------------------------------------------------------------------- /lib/posit.h: -------------------------------------------------------------------------------- 1 | #ifndef __POSIT_H 2 | #define __POSIT_H 3 | 4 | #include "posit_types.h" 5 | 6 | class Posit { 7 | private: 8 | POSIT_UTYPE mBits; 9 | int mNbits; 10 | int mEs; 11 | 12 | public: 13 | Posit(POSIT_UTYPE bits, int nbits, int es); 14 | Posit(int nbits, int es); 15 | 16 | bool isZero() const; // check for 0 17 | bool isNar() const; // check for NaR 18 | bool isNeg() const; // check for negative 19 | 20 | int nbits() const; // size in bits 21 | int ss() const; // sign size in bits 22 | int rs() const; // regime size in bits 23 | int es() const; // exponent size in bits 24 | int fs() const; // fraction size in bits 25 | 26 | Posit zero() const; // 0 27 | Posit one() const; // 1 28 | Posit nar() const; // NaR 29 | 30 | Posit neg() const; // -x 31 | Posit rec() const; // 1 / x 32 | Posit sqrt() const; // sqrt(x) 33 | 34 | Posit add(const Posit& p) const; // x + p 35 | Posit sub(const Posit& p) const; // x - p 36 | Posit mul(const Posit& p) const; // x * p 37 | Posit div(const Posit& p) const; // x / p 38 | 39 | bool eq(const Posit& p) const; // x == p 40 | bool gt(const Posit& p) const; // x > p 41 | bool ge(const Posit& p) const; // x >= p 42 | bool lt(const Posit& p) const; // x < p 43 | bool le(const Posit& p) const; // x <= p 44 | 45 | void set(Posit p); // x = p 46 | void set(float n); // x = n 47 | void set(double n); // x = n 48 | 49 | float getFloat() const; // n = x 50 | 
double getDouble() const; // n = x 51 | 52 | // debug 53 | void setBits(POSIT_UTYPE bits); 54 | POSIT_UTYPE getBits(); 55 | void print(); 56 | }; 57 | 58 | class Posit8 : public Posit 59 | { 60 | public: 61 | Posit8(); 62 | Posit8(Posit v); 63 | Posit8(float v); 64 | Posit8(double v); 65 | }; 66 | 67 | class Posit16 : public Posit 68 | { 69 | public: 70 | Posit16(); 71 | Posit16(Posit v); 72 | Posit16(float v); 73 | Posit16(double v); 74 | }; 75 | 76 | class Posit32 : public Posit 77 | { 78 | public: 79 | Posit32(); 80 | Posit32(Posit v); 81 | Posit32(float v); 82 | Posit32(double v); 83 | }; 84 | 85 | Posit operator+(const Posit& a, const Posit& b); 86 | Posit operator-(const Posit& a, const Posit& b); 87 | Posit operator*(const Posit& a, const Posit& b); 88 | Posit operator/(const Posit& a, const Posit& b); 89 | 90 | Posit operator-(const Posit& a); 91 | 92 | bool operator<(const Posit&a , const Posit& b); 93 | bool operator<=(const Posit&a , const Posit& b); 94 | bool operator>(const Posit&a , const Posit& b); 95 | bool operator>=(const Posit&a , const Posit& b); 96 | bool operator==(const Posit&a , const Posit& b); 97 | bool operator!=(const Posit&a , const Posit& b); 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /lib/posit_types.h: -------------------------------------------------------------------------------- 1 | #ifndef __POSIT_TYPES_H 2 | #define __POSIT_TYPES_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | 11 | #define POSIT_LUTYPE uint64_t 12 | #define POSIT_UTYPE uint32_t 13 | #define POSIT_STYPE int32_t 14 | #define POSIT_WIDTH 32 15 | #define POSIT_ZERO ((POSIT_UTYPE)0x00000000) 16 | #define POSIT_ONE ((POSIT_UTYPE)0x40000000) 17 | #define POSIT_NAR ((POSIT_UTYPE)0x80000000) 18 | #define POSIT_MSB ((POSIT_UTYPE)0x80000000) 19 | #define POSIT_MASK ((POSIT_UTYPE)0xFFFFFFFF) 20 | 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | 25 | #endif 26 | 
-------------------------------------------------------------------------------- /lib/util.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | bool util_is_zero(POSIT_UTYPE p) 4 | { 5 | return p == POSIT_ZERO; 6 | } 7 | 8 | bool util_is_nar(POSIT_UTYPE p) 9 | { 10 | return p == POSIT_NAR; 11 | } 12 | 13 | bool util_is_neg(POSIT_UTYPE p) 14 | { 15 | return (POSIT_STYPE)p < 0 && !util_is_nar(p); 16 | } 17 | 18 | int util_ss() 19 | { 20 | return 1; 21 | } 22 | 23 | int util_rs(POSIT_UTYPE p, int nbits) 24 | { 25 | int ss = util_ss(); 26 | int lz = CLZ(LSHIFT(p, ss)); 27 | int lo = CLZ(LSHIFT(~p, ss)); 28 | int rs = MAX(lz, lo) + 1; 29 | 30 | return MIN(rs, nbits - ss); 31 | } 32 | 33 | int util_es(POSIT_UTYPE p, int nbits, int es) 34 | { 35 | int ss = util_ss(); 36 | int rs = util_rs(p, nbits); 37 | 38 | return MIN(MAX(nbits - ss - rs, 0), es); 39 | } 40 | 41 | int util_fs(POSIT_UTYPE p, int nbits, int es) 42 | { 43 | int ss = util_ss(); 44 | int rs = util_rs(p, nbits); 45 | 46 | return MAX(nbits - ss - rs - es, 0); 47 | } 48 | 49 | POSIT_UTYPE util_neg(POSIT_UTYPE p, int nbits) 50 | { 51 | // reverse all bits and add one 52 | return LMASK(-LMASK(p, nbits), nbits); 53 | } 54 | -------------------------------------------------------------------------------- /lib/util.h: -------------------------------------------------------------------------------- 1 | #ifndef __POSIT_UTIL_H 2 | #define __POSIT_UTIL_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "posit_types.h" 9 | 10 | #ifdef __GNUC__ 11 | #if POSIT_WIDTH > 32 12 | #error "CLZ doesn't support 64-bit" 13 | #endif 14 | #define CLZ(n) \ 15 | ((n) == 0 ? 8 * sizeof(n) : __builtin_clz(n)) 16 | #endif 17 | 18 | // shift count wraps around on x86: https://stackoverflow.com/q/3871650 19 | #define LSHIFT(bits, shift) \ 20 | ((shift) >= (int)(8 * sizeof(bits)) ? 
0 : (bits) << (shift)) 21 | 22 | // shift count wraps around on x86: https://stackoverflow.com/q/3871650 23 | #define RSHIFT(bits, shift) \ 24 | ((shift) >= (int)(8 * sizeof(bits)) ? 0 : (bits) >> (shift)) 25 | 26 | #define POW2(n) \ 27 | (LSHIFT(1, (n))) 28 | 29 | #define FLOORDIV(a, b) \ 30 | ((a) / (b) - ((a) % (b) < 0)) 31 | 32 | #define MIN(a, b) \ 33 | ((a) < (b) ? (a) : (b)) 34 | 35 | #define MAX(a, b) \ 36 | ((a) > (b) ? (a) : (b)) 37 | 38 | #define LMASK(bits, size) \ 39 | ((bits) & LSHIFT(POSIT_MASK, POSIT_WIDTH - (size))) 40 | 41 | #define HIDDEN_BIT(frac) \ 42 | (POSIT_MSB | RSHIFT((frac), 1)) 43 | 44 | bool util_is_zero(POSIT_UTYPE p); 45 | bool util_is_nar(POSIT_UTYPE p); 46 | bool util_is_neg(POSIT_UTYPE p); 47 | 48 | int util_ss(); 49 | int util_rs(POSIT_UTYPE p, int nbits); 50 | int util_es(POSIT_UTYPE p, int nbits, int es); 51 | int util_fs(POSIT_UTYPE p, int nbits, int es); 52 | 53 | POSIT_UTYPE util_neg(POSIT_UTYPE p, int nbits); 54 | 55 | #ifdef __cplusplus 56 | } 57 | #endif 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include "posit.h" 2 | 3 | #include 4 | 5 | int main(int argc, char *argv[]) 6 | { 7 | auto p = Posit(5, 1); 8 | 9 | for (unsigned i = 0; i < (unsigned)(1 << p.nbits()); i++) { 10 | p.setBits(i); 11 | p.print(); 12 | } 13 | 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /test/CuTest.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "CuTest.h" 9 | 10 | /*-------------------------------------------------------------------------* 11 | * CuStr 12 | *-------------------------------------------------------------------------*/ 13 | 14 | char* CuStrAlloc(int size) 15 | { 16 | char* newStr = 
(char*) malloc( sizeof(char) * (size) ); 17 | return newStr; 18 | } 19 | 20 | char* CuStrCopy(const char* old) 21 | { 22 | int len = strlen(old); 23 | char* newStr = CuStrAlloc(len + 1); 24 | strcpy(newStr, old); 25 | return newStr; 26 | } 27 | 28 | /*-------------------------------------------------------------------------* 29 | * CuString 30 | *-------------------------------------------------------------------------*/ 31 | 32 | void CuStringInit(CuString* str) 33 | { 34 | str->length = 0; 35 | str->size = STRING_MAX; 36 | str->buffer = (char*) malloc(sizeof(char) * str->size); 37 | str->buffer[0] = '\0'; 38 | } 39 | 40 | CuString* CuStringNew(void) 41 | { 42 | CuString* str = (CuString*) malloc(sizeof(CuString)); 43 | str->length = 0; 44 | str->size = STRING_MAX; 45 | str->buffer = (char*) malloc(sizeof(char) * str->size); 46 | str->buffer[0] = '\0'; 47 | return str; 48 | } 49 | 50 | void CuStringDelete(CuString *str) 51 | { 52 | if (!str) return; 53 | free(str->buffer); 54 | free(str); 55 | } 56 | 57 | void CuStringResize(CuString* str, int newSize) 58 | { 59 | str->buffer = (char*) realloc(str->buffer, sizeof(char) * newSize); 60 | str->size = newSize; 61 | } 62 | 63 | void CuStringAppend(CuString* str, const char* text) 64 | { 65 | int length; 66 | 67 | if (text == NULL) { 68 | text = "NULL"; 69 | } 70 | 71 | length = strlen(text); 72 | if (str->length + length + 1 >= str->size) 73 | CuStringResize(str, str->length + length + 1 + STRING_INC); 74 | str->length += length; 75 | strcat(str->buffer, text); 76 | } 77 | 78 | void CuStringAppendChar(CuString* str, char ch) 79 | { 80 | char text[2]; 81 | text[0] = ch; 82 | text[1] = '\0'; 83 | CuStringAppend(str, text); 84 | } 85 | 86 | void CuStringAppendFormat(CuString* str, const char* format, ...) 
87 | { 88 | va_list argp; 89 | char buf[HUGE_STRING_LEN]; 90 | va_start(argp, format); 91 | vsprintf(buf, format, argp); 92 | va_end(argp); 93 | CuStringAppend(str, buf); 94 | } 95 | 96 | void CuStringInsert(CuString* str, const char* text, int pos) 97 | { 98 | int length = strlen(text); 99 | if (pos > str->length) 100 | pos = str->length; 101 | if (str->length + length + 1 >= str->size) 102 | CuStringResize(str, str->length + length + 1 + STRING_INC); 103 | memmove(str->buffer + pos + length, str->buffer + pos, (str->length - pos) + 1); 104 | str->length += length; 105 | memcpy(str->buffer + pos, text, length); 106 | } 107 | 108 | /*-------------------------------------------------------------------------* 109 | * CuTest 110 | *-------------------------------------------------------------------------*/ 111 | 112 | void CuTestInit(CuTest* t, const char* name, TestFunction function) 113 | { 114 | t->name = CuStrCopy(name); 115 | t->failed = 0; 116 | t->ran = 0; 117 | t->message = NULL; 118 | t->function = function; 119 | t->jumpBuf = NULL; 120 | } 121 | 122 | CuTest* CuTestNew(const char* name, TestFunction function) 123 | { 124 | CuTest* tc = CU_ALLOC(CuTest); 125 | CuTestInit(tc, name, function); 126 | return tc; 127 | } 128 | 129 | void CuTestDelete(CuTest *t) 130 | { 131 | if (!t) return; 132 | free(t->name); 133 | free(t); 134 | } 135 | 136 | void CuTestRun(CuTest* tc) 137 | { 138 | jmp_buf buf; 139 | tc->jumpBuf = &buf; 140 | if (setjmp(buf) == 0) 141 | { 142 | tc->ran = 1; 143 | (tc->function)(tc); 144 | } 145 | tc->jumpBuf = 0; 146 | } 147 | 148 | static void CuFailInternal(CuTest* tc, const char* file, int line, CuString* string) 149 | { 150 | char buf[HUGE_STRING_LEN]; 151 | 152 | sprintf(buf, "%s:%d: ", file, line); 153 | CuStringInsert(string, buf, 0); 154 | 155 | tc->failed = 1; 156 | tc->message = string->buffer; 157 | if (tc->jumpBuf != 0) longjmp(*(tc->jumpBuf), 0); 158 | } 159 | 160 | void CuFail_Line(CuTest* tc, const char* file, int line, const 
char* message2, const char* message) 161 | { 162 | CuString string; 163 | 164 | CuStringInit(&string); 165 | if (message2 != NULL) 166 | { 167 | CuStringAppend(&string, message2); 168 | CuStringAppend(&string, ": "); 169 | } 170 | CuStringAppend(&string, message); 171 | CuFailInternal(tc, file, line, &string); 172 | } 173 | 174 | void CuAssert_Line(CuTest* tc, const char* file, int line, const char* message, int condition) 175 | { 176 | if (condition) return; 177 | CuFail_Line(tc, file, line, NULL, message); 178 | } 179 | 180 | void CuAssertStrEquals_LineMsg(CuTest* tc, const char* file, int line, const char* message, 181 | const char* expected, const char* actual) 182 | { 183 | CuString string; 184 | if ((expected == NULL && actual == NULL) || 185 | (expected != NULL && actual != NULL && 186 | strcmp(expected, actual) == 0)) 187 | { 188 | return; 189 | } 190 | 191 | CuStringInit(&string); 192 | if (message != NULL) 193 | { 194 | CuStringAppend(&string, message); 195 | CuStringAppend(&string, ": "); 196 | } 197 | CuStringAppend(&string, "expected <"); 198 | CuStringAppend(&string, expected); 199 | CuStringAppend(&string, "> but was <"); 200 | CuStringAppend(&string, actual); 201 | CuStringAppend(&string, ">"); 202 | CuFailInternal(tc, file, line, &string); 203 | } 204 | 205 | void CuAssertIntEquals_LineMsg(CuTest* tc, const char* file, int line, const char* message, 206 | int expected, int actual) 207 | { 208 | char buf[STRING_MAX]; 209 | if (expected == actual) return; 210 | sprintf(buf, "expected <%d> but was <%d>", expected, actual); 211 | CuFail_Line(tc, file, line, message, buf); 212 | } 213 | 214 | void CuAssertDblEquals_LineMsg(CuTest* tc, const char* file, int line, const char* message, 215 | double expected, double actual, double delta) 216 | { 217 | char buf[STRING_MAX]; 218 | if (fabs(expected - actual) <= delta) return; 219 | sprintf(buf, "expected <%f> but was <%f>", expected, actual); 220 | 221 | CuFail_Line(tc, file, line, message, buf); 222 | } 223 
| 224 | void CuAssertPtrEquals_LineMsg(CuTest* tc, const char* file, int line, const char* message, 225 | void* expected, void* actual) 226 | { 227 | char buf[STRING_MAX]; 228 | if (expected == actual) return; 229 | sprintf(buf, "expected pointer <0x%p> but was <0x%p>", expected, actual); 230 | CuFail_Line(tc, file, line, message, buf); 231 | } 232 | 233 | 234 | /*-------------------------------------------------------------------------* 235 | * CuSuite 236 | *-------------------------------------------------------------------------*/ 237 | 238 | void CuSuiteInit(CuSuite* testSuite) 239 | { 240 | testSuite->count = 0; 241 | testSuite->failCount = 0; 242 | memset(testSuite->list, 0, sizeof(testSuite->list)); 243 | } 244 | 245 | CuSuite* CuSuiteNew(void) 246 | { 247 | CuSuite* testSuite = CU_ALLOC(CuSuite); 248 | CuSuiteInit(testSuite); 249 | return testSuite; 250 | } 251 | 252 | void CuSuiteDelete(CuSuite *testSuite) 253 | { 254 | unsigned int n; 255 | for (n=0; n < MAX_TEST_CASES; n++) 256 | { 257 | if (testSuite->list[n]) 258 | { 259 | CuTestDelete(testSuite->list[n]); 260 | } 261 | } 262 | free(testSuite); 263 | 264 | } 265 | 266 | void CuSuiteAdd(CuSuite* testSuite, CuTest *testCase) 267 | { 268 | assert(testSuite->count < MAX_TEST_CASES); 269 | testSuite->list[testSuite->count] = testCase; 270 | testSuite->count++; 271 | } 272 | 273 | void CuSuiteAddSuite(CuSuite* testSuite, CuSuite* testSuite2) 274 | { 275 | int i; 276 | for (i = 0 ; i < testSuite2->count ; ++i) 277 | { 278 | CuTest* testCase = testSuite2->list[i]; 279 | CuSuiteAdd(testSuite, testCase); 280 | } 281 | } 282 | 283 | void CuSuiteRun(CuSuite* testSuite) 284 | { 285 | int i; 286 | for (i = 0 ; i < testSuite->count ; ++i) 287 | { 288 | CuTest* testCase = testSuite->list[i]; 289 | CuTestRun(testCase); 290 | if (testCase->failed) { testSuite->failCount += 1; } 291 | } 292 | } 293 | 294 | void CuSuiteSummary(CuSuite* testSuite, CuString* summary) 295 | { 296 | int i; 297 | for (i = 0 ; i < 
testSuite->count ; ++i) 298 | { 299 | CuTest* testCase = testSuite->list[i]; 300 | CuStringAppend(summary, testCase->failed ? "F" : "."); 301 | } 302 | CuStringAppend(summary, "\n\n"); 303 | } 304 | 305 | void CuSuiteDetails(CuSuite* testSuite, CuString* details) 306 | { 307 | int i; 308 | int failCount = 0; 309 | 310 | if (testSuite->failCount == 0) 311 | { 312 | int passCount = testSuite->count - testSuite->failCount; 313 | const char* testWord = passCount == 1 ? "test" : "tests"; 314 | CuStringAppendFormat(details, "OK (%d %s)\n", passCount, testWord); 315 | } 316 | else 317 | { 318 | if (testSuite->failCount == 1) 319 | CuStringAppend(details, "There was 1 failure:\n"); 320 | else 321 | CuStringAppendFormat(details, "There were %d failures:\n", testSuite->failCount); 322 | 323 | for (i = 0 ; i < testSuite->count ; ++i) 324 | { 325 | CuTest* testCase = testSuite->list[i]; 326 | if (testCase->failed) 327 | { 328 | failCount++; 329 | CuStringAppendFormat(details, "%d) %s: %s\n", 330 | failCount, testCase->name, testCase->message); 331 | } 332 | } 333 | CuStringAppend(details, "\n!!!FAILURES!!!\n"); 334 | 335 | CuStringAppendFormat(details, "Runs: %d ", testSuite->count); 336 | CuStringAppendFormat(details, "Passes: %d ", testSuite->count - testSuite->failCount); 337 | CuStringAppendFormat(details, "Fails: %d\n", testSuite->failCount); 338 | } 339 | } 340 | -------------------------------------------------------------------------------- /test/CuTest.h: -------------------------------------------------------------------------------- 1 | #ifndef CU_TEST_H 2 | #define CU_TEST_H 3 | 4 | #include 5 | #include 6 | 7 | #define CUTEST_VERSION "CuTest 1.5" 8 | 9 | /* CuString */ 10 | 11 | char* CuStrAlloc(int size); 12 | char* CuStrCopy(const char* old); 13 | 14 | #define CU_ALLOC(TYPE) ((TYPE*) malloc(sizeof(TYPE))) 15 | 16 | #define HUGE_STRING_LEN 8192 17 | #define STRING_MAX 256 18 | #define STRING_INC 256 19 | 20 | typedef struct 21 | { 22 | int length; 23 | int size; 
24 | char* buffer; 25 | } CuString; 26 | 27 | void CuStringInit(CuString* str); 28 | CuString* CuStringNew(void); 29 | void CuStringRead(CuString* str, const char* path); 30 | void CuStringAppend(CuString* str, const char* text); 31 | void CuStringAppendChar(CuString* str, char ch); 32 | void CuStringAppendFormat(CuString* str, const char* format, ...); 33 | void CuStringInsert(CuString* str, const char* text, int pos); 34 | void CuStringResize(CuString* str, int newSize); 35 | void CuStringDelete(CuString* str); 36 | 37 | /* CuTest */ 38 | 39 | typedef struct CuTest CuTest; 40 | 41 | typedef void (*TestFunction)(CuTest *); 42 | 43 | struct CuTest 44 | { 45 | char* name; 46 | TestFunction function; 47 | int failed; 48 | int ran; 49 | const char* message; 50 | jmp_buf *jumpBuf; 51 | }; 52 | 53 | void CuTestInit(CuTest* t, const char* name, TestFunction function); 54 | CuTest* CuTestNew(const char* name, TestFunction function); 55 | void CuTestRun(CuTest* tc); 56 | void CuTestDelete(CuTest *t); 57 | 58 | /* Internal versions of assert functions -- use the public versions */ 59 | void CuFail_Line(CuTest* tc, const char* file, int line, const char* message2, const char* message); 60 | void CuAssert_Line(CuTest* tc, const char* file, int line, const char* message, int condition); 61 | void CuAssertStrEquals_LineMsg(CuTest* tc, 62 | const char* file, int line, const char* message, 63 | const char* expected, const char* actual); 64 | void CuAssertIntEquals_LineMsg(CuTest* tc, 65 | const char* file, int line, const char* message, 66 | int expected, int actual); 67 | void CuAssertDblEquals_LineMsg(CuTest* tc, 68 | const char* file, int line, const char* message, 69 | double expected, double actual, double delta); 70 | void CuAssertPtrEquals_LineMsg(CuTest* tc, 71 | const char* file, int line, const char* message, 72 | void* expected, void* actual); 73 | 74 | /* public assert functions */ 75 | 76 | #define CuFail(tc, ms) CuFail_Line( (tc), __FILE__, __LINE__, NULL, (ms)) 77 
| #define CuAssert(tc, ms, cond) CuAssert_Line((tc), __FILE__, __LINE__, (ms), (cond)) 78 | #define CuAssertTrue(tc, cond) CuAssert_Line((tc), __FILE__, __LINE__, "assert failed", (cond)) 79 | 80 | #define CuAssertStrEquals(tc,ex,ac) CuAssertStrEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac)) 81 | #define CuAssertStrEquals_Msg(tc,ms,ex,ac) CuAssertStrEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac)) 82 | #define CuAssertIntEquals(tc,ex,ac) CuAssertIntEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac)) 83 | #define CuAssertIntEquals_Msg(tc,ms,ex,ac) CuAssertIntEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac)) 84 | #define CuAssertDblEquals(tc,ex,ac,dl) CuAssertDblEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac),(dl)) 85 | #define CuAssertDblEquals_Msg(tc,ms,ex,ac,dl) CuAssertDblEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac),(dl)) 86 | #define CuAssertPtrEquals(tc,ex,ac) CuAssertPtrEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac)) 87 | #define CuAssertPtrEquals_Msg(tc,ms,ex,ac) CuAssertPtrEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac)) 88 | 89 | #define CuAssertPtrNotNull(tc,p) CuAssert_Line((tc),__FILE__,__LINE__,"null pointer unexpected",(p != NULL)) 90 | #define CuAssertPtrNotNullMsg(tc,msg,p) CuAssert_Line((tc),__FILE__,__LINE__,(msg),(p != NULL)) 91 | 92 | /* CuSuite */ 93 | 94 | #define MAX_TEST_CASES 1024 95 | 96 | #define SUITE_ADD_TEST(SUITE,TEST) CuSuiteAdd(SUITE, CuTestNew(#TEST, TEST)) 97 | 98 | typedef struct 99 | { 100 | int count; 101 | CuTest* list[MAX_TEST_CASES]; 102 | int failCount; 103 | 104 | } CuSuite; 105 | 106 | 107 | void CuSuiteInit(CuSuite* testSuite); 108 | CuSuite* CuSuiteNew(void); 109 | void CuSuiteDelete(CuSuite *testSuite); 110 | void CuSuiteAdd(CuSuite* testSuite, CuTest *testCase); 111 | void CuSuiteAddSuite(CuSuite* testSuite, CuSuite* testSuite2); 112 | void CuSuiteRun(CuSuite* testSuite); 113 | void CuSuiteSummary(CuSuite* testSuite, CuString* summary); 114 | void CuSuiteDetails(CuSuite* 
testSuite, CuString* details); 115 | 116 | #endif /* CU_TEST_H */ 117 | -------------------------------------------------------------------------------- /test/ieee_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | #include "pack.h" 3 | 4 | #define EPSILON 0.0001 5 | 6 | static void TestIeeeFloat(CuTest* tc) 7 | { 8 | for (float i = -1000.f; i < 1000.f; i += 0.001f) { 9 | Posit32 p(i); 10 | float r = p.getFloat(); 11 | CuAssertTrue(tc, i == r); 12 | } 13 | } 14 | 15 | static void TestIeeeFloatPackDenormal(CuTest* tc) 16 | { 17 | union { 18 | float f; 19 | uint32_t u; 20 | }; 21 | 22 | for (u = 0x00000001; u < 0x00800000; u++) { 23 | CuAssertTrue(tc, f == pack_float(unpack_float(f))); 24 | } 25 | } 26 | 27 | static void TestIeeeDouble(CuTest* tc) 28 | { 29 | for (double i = -1000.0; i < 1000.0; i += 0.001) { 30 | Posit32 p(i); 31 | double r = p.getDouble(); 32 | CuAssertTrue(tc, i - EPSILON < r && r < i + EPSILON); 33 | } 34 | } 35 | 36 | CuSuite* TestIeeeGetSuite(void) 37 | { 38 | CuSuite* suite = CuSuiteNew(); 39 | 40 | SUITE_ADD_TEST(suite, TestIeeeFloat); 41 | SUITE_ADD_TEST(suite, TestIeeeFloatPackDenormal); 42 | SUITE_ADD_TEST(suite, TestIeeeDouble); 43 | 44 | return suite; 45 | } 46 | -------------------------------------------------------------------------------- /test/p2_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "test.h" 8 | 9 | #define NAR 0b10 10 | 11 | static POSIT_STYPE sub_table[4][4]; 12 | static POSIT_STYPE add_table[4][4] = { 13 | // 0 1 NaR -1 14 | { 0b00, 0b01, NAR, 0b11 }, // 0 15 | { 0b01, 0b01, NAR, 0b00 }, // 1 16 | { NAR, NAR, NAR, NAR }, // NaR 17 | { 0b11, 0b00, NAR, 0b11 }, // -1 18 | }; 19 | 20 | static POSIT_STYPE div_table[4][4]; 21 | static POSIT_STYPE mul_table[4][4] = { 22 | // 0 1 NaR -1 23 | { 0b00, 0b00, NAR, 0b00 }, // 0 24 | { 0b00, 
0b01, NAR, 0b11 }, // 1 25 | { NAR, NAR, NAR, NAR }, // NaR 26 | { 0b00, 0b11, NAR, 0b01 }, // -1 27 | }; 28 | 29 | static void TestP2Zero(CuTest* tc) 30 | { 31 | Posit p = Posit(0, 2, 0); 32 | Posit zero = p.zero(); 33 | Posit nar = p.nar(); 34 | Posit neg = p.neg(); 35 | Posit rec = p.rec(); 36 | 37 | CuAssertTrue(tc, p.isZero()); 38 | CuAssertTrue(tc, !p.isNar()); 39 | CuAssertTrue(tc, !p.isNeg()); 40 | CuAssertTrue(tc, p.nbits() == 2); 41 | CuAssertTrue(tc, p.rs() == 1); 42 | CuAssertTrue(tc, p.es() == 0); 43 | CuAssertTrue(tc, p.fs() == 0); 44 | CuAssertTrue(tc, p.eq(zero)); 45 | CuAssertTrue(tc, !p.eq(nar)); 46 | CuAssertTrue(tc, p.eq(neg)); 47 | CuAssertTrue(tc, rec.isNar()); 48 | } 49 | 50 | static void TestP2Add(CuTest* tc) 51 | { 52 | TEST_OP2(add, 2, 0) 53 | } 54 | 55 | static void TestP2Sub(CuTest* tc) 56 | { 57 | TEST_OP2(sub, 2, 0) 58 | } 59 | 60 | static void TestP2Mul(CuTest* tc) 61 | { 62 | TEST_OP2(mul, 2, 0) 63 | } 64 | 65 | static void TestP2Div(CuTest* tc) 66 | { 67 | TEST_OP2(div, 2, 0) 68 | } 69 | 70 | static void TestP2Is(CuTest* tc) 71 | { 72 | Posit p = Posit(0, 2, 0); 73 | Posit zero = p.zero(); 74 | Posit one = p.one(); 75 | Posit nar = p.nar(); 76 | Posit mone = p.one().neg(); 77 | 78 | CuAssertTrue(tc, zero.isZero()); 79 | CuAssertTrue(tc, !one.isZero()); 80 | CuAssertTrue(tc, !nar.isZero()); 81 | CuAssertTrue(tc, !mone.isZero()); 82 | 83 | CuAssertTrue(tc, !zero.isNar()); 84 | CuAssertTrue(tc, !one.isNar()); 85 | CuAssertTrue(tc, nar.isNar()); 86 | CuAssertTrue(tc, !mone.isNar()); 87 | 88 | CuAssertTrue(tc, !zero.isNeg()); 89 | CuAssertTrue(tc, !one.isNeg()); 90 | CuAssertTrue(tc, !nar.isNeg()); 91 | CuAssertTrue(tc, mone.isNeg()); 92 | 93 | CuAssertTrue(tc, !zero.isNar()); 94 | CuAssertTrue(tc, !one.isNar()); 95 | CuAssertTrue(tc, nar.isNar()); 96 | CuAssertTrue(tc, !mone.isNar()); 97 | } 98 | 99 | CuSuite* TestP2GetSuite(void) 100 | { 101 | CuSuite* suite = CuSuiteNew(); 102 | 103 | genSubTable(sub_table[0], add_table[0], 4); 104 | 
genDivTable(div_table[0], mul_table[0], 4); 105 | 106 | SUITE_ADD_TEST(suite, TestP2Zero); 107 | SUITE_ADD_TEST(suite, TestP2Add); 108 | SUITE_ADD_TEST(suite, TestP2Sub); 109 | SUITE_ADD_TEST(suite, TestP2Mul); 110 | SUITE_ADD_TEST(suite, TestP2Div); 111 | SUITE_ADD_TEST(suite, TestP2Is); 112 | 113 | return suite; 114 | } 115 | -------------------------------------------------------------------------------- /test/p3_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "test.h" 8 | 9 | #define NAR 0b100 10 | 11 | static POSIT_STYPE sub_table[8][8]; 12 | static POSIT_STYPE add_table[8][8] = { 13 | // 0 1/4 1 4 NaR -4 -1 -1/4 14 | { 0b000, 0b001, 0b010, 0b011, NAR, 0b101, 0b110, 0b111 }, // 0 15 | { 0b001, 0b001, 0b010, 0b011, NAR, 0b101, 0b110, 0b000 }, // 1/4 16 | { 0b010, 0b010, 0b010, 0b011, NAR, 0b101, 0b000, 0b010 }, // 1 17 | { 0b011, 0b011, 0b011, 0b011, NAR, 0b000, 0b011, 0b011 }, // 4 18 | { NAR, NAR, NAR, NAR, NAR, NAR, NAR, NAR }, // NaR 19 | { 0b101, 0b101, 0b101, 0b000, NAR, 0b101, 0b101, 0b101 }, // -4 20 | { 0b110, 0b110, 0b000, 0b011, NAR, 0b101, 0b110, 0b110 }, // -1 21 | { 0b111, 0b000, 0b010, 0b011, NAR, 0b101, 0b110, 0b111 }, // -1/4 22 | }; 23 | 24 | static POSIT_STYPE div_table[8][8]; 25 | static POSIT_STYPE mul_table[8][8] = { 26 | // 0 1/4 1 4 NaR -4 -1 -1/4 27 | { 0b000, 0b000, 0b000, 0b000, NAR, 0b000, 0b000, 0b000 }, // 0 28 | { 0b000, 0b001, 0b001, 0b010, NAR, 0b110, 0b111, 0b111 }, // 1/4 29 | { 0b000, 0b001, 0b010, 0b011, NAR, 0b101, 0b110, 0b111 }, // 1 30 | { 0b000, 0b010, 0b011, 0b011, NAR, 0b101, 0b101, 0b110 }, // 4 31 | { NAR, NAR, NAR, NAR, NAR, NAR, NAR, NAR }, // NaR 32 | { 0b000, 0b110, 0b101, 0b101, NAR, 0b011, 0b011, 0b010 }, // -4 33 | { 0b000, 0b111, 0b110, 0b101, NAR, 0b011, 0b010, 0b001 }, // -1 34 | { 0b000, 0b111, 0b111, 0b110, NAR, 0b010, 0b001, 0b001 }, // -1/4 35 | }; 36 | 37 | static void 
TestP3Add(CuTest* tc) 38 | { 39 | TEST_OP2(add, 3, 1) 40 | } 41 | 42 | static void TestP3Sub(CuTest* tc) 43 | { 44 | TEST_OP2(sub, 3, 1) 45 | } 46 | 47 | static void TestP3Mul(CuTest* tc) 48 | { 49 | TEST_OP2(mul, 3, 1) 50 | } 51 | 52 | static void TestP3Div(CuTest* tc) 53 | { 54 | TEST_OP2(div, 3, 1) 55 | } 56 | 57 | CuSuite* TestP3GetSuite(void) 58 | { 59 | CuSuite* suite = CuSuiteNew(); 60 | 61 | genSubTable(sub_table[0], add_table[0], 8); 62 | genDivTable(div_table[0], mul_table[0], 8); 63 | 64 | SUITE_ADD_TEST(suite, TestP3Add); 65 | SUITE_ADD_TEST(suite, TestP3Sub); 66 | SUITE_ADD_TEST(suite, TestP3Mul); 67 | SUITE_ADD_TEST(suite, TestP3Div); 68 | 69 | return suite; 70 | } 71 | -------------------------------------------------------------------------------- /test/test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "test.h" 4 | 5 | CuSuite* TestP2GetSuite(); 6 | CuSuite* TestP3GetSuite(); 7 | CuSuite* TestIeeeGetSuite(); 8 | 9 | void genSubTable(POSIT_STYPE *sub_table, POSIT_STYPE *add_table, int size) 10 | { 11 | for (int i = 0; i < size; i++) { 12 | int k; 13 | 14 | if (i == 0) { 15 | k = 0; 16 | } else { 17 | k = size / 2 - (i - size / 2); 18 | } 19 | 20 | for (int j = 0; j < size; j++) { 21 | sub_table[k * size + j] = add_table[i * size + j]; 22 | } 23 | } 24 | } 25 | 26 | void genDivTable(POSIT_STYPE *div_table, POSIT_STYPE *mul_table, int size) 27 | { 28 | for (int i = 0; i < size; i++) { 29 | int k; 30 | 31 | if (i <= size / 2) { 32 | k = size / 2 - i; 33 | } else { 34 | k = size - (i - size / 2); 35 | } 36 | 37 | for (int j = 0; j < size; j++) { 38 | if (k == size / 2) { 39 | // TODO explain 40 | div_table[k * size + j] = mul_table[size / 2 * size + j]; 41 | } else { 42 | div_table[k * size + j] = mul_table[i * size + j]; 43 | } 44 | } 45 | } 46 | } 47 | 48 | void RunAllTests(void) 49 | { 50 | CuString *output = CuStringNew(); 51 | CuSuite *suite = CuSuiteNew(); 52 | 53 | 
CuSuiteAddSuite(suite, TestP2GetSuite()); 54 | CuSuiteAddSuite(suite, TestP3GetSuite()); 55 | CuSuiteAddSuite(suite, TestIeeeGetSuite()); 56 | 57 | CuSuiteRun(suite); 58 | CuSuiteSummary(suite, output); 59 | CuSuiteDetails(suite, output); 60 | printf("%s", output->buffer); 61 | } 62 | 63 | int main(int argc, char *argv[]) 64 | { 65 | RunAllTests(); 66 | } 67 | -------------------------------------------------------------------------------- /test/test.h: -------------------------------------------------------------------------------- 1 | #ifndef __TEST_H 2 | #define __TEST_H 3 | 4 | #include "CuTest.h" 5 | #include "posit.h" 6 | #include 7 | 8 | #define TEST_OP2(OP, NBITS, ES) \ 9 | Posit a = Posit(NBITS, ES); \ 10 | Posit b = Posit(NBITS, ES); \ 11 | \ 12 | for (int i = 0; i < (1 << NBITS); i++) { \ 13 | a.setBits(i); \ 14 | for (int j = 0; j < (1 << NBITS); j++) { \ 15 | b.setBits(j); \ 16 | \ 17 | Posit c = a.OP(b); \ 18 | \ 19 | CuAssertTrue(tc, c.getBits() == (POSIT_UTYPE)OP ## _table[j][i]); \ 20 | } \ 21 | } 22 | 23 | void genSubTable(POSIT_STYPE *sub_table, POSIT_STYPE *add_table, int size); 24 | void genDivTable(POSIT_STYPE *div_table, POSIT_STYPE *mul_table, int size); 25 | 26 | #endif 27 | --------------------------------------------------------------------------------