├── AUTHORS ├── CONTRIBUTORS ├── Makefile ├── quick_check.sh ├── fsc_utils.h ├── README ├── alias.h ├── bits.h ├── div_test.c ├── README.md ├── fsc_utils.c ├── fsc.h ├── divide.h ├── fsc.c ├── alias.c ├── bit_test.c ├── bits.c ├── test.c ├── histo.c ├── LICENSE ├── bit_cmp.c ├── fsc_dec.c └── fsc_enc.c /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the official list of Finite State Coder authors for copyright 2 | # purposes. 3 | # This file is distinct from the CONTRIBUTORS files. 4 | # See the latter for an explanation. 5 | 6 | # Names should be added to this file as: 7 | # Name or Organization 8 | # The email address is not required for organizations. 9 | 10 | Pascal Massimino 11 | Google Inc. 12 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | # People who have agreed to one of the CLAs and can contribute patches. 2 | # The AUTHORS file lists the copyright holders; this file 3 | # lists people. For example, Google employees are listed here 4 | # but not in AUTHORS, because Google holds the copyright. 
5 | # 6 | # https://developers.google.com/open-source/cla/individual 7 | # https://developers.google.com/open-source/cla/corporate 8 | # 9 | # Names should be added to this file as: 10 | # Name 11 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Simple makefile for gcc compiler 3 | # 4 | 5 | EXES = fsc test bit_test div_test bit_cmp 6 | all: libfsc.a $(EXES) 7 | 8 | CC = gcc 9 | CFLAGS = -O3 -DNDEBUG 10 | AR = ar 11 | ARFLAGS = r 12 | LDFLAGS = -lm 13 | 14 | %.o: %.c fsc.h divide.h 15 | $(CC) $(CFLAGS) -c $< -o $@ 16 | 17 | %.a: 18 | $(AR) $(ARFLAGS) $@ $^ 19 | 20 | libfscutils.a: fsc_utils.o fsc_utils.h divide.h 21 | 22 | libfsc.a: fsc_enc.o fsc_dec.o fsc.h bits.o bits.h alias.o alias.h histo.o divide.h 23 | 24 | test: test.o libfsc.a libfscutils.a 25 | gcc -o test test.o ./libfsc.a ./libfscutils.a $(LDFLAGS) $(CFLAGS) 26 | 27 | fsc: fsc.o libfsc.a libfscutils.a 28 | gcc -o fsc fsc.o ./libfsc.a ./libfscutils.a $(LDFLAGS) $(CFLAGS) 29 | 30 | bit_test: bit_test.o libfsc.a libfscutils.a 31 | gcc -o bit_test bit_test.o ./libfscutils.a ./libfsc.a $(LDFLAGS) $(CFLAGS) 32 | 33 | div_test: div_test.o libfsc.a libfscutils.a divide.h 34 | gcc -o div_test div_test.o ./libfscutils.a ./libfsc.a $(LDFLAGS) $(CFLAGS) 35 | 36 | bit_cmp: bit_cmp.o libfsc.a libfscutils.a 37 | gcc -o bit_cmp bit_cmp.o ./libfscutils.a ./libfsc.a $(LDFLAGS) $(CFLAGS) 38 | 39 | pak: clean 40 | tar czf fsc_oss.tgz *.c *.h Makefile AUTHORS CONTRIBUTORS LICENSE README 41 | 42 | clean: 43 | rm -f *~ *.o *.a $(EXES) 44 | 45 | bug: test 46 | ./test -s 2 40 47 | ./test -s 13 10000 48 | ./test -s 3 -p 49 | 50 | bench: $(EXES) 51 | ./bit_test 52 | ./quick_check.sh 53 | -------------------------------------------------------------------------------- /quick_check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # quick 
validation test 4 | 5 | what="all" 6 | 7 | make 8 | 9 | if [[ $what = "all" ]]; then 10 | echo "simple test" 11 | for s in 2 5 10 30 100 200 256; do 12 | ./test 200001 -s $s -buck | grep "errors" | grep -v "#0 " 13 | ./test 200001 -s $s -rev | grep "errors" | grep -v "#0 " 14 | ./test 200001 -s $s -mod | grep "errors" | grep -v "#0 " 15 | ./test 200001 -s $s -pack | grep "errors" | grep -v "#0 " 16 | ./test 200001 -s $s -w | grep "errors" | grep -v "#0 " 17 | ./test 200001 -s $s -w2 | grep "errors" | grep -v "#0 " 18 | ./test 200001 -s $s -w4 | grep "errors" | grep -v "#0 " 19 | ./test 200001 -s $s -a | grep "errors" | grep -v "#0 " 20 | ./test 200001 -s $s -a2 | grep "errors" | grep -v "#0 " 21 | done 22 | fi 23 | 24 | echo "corner case test #1" 25 | for n in `seq 0 33`; do 26 | ./test $n -w | grep "errors" | grep -v "#0 " 27 | ./test $n -w2 | grep "errors" | grep -v "#0 " 28 | ./test $n -w4 | grep "errors" | grep -v "#0 " 29 | ./test $n -a | grep "errors" | grep -v "#0 " 30 | ./test $n -a2 | grep "errors" | grep -v "#0 " 31 | done 32 | 33 | echo "corner case test #2" 34 | for n in `seq 8189 8201`; do 35 | # echo "*** n=$n ***" 36 | ./test $n -w | grep "errors" | grep -v "#0 " 37 | ./test $n -w2 | grep "errors" | grep -v "#0 " 38 | ./test $n -w4 | grep "errors" | grep -v "#0 " 39 | ./test $n -a | grep "errors" | grep -v "#0 " 40 | ./test $n -a2 | grep "errors" | grep -v "#0 " 41 | done 42 | -------------------------------------------------------------------------------- /fsc_utils.h: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 
5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | //------------------------------------------------------------------------------ 15 | // 16 | // Testing utilities for FSC 17 | // 18 | // Author: Skal (pascal.massimino@gmail.com) 19 | 20 | #ifndef FSC_UTILS_H_ 21 | #define FSC_UTILS_H_ 22 | 23 | #include "./fsc.h" 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | // Random generator 30 | typedef struct { 31 | unsigned int seed_; 32 | } FSCRandom; 33 | 34 | void FSCInitRandom(FSCRandom* const rg); 35 | int FSCRandomBits(FSCRandom* const rg, int num_bits); 36 | 37 | // Timing 38 | typedef struct timeval MyClock; 39 | double GetElapsed(MyClock* new_clock, MyClock* old_clock); 40 | 41 | // Helper functions 42 | static double xlogx(double p) { return (p == 0.) ? 
0 : -p * log2(p); } 43 | double GetEntropy(const uint8_t* in, size_t size); 44 | int DrawSymbol(const uint64_t cumul[256], int max_symbol, 45 | int total, int nb_bits, FSCRandom* rg); 46 | 47 | // Option-parsing utils 48 | int FSCParseCodingMethodOpt(const char opt[], FSCCodingMethod* const method); 49 | void FSCPrintCodingOptions(); 50 | 51 | #endif // FSC_UTILS_H_ 52 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | FSC: Finite State Coder 2 | 3 | An implementation of the compression technique described 4 | in http://arxiv.org/pdf/1311.2540v2.pdf 5 | 6 | To understand ANS, I extensively used the FSE project: 7 | 8 | https://github.com/Cyan4973/FiniteStateEntropy 9 | 10 | by Yann Collet, which is referenced by Jarek's paper. 11 | See the blog entry: 12 | http://fastcompression.blogspot.fr/2013/12/finite-state-entropy-new-breed-of.html 13 | 14 | Fabian Giesen also has interesting implementation ideas. See his blog for pointers: 15 | 16 | http://fgiesen.wordpress.com/ 17 | 18 | Code is located here: https://github.com/rygorous/ryg_rans 19 | I re-implemented some of his ideas (Alias method, interleaving, etc.) for 20 | experimentation purposes. 21 | 22 | Building: 23 | ========= 24 | There is a primitive "Makefile" to help you build the library (libfsc.a) 25 | and tests. Just type 'make'. This code has been developed and tested on 26 | Linux/x86_64 mostly. Expect moderate surprises on other platforms. 
27 | 28 | 29 | Quick description of the source files: 30 | ====================================== 31 | * fsc.h: main header 32 | * fsc_enc.c: encoder 33 | * fsc_dec.c: decoder 34 | * bits.c / bits.h: bit reading and writing functions 35 | 36 | * fsc_utils.[ch]: non-critical utility functions for testing 37 | 38 | * test.c / bit_test.c: tests 39 | * fsc.c: sample program to compress / decompress 40 | 41 | API: 42 | ==== 43 | 44 | // Compresses input buffer (in / in_size) using FSC. 45 | // log_tab_size must be in [log(alphabet_size)..14] range 46 | // for methods 0 to 3. For word-based methods, the value doesn't matter. 47 | // Compressed output (*out / *out_size) must be deallocated using free(). 48 | int FSCEncode(const uint8_t* in, size_t in_size, 49 | uint8_t** out, size_t* out_size, 50 | int log_tab_size, FSCCodingMethod method); 51 | 52 | // Decompresses compressed bytes. 53 | // Decompressed output (*out / *out_size) must be deallocated using free(). 54 | int FSCDecode(const uint8_t* in, size_t in_size, uint8_t** out, size_t* out_size); 55 | -------------------------------------------------------------------------------- /alias.h: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | //------------------------------------------------------------------------------ 15 | // 16 | // Tools for implementing Vose's alias sampling method. 17 | // 18 | // http://web.eecs.utk.edu/~vose/Publications/random.pdf 19 | // http://en.wikipedia.org/wiki/Alias_method 20 | // 21 | // Author: Skal (pascal.massimino@gmail.com) 22 | 23 | #ifndef ALIAS_H_ 24 | #define ALIAS_H_ 25 | 26 | #include "./fsc.h" 27 | 28 | #define LOG2_MAX_SYMBOLS 8 // such that (1 << LOG2_MAX_SYMBOLS) >= MAX_SYMBOLS 29 | #define ALIAS_MAX_SYMBOLS (1 << LOG2_MAX_SYMBOLS) 30 | // #define DEBUG_ALIAS 31 | 32 | typedef uint8_t alias_t; // enough to encode MAX_SYMBOLS 33 | typedef uint32_t alias_tab_t; // enough to store MAX_TAB_SIZE index 34 | 35 | typedef struct { 36 | alias_tab_t cut_; 37 | alias_t other_; // other symbol if residual >= cut_ 38 | int32_t start_; 39 | int32_t other_start_; 40 | } AliasPair; 41 | 42 | typedef AliasPair AliasTable[ALIAS_MAX_SYMBOLS]; 43 | 44 | static inline alias_t AliasSearchSymbol(const AliasTable t, uint32_t r, 45 | uint32_t* const rank) { 46 | const int s = r >> (MAX_LOG_TAB_SIZE - LOG2_MAX_SYMBOLS); 47 | const int use_alias = (r >= t[s].cut_); 48 | *rank = r - (use_alias ? t[s].other_start_ : t[s].start_); 49 | return use_alias ? t[s].other_ : (alias_t)s; 50 | } 51 | 52 | int AliasInit(AliasTable t, const uint32_t counts[], int max_symbol); 53 | void AliasGenerateMap(const AliasTable t, alias_t map[MAX_TAB_SIZE]); 54 | 55 | int AliasVerifyTable(const AliasTable t, 56 | const uint32_t counts[], int max_symbol); // debug 57 | 58 | // encoding: 59 | int AliasBuildEncMap(const uint32_t counts[], int max_symbol, 60 | uint16_t map[MAX_TAB_SIZE]); 61 | 62 | // Spread function for alias look-up. 
63 | int AliasSpreadMap(int max_symbol, const uint32_t counts[], 64 | int log_tab_size, uint8_t symbols[]); 65 | 66 | #endif // ALIAS_H_ 67 | -------------------------------------------------------------------------------- /bits.h: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | //------------------------------------------------------------------------------ 15 | // 16 | // Bit reader/writer 17 | // 18 | // Author: Skal (pascal.massimino@gmail.com) 19 | 20 | #ifndef FSC_BITS_H_ 21 | #define FSC_BITS_H_ 22 | 23 | #include 24 | #ifdef _MSC_VER 25 | #define FSC_INLINE 26 | #else 27 | #define FSC_INLINE inline 28 | #endif 29 | 30 | #include 31 | #include 32 | 33 | #ifdef __cplusplus 34 | extern "C" { 35 | #endif 36 | 37 | // ----------------------------------------------------------------------------- 38 | // 32b/64b type'ing 39 | 40 | #if (defined(__x86_64__) || defined(_M_X64)) // 64 bits 41 | 42 | typedef uint64_t fsc_val_t; 43 | #define RBYTES 4 44 | 45 | #else // 32 bits 46 | 47 | typedef uint32_t fsc_val_t; 48 | #define RBYTES 2 49 | 50 | #endif 51 | 52 | #define RBITS (RBYTES * 8) 53 | 54 | // ----------------------------------------------------------------------------- 55 | // BitReader 56 | 57 | typedef struct { 58 | fsc_val_t bits_; // bits accumulator 59 | const uint8_t* 
buf_; // current position 60 | const uint8_t* end_; // end of read position 61 | int bit_pos_; // unread bit position 62 | int eof_; // true if buf_ reached end_ 63 | } FSCBitReader; 64 | 65 | void FSCInitBitReader(FSCBitReader* const br, 66 | const uint8_t* const start, 67 | size_t length); 68 | 69 | uint32_t FSCReadBits(FSCBitReader* const br, int nb); 70 | static FSC_INLINE uint32_t FSCSeeBits(FSCBitReader* const br) { 71 | return (uint32_t)(br->bits_ >> br->bit_pos_); 72 | } 73 | 74 | static FSC_INLINE void FSCDiscardBits(FSCBitReader* const br, int nb) { 75 | br->bit_pos_ += nb; 76 | } 77 | 78 | // Make sure the buffer contains at least RBYTES bytes, 79 | // available to FSCSeeBits() 80 | extern void FSCDoFillBitWindow(FSCBitReader* const br); 81 | static FSC_INLINE void FSCFillBitWindow(FSCBitReader* const br) { 82 | if (br->bit_pos_ >= RBITS) FSCDoFillBitWindow(br); 83 | } 84 | 85 | const uint8_t* FSCBitAlign(FSCBitReader* const br); 86 | extern const uint8_t* FSCGetBytePos(FSCBitReader* const br); 87 | extern const uint8_t* FSCGetByteEnd(FSCBitReader* const br); 88 | extern void FSCSetReadBufferPos(FSCBitReader* const br, const uint8_t* buf); 89 | 90 | // ----------------------------------------------------------------------------- 91 | // BitWriter 92 | 93 | typedef struct { 94 | fsc_val_t bits_; // currently assembled bits 95 | int used_; // bit position (<= RBITS) 96 | uint8_t* cur_; // current write position 97 | uint8_t* buf_; // start of writable buffer 98 | uint8_t* end_; // non-writable pos 99 | int error_; // true if malloc failed (or other error) 100 | } FSCBitWriter; 101 | 102 | // Returns 0 in case of malloc error 103 | int FSCBitWriterInit(FSCBitWriter* const bw, size_t expected_size); 104 | 105 | void FSCBitWriterFlush(FSCBitWriter* const bw); 106 | static FSC_INLINE size_t FSCBitWriterNumBytes(FSCBitWriter* const bw) { 107 | return (uint8_t*)bw->cur_ - (uint8_t*)bw->buf_; 108 | } 109 | static FSC_INLINE uint8_t* 
FSCBitWriterFinish(FSCBitWriter* const bw) { 110 | return (uint8_t*)bw->buf_; 111 | } 112 | void FSCBitWriterDestroy(FSCBitWriter* const bw); 113 | void FSCWriteBits(FSCBitWriter* const bw, uint32_t bits, int nb); 114 | 115 | int FSCAppend(FSCBitWriter* const bw, const uint8_t* const buf, size_t len); 116 | 117 | #ifdef __cplusplus 118 | } // extern "C" 119 | #endif 120 | 121 | #endif /* FSC_BITS_H_ */ 122 | -------------------------------------------------------------------------------- /div_test.c: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | //------------------------------------------------------------------------------ 15 | // 16 | // Test for fast division 17 | // 18 | 19 | #include "./fsc_utils.h" 20 | 21 | #define CHECK_DIV(x, p, div) do { \ 22 | const ANSStateW q0 = (p > 0) ? (ANSStateW)(x) / (p) : (x); \ 23 | const ANSStateW q1 = FSCDivide((ANSStateW)(x), (div)); \ 24 | if (q0 != q1) { \ 25 | printf("Error! 
0x%.16llx / 0x%.16llx = 0x%.16llx != 0x%.16llx ", \ 26 | (unsigned long long)(x), (unsigned long long)(p), /* %llx needs unsigned long long, not uint64_t */ \ 27 | (unsigned long long)(q0), (unsigned long long)(q1)); } \ 28 | } while (0) 29 | // DO_TEST(NAME) below defines 'static void NAME()', which compares FSCDivide() against the native '/' for every p in [0, 1 << PROBA_BITS), at a few fixed x values plus MAX_N evenly spread ones. 30 | #define MAX_N 300 // max inside sample points 31 | #define DO_TEST(NAME) \ 32 | static void NAME() { \ 33 | const uint64_t half = 1ull << (WBITS + PROBA_BITS - 1); \ 34 | const ANSStateW max_x = half + (half - 1); /* no overflow!*/ \ 35 | const uint64_t max_p = 1ull << PROBA_BITS; \ 36 | uint64_t p; \ 37 | int n; \ 38 | inv_t div; \ 39 | for (p = 0; p < max_p; ++p) { \ 40 | FSCInitDivide((ANSProba)p, &div); \ 41 | CHECK_DIV(0, p, div); \ 42 | CHECK_DIV(max_x / 3, p, div); \ 43 | CHECK_DIV(max_x / 2, p, div); \ 44 | CHECK_DIV(max_x, p, div); \ 45 | for (n = 0; n < MAX_N; ++n) { \ 46 | const double scale = (n + .5) / MAX_N; \ 47 | const ANSStateW x = (ANSStateW)(scale * max_x); \ 48 | CHECK_DIV(x, p, div); \ 49 | } \ 50 | if (!(p & 512)) printf("%.3lf \r", 100. * p / max_p); \ 51 | } \ 52 | printf("\nDone testing " #NAME " (max_p=%llu)!\n", (unsigned long long)max_p); \ 53 | } 54 | 55 | ///////// Test 16bit case ////////// 56 | 57 | #define PROBA_BITS 16 // main param: precision for probabilities 58 | #define RECIPROCAL_BITS 16 // controls the method used in divide.h 59 | #define WBITS 16 // word size for I/O 60 | #define ANSProba uint16_t // word for storing PROBA_BITS 61 | #define ANSStateW uint32_t // word for storing WBITS + PROBA_BITS state 62 | #define inv_t inv16_t // names aliasing ... 
63 | #define FSCInitDivide FSCInitDivide16 64 | #define FSCDivide FSCDivide16 65 | #include "./divide.h" 66 | DO_TEST(DoTest16) 67 | #undef PROBA_BITS 68 | #undef RECIPROCAL_BITS 69 | #undef WBITS 70 | #undef ANSProba 71 | #undef ANSStateW 72 | #undef inv_t 73 | #undef FSCInitDivide 74 | #undef FSCDivide 75 | 76 | ///////// Test 32bit case ////////// 77 | 78 | #define RECIPROCAL_BITS 32 79 | #define WBITS 32 80 | #define PROBA_BITS 20 // works up to 32bit, but is quite long to test. 
81 | #define ANSProba uint64_t 82 | #define ANSStateW uint64_t 83 | #define inv_t inv32_t 84 | #define FSCInitDivide FSCInitDivide32 85 | #define FSCDivide FSCDivide32 86 | #include "./divide.h" 87 | DO_TEST(DoTest32) 88 | #undef PROBA_BITS 89 | #undef RECIPROCAL_BITS 90 | #undef WBITS 91 | #undef ANSProba 92 | #undef ANSStateW 93 | #undef inv_t 94 | #undef FSCInitDivide 95 | #undef FSCDivide 96 | 97 | 98 | int main() { 99 | DoTest16(); 100 | DoTest32(); 101 | return 0; 102 | } 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Finite State Coder 2 | 3 | This project is an experimental implementation of Jarek Duda's asymmetric 4 | numeral systems (ANS), as described in the following paper: 5 | 6 | http://arxiv.org/abs/1311.2540 7 | 8 | To understand ANS, I extensively used the FSE project: 9 | 10 | https://github.com/Cyan4973/FiniteStateEntropy 11 | 12 | by Yann Collet, which is referenced by Jarek's paper. 13 | See the blog entry: 14 | http://fastcompression.blogspot.fr/2013/12/finite-state-entropy-new-breed-of.html 15 | 16 | Fabian Giesen also has interesting implementation ideas. See his blog for pointers: 17 | 18 | http://fgiesen.wordpress.com/ 19 | 20 | Code is located here: https://github.com/rygorous/ryg_rans 21 | I re-implemented some of his ideas (Alias method, interleaving, etc.) for 22 | experimentation purposes. 23 | 24 | ------------------- 25 | 26 | The word-based coding methods (CODING_METHOD_16B and up) will use b=2^16 27 | and write 16b-words at a time. They also have variants with interleaving 28 | and alias method. 29 | 30 | The default CODING_METHOD_16B_4X is the fastest so far, but experimentation 31 | is still underway... 32 | 33 | ------------------- 34 | 35 | The other implementations (CODING_METHOD_BUCKET, etc.) use bit-by-bit 36 | encoding/decoding, or byte-by-byte, where bits are grouped in packets of 8 bits. 
37 | Note that the coder still emits bits one by one though (b=2, in the 38 | ANS terminology). 39 | 40 | There are several 'Spread Function' available to try different 41 | symbol <-> slots assignments (see BuildSpreadTableXXX() functions). 42 | You can switch from one to another in the command line (-buck, -mod, etc.) 43 | 44 | Known limitations: 45 | - alphabet size should be <= 256 46 | - max table size is 2 ^ 14 47 | 48 | ------------------- 49 | 50 | Command line help: 51 | 52 | ``` 53 | ./fsc -h 54 | usage: ./fsc [options] < in_file > out_file 55 | options: 56 | -c : compression mode (default) 57 | -d : decompression mode 58 | -s : don't emit output, just print stats 59 | -l : change log-table-size (in [2..14], default 12) 60 | -w : use word-based coding. 61 | -w2 : use word-based coding 2x interleave. 62 | -w4 : use word-based coding 4x interleave. 63 | -a : use word-based coding + alias. 64 | -a2 : use word-based coding + alias + interleave. 65 | -mod : use modulo spread function 66 | -rev : use reverse spread function 67 | -pack : use pack spread function 68 | -buck : use bucket spread function 69 | -h : this help 70 | 71 | ./test -h 72 | usage: ./test [options] [size] 73 | options: 74 | -t : distribution type (in [0..5]) 75 | -p : distribution param (>=0) 76 | -s : number of symbols (in [2..256])) 77 | -l : max table size bits (<= LOG_TAB_SIZE) 78 | -save : save input message to file 79 | -d : print distribution 80 | -f : message file name 81 | -w : use word-based coding. 82 | -w2 : use word-based coding 2x interleave. 83 | -w4 : use word-based coding 4x interleave. 84 | -a : use word-based coding + alias. 85 | -a2 : use word-based coding + alias + interleave. 
86 | -mod : use modulo spread function 87 | -rev : use reverse spread function 88 | -pack : use pack spread function 89 | -buck : use bucket spread function 90 | -h : this help 91 | 92 | ./bit_test -h 93 | usage: ./bit_test [options] [size] 94 | -l : max table size bits for bit-by-bit 95 | -l8 : max table size bits for byte-by-byte 96 | -p : try only one proba value 97 | -fsc : skip FSC 98 | -fsc8 : skip FSC8 99 | -w : use word-based coding. 100 | -w2 : use word-based coding 2x interleave. 101 | -w4 : use word-based coding 4x interleave. 102 | -a : use word-based coding + alias. 103 | -a2 : use word-based coding + alias + interleave. 104 | -mod : use modulo spread function 105 | -rev : use reverse spread function 106 | -pack : use pack spread function 107 | -buck : use bucket spread function 108 | -h : this help 109 | 110 | ``` 111 | -------------------------------------------------------------------------------- /fsc_utils.c: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | //------------------------------------------------------------------------------ 15 | // 16 | // Testing utilities for FSC 17 | // 18 | // Author: Skal (pascal.massimino@gmail.com) 19 | 20 | #include "./fsc_utils.h" 21 | 22 | //------------------------------------------------------------------------------ 23 | // Random 24 | 25 | void FSCInitRandom(FSCRandom* const rg) { 26 | rg->seed_ = 0x81231f3u; 27 | } 28 | 29 | int FSCRandomBits(FSCRandom* const rg, int num_bits) { 30 | const int val = rand_r(&rg->seed_) >> 6; 31 | return val & ((1 << num_bits) - 1); 32 | } 33 | 34 | //------------------------------------------------------------------------------ 35 | // Timing 36 | 37 | double GetElapsed(MyClock* new_clock, MyClock* old_clock) { 38 | gettimeofday(new_clock, NULL); 39 | if (old_clock != NULL) { 40 | const double elapsed = 41 | new_clock->tv_sec - old_clock->tv_sec 42 | + (new_clock->tv_usec - old_clock->tv_usec) / 1000000.0; 43 | *old_clock = *new_clock; 44 | return elapsed; 45 | } else { 46 | return 0.; 47 | } 48 | } 49 | 50 | //------------------------------------------------------------------------------ 51 | // Misc 52 | // Returns sum(-p * log2(p)) / 8 over the symbol counts that FSCCountSymbols() reports for in[0..size), i.e. the entropy in bits divided by 8. Returns 0 when all counts are zero. 53 | double GetEntropy(const uint8_t* in, size_t size) { 54 | double S = 0.; 55 | uint32_t counts[MAX_SYMBOLS]; 56 | FSCCountSymbols(in, size, counts); 57 | uint32_t total = 0; 58 | int i; 59 | for (i = 0; i < MAX_SYMBOLS; ++i) { 60 | total += counts[i]; 61 | } 62 | for (i = 0; i < MAX_SYMBOLS; ++i) { 63 | if (counts[i]) { 64 | const double p = (double)counts[i] / total; // divide in double precision: float would round counts above 2^24 65 | S += -p * log(p); 66 | } 67 | } 68 | S /= 8. 
* log(2.); 69 | return S; 70 | } 71 | 72 | int DrawSymbol(const uint64_t cumul[256], int max_symbol, 73 | int total, int nb_bits, FSCRandom* rg) { 74 | int p; 75 | do { 76 | p = FSCRandomBits(rg, nb_bits); 77 | } while (p > total); 78 | 79 | int symbol_l = 0, symbol_u = max_symbol; 80 | while (symbol_l + 1 < symbol_u) { 81 | const int mid = (symbol_u + symbol_l) >> 1; 82 | const int c = cumul[mid]; 83 | if (p == c) return mid; 84 | else if (p < c) symbol_u = mid; 85 | else symbol_l = mid; 86 | } 87 | return symbol_l; 88 | } 89 | 90 | //------------------------------------------------------------------------------ 91 | 92 | int FSCParseCodingMethodOpt(const char opt[], FSCCodingMethod* const method) { 93 | if (!strcmp(opt, "-buck")) { 94 | *method = CODING_METHOD_BUCKET; 95 | } else if (!strcmp(opt, "-rev")) { 96 | *method = CODING_METHOD_REVERSE; 97 | } else if (!strcmp(opt, "-mod")) { 98 | *method = CODING_METHOD_MODULO; 99 | } else if (!strcmp(opt, "-pack")) { 100 | *method = CODING_METHOD_PACK; 101 | } else if (!strcmp(opt, "-w")) { 102 | *method = CODING_METHOD_16B; 103 | } else if (!strcmp(opt, "-w2")) { 104 | *method = CODING_METHOD_16B_2X; 105 | } else if (!strcmp(opt, "-w4")) { 106 | *method = CODING_METHOD_16B_4X; 107 | } else if (!strcmp(opt, "-a")) { 108 | *method = CODING_METHOD_16B_ALIAS; 109 | } else if (!strcmp(opt, "-a2")) { 110 | *method = CODING_METHOD_16B_ALIAS_2X; 111 | } else { 112 | return 0; 113 | } 114 | return 1; 115 | } 116 | 117 | void FSCPrintCodingOptions() { 118 | printf("-w : use word-based coding.\n"); 119 | printf("-w2 : use word-based coding 2x interleave.\n"); 120 | printf("-w4 : use word-based coding 4x interleave.\n"); 121 | printf("-a : use word-based coding + alias.\n"); 122 | printf("-a2 : use word-based coding + alias + interleave.\n"); 123 | printf("-mod : use modulo spread function\n"); 124 | printf("-rev : use reverse spread function\n"); 125 | printf("-pack : use pack spread function\n"); 126 | printf("-buck : use bucket 
spread function\n"); 127 | } 128 | -------------------------------------------------------------------------------- /fsc.h: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | //------------------------------------------------------------------------------ 15 | // 16 | // Finite State Coder. Main header. 
17 | // 18 | // based on Jarek Duda's paper: http://arxiv.org/pdf/1311.2540v1.pdf 19 | // 20 | // Author: Skal (pascal.massimino@gmail.com) 21 | 22 | #ifndef FSC_FSC_H_ 23 | #define FSC_FSC_H_ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | // Coder parameters 30 | #define BLOCK_SIZE 8192 // sliding window size (must be >= 256) 31 | #define MAX_SYMBOLS 256 // byte-based 32 | #define LOG_TAB_SIZE 14 // max internal precision (must be <= 14) 33 | #define MAX_LOG_TAB_SIZE 16 // max precision for word-based coding 34 | #define CRYPTO_KEY 0 35 | // disabled for now (so we investigate core algo): 36 | // #define CRYPTO_KEY 0x3fdc 37 | 38 | // Compression methods 39 | typedef enum { 40 | CODING_METHOD_BUCKET = 0, 41 | CODING_METHOD_REVERSE, 42 | CODING_METHOD_MODULO, 43 | CODING_METHOD_PACK, 44 | 45 | CODING_METHOD_16B, 46 | CODING_METHOD_16B_2X, 47 | CODING_METHOD_16B_ALIAS, 48 | CODING_METHOD_16B_ALIAS_2X, 49 | 50 | CODING_METHOD_16B_4X, // default 51 | 52 | CODING_METHOD_UNIQUE, // internal, do not use directly 53 | 54 | CODING_METHOD_LAST, 55 | CODING_METHOD_DEFAULT = CODING_METHOD_16B_4X 56 | } FSCCodingMethod; 57 | 58 | typedef uint32_t FSCStateW; 59 | typedef uint16_t FSCType; // storage type 60 | #define FSC_BITS 16 61 | #define FSC_MAX ((FSCStateW)1 << FSC_BITS) 62 | #define FSC_BITS_MASK (((FSCStateW)1 << FSC_BITS) - 1) 63 | 64 | // derived params 65 | #define TAB_SIZE (1U << LOG_TAB_SIZE) 66 | #define TAB_MASK (TAB_SIZE - 1) 67 | #define MAX_TAB_SIZE (1 << MAX_LOG_TAB_SIZE) 68 | 69 | // Header parameter 70 | #define TAB_HDR_BITS 6 71 | #define HDR_SYMBOL_LIMIT 20 72 | 73 | //------------------------------------------------------------------------------ 74 | // Decoding 75 | 76 | // Return 0 upon error. 
77 | // Result is in *out, must be deallocated using free() 78 | int FSCDecode(const uint8_t* in, size_t in_size, uint8_t** out, size_t* out_size); 79 | 80 | // non-canned API: 81 | typedef struct FSCDecoder FSCDecoder; 82 | FSCDecoder* FSCInit(const uint8_t* input, size_t len); 83 | int FSCIsOk(FSCDecoder* dec); 84 | int FSCDecompress(FSCDecoder* dec, uint8_t** out, size_t* size); 85 | void FSCDelete(FSCDecoder* dec); 86 | 87 | //------------------------------------------------------------------------------ 88 | // Encoding 89 | 90 | // Return 0 upon error. 91 | // Result is in *out, must be deallocated using free() 92 | int FSCEncode(const uint8_t* in, size_t in_size, 93 | uint8_t** out, size_t* out_size, 94 | int log_tab_size, FSCCodingMethod method); 95 | 96 | // utils 97 | void FSCCountSymbols(const uint8_t* in, size_t in_size, 98 | uint32_t counts[MAX_SYMBOLS]); 99 | int FSCNormalizeCounts(uint32_t counts[MAX_SYMBOLS], int max_symbol, 100 | int log_tab_size); 101 | 102 | // 103 | //------------------------------------------------------------------------------ 104 | // mapping function (common to enc/dec) 105 | 106 | // returns 0 in case of error 107 | typedef int (*FSCBuildSpreadTableFunc)(int max_symbol, const uint32_t counts[], 108 | int log_tab_size, uint8_t symbols[]); 109 | 110 | extern int BuildSpreadTableBucket(int max_symbol, const uint32_t counts[], 111 | int log_tab_size, uint8_t symbols[]); 112 | extern int BuildSpreadTableReverse(int max_symbol, const uint32_t counts[], 113 | int log_tab_size, uint8_t symbols[]); 114 | extern int BuildSpreadTableModulo(int max_symbol, const uint32_t counts[], 115 | int log_tab_size, uint8_t symbols[]); 116 | extern int BuildSpreadTablePack(int max_symbol, const uint32_t counts[], 117 | int log_tab_size, uint8_t symbols[]); 118 | 119 | #endif // FSC_FSC_H_ 120 | -------------------------------------------------------------------------------- /divide.h: 
-------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | //------------------------------------------------------------------------------ 15 | // 16 | // Everything needed to implement divide-by-multiply 17 | // 18 | // You need to define RECIPROCAL_BITS before including this header, with value 19 | // being either 32, 16 or 0 (=use float) 20 | // This header will define the following: 21 | // * inv_t type (to store the data needed for the reciprocal calculation) 22 | // * void FSCInitDivide(ANSProba p, inv_t* div): 23 | // to be called to initialize the data to compute 1/p 24 | // * ANSStateW FSCDivide(ANSStateW x, inv_t div) 25 | // to perform x/p (corner case: returns 'x' for p==0). 
26 | // 27 | // A bit of theory from this paper (mentioned by Ryg): 28 | // Alverson 1991: "Integer Division Using Reciprocals" 29 | // http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.33.1710 30 | // 31 | // If x and p are unsigned words represented by FIX bits (e.g. FIX=32), 32 | // then you can *exactly* compute x/p (floor-rounded) using: 33 | // k = ceil(log2(p)) <- number of bits needed to represent 'p' 34 | // s = FIX + k 35 | // a = ceil((1 << s) / p) = ((1 << s) - 1) / p + 1 36 | // x/p = (a * x) >> s = ((a * x) >> FIX) >> k 37 | // So, one needs to store 'a' using FIX + 1 bits, and to be able to perform 38 | // the multiply 'a * x' with (FIX + 1) x FIX bits operands. 39 | // Fortunately we can decompose the multiply in two steps: 40 | // b = (a * x) >> FIX (usually easier with specialized instruction), 41 | // followed by 'b >> k'. 42 | // Also, 'a' always has a leading '1' at bit-position FIX. We don't need to 43 | // store it and just store a - (1 << FIX), which fits in 'FIX' bits exactly. 44 | // Then the final multiply can be rewritten as: 45 | // b = (a * x) >> FIX // <- FIX*FIX multiply, keeping the upper FIX bits 46 | // x/p = (b + x) >> k 47 | // By using this trick, one can use the full word range for probabilities 'p'. 48 | // Note that the last 'b+x' addition must take care of potential overflow by 49 | // one bit. Beware!
50 | 51 | #include "./fsc_utils.h" 52 | 53 | #ifndef HAVE_CEIL_LOG2 54 | #define HAVE_CEIL_LOG2 55 | static inline int CeilLog2(uint64_t p) { 56 | int s = 0; 57 | while (p > (1ull << s)) ++s; 58 | return s; 59 | } 60 | #endif // HAVE_CEIL_LOG2 61 | 62 | #if (RECIPROCAL_BITS == 32) 63 | 64 | typedef struct { 65 | uint64_t mult_; 66 | int shift_; 67 | } inv_t; 68 | 69 | #define DIV_FIX 64 70 | static inline void FSCInitDivide(ANSProba p, inv_t* div) { 71 | if (p > 0) { 72 | const int s = CeilLog2(p); 73 | assert(s <= PROBA_BITS); 74 | const uint64_t base = 1ull << (s + DIV_FIX - 32); 75 | const uint64_t hi = (base / p) << 32; // remove leading '1' bit 76 | const uint64_t lo = (((base % p) << 32) + p - 1) / p; 77 | div->mult_ = hi | lo; 78 | div->shift_ = s; 79 | } else { 80 | div->mult_ = 0; 81 | div->shift_ = 0; 82 | } 83 | } 84 | 85 | static inline ANSStateW FSCDivide(ANSStateW x, inv_t div) { 86 | const ANSStateW tmp = ((unsigned __int128)x * div.mult_) >> DIV_FIX; 87 | return ((unsigned __int128)tmp + x) >> div.shift_; 88 | } 89 | 90 | #undef DIV_FIX 91 | 92 | #elif (RECIPROCAL_BITS == 16) 93 | 94 | typedef struct { 95 | uint32_t mult_; 96 | int shift_; 97 | } inv_t; 98 | 99 | #define DIV_FIX 32 100 | static inline void FSCInitDivide(ANSProba p, inv_t* div) { 101 | if (p > 0) { 102 | const int s = CeilLog2(p); 103 | assert(s <= PROBA_BITS); 104 | const uint64_t base = (1ull << (DIV_FIX + s)) - 1; 105 | div->mult_ = (base / p + 1) & 0xfffffffful; // remove leading '1' bit 106 | div->shift_ = s; 107 | } else { 108 | div->mult_ = 0; 109 | div->shift_ = 0; 110 | } 111 | } 112 | 113 | static inline ANSStateW FSCDivide(ANSStateW x, inv_t div) { 114 | const uint32_t tmp = ((uint64_t)x * div.mult_) >> DIV_FIX; 115 | return ((uint64_t)tmp + x) >> div.shift_; // (should be add-shift-with-carry-overflow) 116 | } 117 | 118 | #undef DIV_FIX 119 | 120 | #elif (RECIPROCAL_BITS == 0) 121 | 122 | typedef ANSProba inv_t; 123 | 124 | static inline void FSCInitDivide(ANSProba p, 
inv_t* div) { *div = p; } 125 | 126 | static inline ANSStateW FSCDivide(ANSStateW x, inv_t div) { 127 | return x / div; 128 | } 129 | 130 | #endif 131 | -------------------------------------------------------------------------------- /fsc.c: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | //------------------------------------------------------------------------------ 15 | // 16 | // Finite State Coder compress / decompress utility 17 | // 18 | // fsc [options] < in_file > out_file 19 | // 20 | // Example: 21 | // fsc < foo > foo.compressed 22 | // fsc -d < foo.compressed > bar 23 | // diff foo bar 24 | // 25 | // Basically it's just a wrapper around calls to FSCEncode() / FSCDecode() 26 | // 27 | // Author: Skal (pascal.massimino@gmail.com) 28 | 29 | #include "./fsc_utils.h" 30 | 31 | //------------------------------------------------------------------------------ 32 | 33 | static int ExperimentalSpread(int max_symbol, const uint32_t counts[], 34 | int log_tab_size, uint8_t symbols[]) { 35 | return BuildSpreadTableBucket(max_symbol, counts, log_tab_size, symbols); 36 | } 37 | 38 | static void Help() { 39 | printf("usage: ./fsc [options] < in_file > out_file\n"); 40 | printf("options:\n"); 41 | printf("-c : compression mode (default)\n"); 42 | printf("-d : decompression 
mode\n"); 43 | printf("-s : don't emit output, just print stats\n"); 44 | printf("-l : change log-table-size (in [2..14], default 12)\n"); 45 | FSCPrintCodingOptions(); 46 | printf("-h : this help\n"); 47 | exit(0); 48 | } 49 | 50 | int main(int argc, const char* argv[]) { 51 | int log_tab_size = 12; 52 | int compress = 1; 53 | FSCCodingMethod method = CODING_METHOD_DEFAULT; 54 | int stats_only = 0; 55 | int ok = 0; 56 | int c; 57 | 58 | for (c = 1; c < argc; ++c) { 59 | if (!strcmp(argv[c], "-l") && c + 1 < argc) { 60 | log_tab_size = atoi(argv[++c]); 61 | if (log_tab_size > LOG_TAB_SIZE) log_tab_size = LOG_TAB_SIZE; 62 | else if (log_tab_size < 2) log_tab_size = 2; 63 | } else if (FSCParseCodingMethodOpt(argv[c], &method)) { 64 | continue; 65 | } else if (!strcmp(argv[c], "-m") && c + 1 < argc) { 66 | method = (FSCCodingMethod)atoi(argv[++c]); 67 | } else if (!strcmp(argv[c], "-s")) { 68 | stats_only = 1; 69 | } else if (!strcmp(argv[c], "-c")) { 70 | compress = 1; 71 | } else if (!strcmp(argv[c], "-d")) { 72 | compress = 0; 73 | } else if (!strcmp(argv[c], "-h")) { 74 | Help(); 75 | } 76 | } 77 | 78 | uint8_t* out = NULL; 79 | size_t out_size = 0; 80 | uint8_t* in = NULL; 81 | size_t in_size = 0; 82 | 83 | // Read input 84 | fseek(stdin, 0L, SEEK_END); 85 | in_size = ftell(stdin); 86 | fseek(stdin, 0L, SEEK_SET); 87 | if (in_size == (size_t)-1) { 88 | fprintf(stderr, "Missing/erroneous input!\n"); 89 | goto End; 90 | } 91 | in = (uint8_t*)malloc(in_size * sizeof(*in)); 92 | if (in == NULL) { 93 | fprintf(stderr, "Malloc(%lu) failed!\n", in_size); 94 | exit(-1); 95 | } 96 | ok = (fread(in, in_size, 1, stdin) == 1); 97 | if (!ok) { 98 | fprintf(stderr, "Error reading from stdin!\n"); 99 | goto End; 100 | } 101 | 102 | // Compress or decompress. 
103 | MyClock start, tmp; 104 | if (compress) { // encoding 105 | GetElapsed(&start, NULL); 106 | ok = FSCEncode(in, in_size, &out, &out_size, log_tab_size, method); 107 | if (!ok) { 108 | fprintf(stderr, "ERROR while encoding!\n"); 109 | goto End; 110 | } 111 | 112 | if (stats_only) { 113 | const double elapsed = GetElapsed(&tmp, &start); 114 | const double entropy = GetEntropy(in, in_size); 115 | const double MS = 1.e-6 * in_size; 116 | const double reduction = 1. * out_size / in_size; 117 | printf("Enc time: %.3f sec [%.2lf MS/s] (%ld bytes out, %ld in).\n", 118 | elapsed, MS / elapsed, out_size, in_size); 119 | printf("Entropy: %.4lf vs expected %.4lf " 120 | "(off by %.5lf bit/symbol [%.3lf%%])\n", 121 | reduction, entropy, reduction - entropy, 122 | 100. * (reduction - entropy) / entropy); 123 | } 124 | } else { // decoding 125 | GetElapsed(&start, NULL); 126 | ok = FSCDecode(in, in_size, &out, &out_size); 127 | if (!ok) { 128 | fprintf(stderr, "ERROR while decoding!\n"); 129 | goto End; 130 | } 131 | if (stats_only) { 132 | const double elapsed = GetElapsed(&tmp, &start); 133 | const double MS = 1.e-6 * out_size; 134 | printf("Dec time: %.3f sec [%.2lf MS/s].\n", elapsed, MS / elapsed); 135 | } 136 | } 137 | 138 | if (!stats_only) { 139 | ok = (fwrite(out, out_size, 1, stdout) == 1); 140 | if (!ok) { 141 | fprintf(stderr, "Error writing to stdout!\n"); 142 | goto End; 143 | } 144 | } 145 | 146 | End: 147 | free(in); 148 | free(out); 149 | return !ok; 150 | } 151 | -------------------------------------------------------------------------------- /alias.c: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 
5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | //------------------------------------------------------------------------------ 15 | // 16 | // Tools for implementing Vose's alias sampling method. 17 | // 18 | // Author: Skal (pascal.massimino@gmail.com) 19 | 20 | #include "./alias.h" 21 | 22 | #include 23 | #include 24 | 25 | int AliasInit(AliasTable t, const uint32_t counts[], int max_symbol) { 26 | // partition: small symbols at bottom, larges on top 27 | uint8_t symbols[ALIAS_MAX_SYMBOLS]; 28 | int l = ALIAS_MAX_SYMBOLS, s = 0; 29 | int i; 30 | alias_tab_t proba[ALIAS_MAX_SYMBOLS]; 31 | const uint32_t cut = MAX_TAB_SIZE >> LOG2_MAX_SYMBOLS; // 1/n 32 | uint32_t total = 0; 33 | assert((MAX_TAB_SIZE % ALIAS_MAX_SYMBOLS) == 0); 34 | if (max_symbol > ALIAS_MAX_SYMBOLS || max_symbol <= 0) return 0; 35 | 36 | for (i = 0; i < ALIAS_MAX_SYMBOLS; ++i) { 37 | proba[i] = (i < max_symbol) ? counts[i] : 0; 38 | total += proba[i]; 39 | if (proba[i] >= cut) { 40 | symbols[--l] = i; 41 | } else { 42 | symbols[s++] = i; 43 | } 44 | assert(s <= l); 45 | } 46 | assert(s == l); 47 | if (total != MAX_TAB_SIZE) return 0; // unnormalized 48 | 49 | while (s > 0) { 50 | const int S = symbols[--s]; 51 | const int L = symbols[l++]; 52 | assert(proba[S] < cut); // check that S is a small one 53 | t[S].cut_ = proba[S] + S * cut; 54 | t[S].other_ = L; 55 | proba[L] -= cut - proba[S]; // decrease large proba 56 | if (proba[L] >= cut) { 57 | --l; // large symbol stays large. Reuse the slot. 
58 | } else { 59 | symbols[s++] = L; // large becomes small 60 | } 61 | } 62 | while (l < ALIAS_MAX_SYMBOLS) { 63 | const int L = symbols[l++]; 64 | t[L].other_ = L; 65 | t[L].cut_ = cut + L * cut; // large symbols with max proba 66 | } 67 | 68 | // Accumulate counts and compute the start_. 69 | uint32_t c[MAX_SYMBOLS] = { 0 }; 70 | for (s = 0; s < MAX_SYMBOLS; ++s) { 71 | const int other = t[s].other_; 72 | const int count_s = t[s].cut_ - s * cut; 73 | const int count_other = cut - count_s; // complement to 'cut' 74 | t[s].start_ = s * cut - c[s]; 75 | t[s].other_start_ = s * cut + count_s - c[other]; 76 | c[s] += count_s; 77 | c[other] += count_other; 78 | } 79 | return AliasVerifyTable(t, counts, max_symbol); 80 | } 81 | 82 | //------------------------------------------------------------------------------ 83 | 84 | void AliasGenerateMap(const AliasTable t, alias_t map[MAX_TAB_SIZE]) { 85 | int r; 86 | for (r = 0; r < MAX_TAB_SIZE; ++r) { 87 | uint32_t dummy; 88 | map[r] = AliasSearchSymbol(t, r, &dummy); 89 | } 90 | } 91 | 92 | int AliasSpreadMap(int max_symbol, const uint32_t counts[], 93 | int log_tab_size, uint8_t symbols[]) { 94 | AliasTable t; 95 | int i; 96 | assert(log_tab_size == MAX_LOG_TAB_SIZE); // TODO(skal): support more sizes! 
97 | if (!AliasInit(t, counts, max_symbol)) return 0; 98 | for (i = 0; i < (1 << log_tab_size); ++i) { 99 | uint32_t dummy; 100 | symbols[i] = AliasSearchSymbol(t, i, &dummy); 101 | } 102 | return 1; 103 | } 104 | 105 | int AliasBuildEncMap(const uint32_t counts[], int max_symbol, 106 | uint16_t map[MAX_TAB_SIZE]) { 107 | AliasTable t; 108 | uint32_t r; 109 | uint32_t starts[MAX_SYMBOLS]; 110 | uint32_t start = 0; 111 | if (!AliasInit(t, counts, max_symbol)) return 0; 112 | 113 | for (r = 0; r < max_symbol; ++r) { 114 | starts[r] = start; 115 | start += counts[r]; 116 | } 117 | if (start != MAX_TAB_SIZE) return 0; 118 | 119 | for (r = 0; r < MAX_TAB_SIZE; ++r) { 120 | uint32_t rank; 121 | const uint32_t s = AliasSearchSymbol(t, r, &rank); 122 | map[rank + starts[s]] = r; 123 | } 124 | return 1; 125 | } 126 | 127 | //------------------------------------------------------------------------------ 128 | 129 | int AliasVerifyTable(const AliasTable t, 130 | const uint32_t counts[], int max_symbol) { 131 | int error = 0; 132 | #ifdef DEBUG_ALIAS 133 | int i, s; 134 | uint32_t c[MAX_SYMBOLS] = { 0 }; 135 | const float norm = 100.f / MAX_TAB_SIZE; 136 | { 137 | uint32_t r; 138 | alias_t map[MAX_TAB_SIZE]; 139 | AliasGenerateMap(t, map); 140 | for (r = 0; r < MAX_TAB_SIZE; ++r) ++c[map[r]]; 141 | } 142 | for (s = 0; s < max_symbol; ++s) { 143 | error += abs(c[s] - counts[s]); 144 | } 145 | for (; s < MAX_SYMBOLS; ++s) { 146 | error += (c[s] != 0); 147 | } 148 | 149 | memset(c, 0, sizeof(c)); 150 | for (i = 0; i < MAX_TAB_SIZE; ++i) { 151 | uint32_t rank; 152 | const int s = AliasSearchSymbol(t, i, &rank); 153 | const int count = c[s]++; 154 | if (rank != count) { 155 | const int r = i >> (MAX_LOG_TAB_SIZE - LOG2_MAX_SYMBOLS); 156 | const int use_alias = (i >= t[r].cut_); 157 | printf("%c s=%d%c %d / %d r=%d bucket=%d offset=%d | %d\n", 158 | " !"[rank != count], s, " *"[use_alias], rank, count, i, r, 159 | t[r].start_, t[r].other_start_); 160 | error += (rank != count); 161 
| } 162 | } 163 | // printf("Error: %d\n", error); 164 | #else 165 | (void)t; 166 | (void)counts; 167 | (void)max_symbol; 168 | #endif 169 | return (error == 0); 170 | } 171 | 172 | //------------------------------------------------------------------------------ 173 | -------------------------------------------------------------------------------- /bit_test.c: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | //------------------------------------------------------------------------------ 15 | // 16 | // Test proggy for FSC compression 17 | // 18 | // Compare various encoding stategy (bit-by-bit vs byte-by-byte) 19 | 20 | #include "./fsc_utils.h" 21 | 22 | static void Generate(uint8_t* in, size_t size, int p, uint8_t* in8, int N8, 23 | FSCRandom* rg) { 24 | uint8_t syms[255]; 25 | int i, k; 26 | for (i = 0; i < 255; ++i) { 27 | syms[i] = (i < p); 28 | } 29 | for (i = 0; i < size; ++i) { 30 | int k; 31 | do { 32 | k = FSCRandomBits(rg, 8); 33 | } while (k >= 255); 34 | in[i] = syms[k]; 35 | } 36 | // Pack 8 bits together 37 | memset(in8, 0, N8 * sizeof(*in8)); 38 | for (k = 0; k < 8; ++k) { 39 | for (i = k; i < size; i += 8) { 40 | const int bit = in[i]; 41 | if (k == 0) in8[i >> 3] = bit; 42 | else in8[i >> 3] |= bit << k; 43 | } 44 | } 45 | } 46 | 47 | static int CheckErrors(size_t N, const uint8_t* out, const uint8_t* base, 48 | const char* name) { 49 | int nb_errors = 0; 50 | int i; 51 | for (i = 0; i < N; ++i) { 52 | nb_errors += (out[i] != base[i]); 53 | } 54 | if (nb_errors) { 55 | printf("%s Decoding errors! (%d)\n", name, nb_errors); 56 | for (i = 0; i < (N > 40 ? 
40 : N); ++i) { 57 | printf("[%d/%d]%c", out[i], base[i], " *"[out[i] != base[i]]); 58 | } 59 | printf("\n"); 60 | for (i = 0; i < N; ++i) { 61 | printf("%c", ".*"[out[i] != base[i]]); 62 | if ((i & 31) == 31) printf("\n"); 63 | } 64 | printf("\n"); 65 | } 66 | return nb_errors; 67 | } 68 | 69 | //------------------------------------------------------------------------------ 70 | 71 | void Help() { 72 | printf("usage: ./bit_test [options] [size]\n"); 73 | printf("-l : max table size bits for bit-by-bit\n"); 74 | printf("-l8 : max table size bits for byte-by-byte\n"); 75 | printf("-p : try only one proba value\n"); 76 | printf("-fsc : skip FSC\n"); 77 | printf("-fsc8 : skip FSC8\n"); 78 | FSCPrintCodingOptions(); 79 | printf("-h : this help\n"); 80 | exit(0); 81 | } 82 | 83 | int main(int argc, const char* argv[]) { 84 | int N = 10000; 85 | int log_tab_size = 7; 86 | int log_tab_size_8 = LOG_TAB_SIZE - 1; 87 | int nb_errors = 0; 88 | int pmin = 0, pmax = 255; 89 | FSCCodingMethod method = CODING_METHOD_DEFAULT; 90 | int skip_FSC = 0; 91 | int skip_FSC8 = 0; 92 | int c; 93 | 94 | for (c = 1; c < argc; ++c) { 95 | if (!strcmp(argv[c], "-h")) { 96 | Help(); 97 | } else if (FSCParseCodingMethodOpt(argv[c], &method)) { 98 | continue; 99 | } else if (!strcmp(argv[c], "-m") && c + 1 < argc) { 100 | method = (FSCCodingMethod)atoi(argv[++c]); 101 | } else if (!strcmp(argv[c], "-fsc")) { 102 | skip_FSC = 1; 103 | } else if (!strcmp(argv[c], "-fsc8")) { 104 | skip_FSC8 = 1; 105 | } else if (!strcmp(argv[c], "-l") && c + 1 < argc) { 106 | log_tab_size = atoi(argv[++c]); 107 | } else if (!strcmp(argv[c], "-p") && c + 1 < argc) { 108 | pmin = pmax = atoi(argv[++c]); 109 | } else if (!strcmp(argv[c], "-l8") && c + 1 < argc) { 110 | log_tab_size_8 = atoi(argv[++c]); 111 | } else { 112 | N = atoi(argv[c]); 113 | if (N <= 2) N = 2; 114 | } 115 | } 116 | const int N8 = (N + 7) >> 3; 117 | const double MS = 1.e-6 * N / 8.; 118 | 119 | uint8_t* const base = (uint8_t*)malloc((N8 + N) * 
sizeof(*base)); 120 | if (base == NULL) return 0; 121 | uint8_t* const base8 = base + N; 122 | uint8_t* out = NULL; 123 | size_t out_size = 0; 124 | uint8_t* out8 = NULL; 125 | size_t out8_size = 0; 126 | 127 | FSCRandom r; 128 | FSCInitRandom(&r); 129 | 130 | int p; 131 | for (p = pmin; p <= pmax && nb_errors == 0; ++p) { 132 | const double P = p / 255.; 133 | MyClock start, tmp; 134 | double S_FSC = 0., S_FSC8 = 0.; 135 | double t_FSC_enc = 0., t_FSC_dec = 0.; 136 | double t_FSC8_enc = 0., t_FSC8_dec = 0.; 137 | int i; 138 | 139 | Generate(base, N, p, base8, N8, &r); 140 | const double S0 = xlogx(P) + xlogx(1. - P); 141 | const double S1 = GetEntropy(base, N); 142 | uint8_t* bits = NULL; 143 | size_t bits_size = 0; 144 | 145 | if (!skip_FSC) { 146 | GetElapsed(&start, NULL); 147 | nb_errors = !FSCEncode(base, N, &bits, &bits_size, log_tab_size, method); 148 | if (nb_errors) { 149 | printf("Encoding error!\n"); 150 | goto end; 151 | } 152 | S_FSC = 8.0 * bits_size / N; 153 | t_FSC_enc = MS / GetElapsed(&tmp, &start); 154 | 155 | GetElapsed(&start, NULL); 156 | 157 | nb_errors = !FSCDecode(bits, bits_size, &out, &out_size); 158 | t_FSC_dec = MS / GetElapsed(&tmp, &start); 159 | nb_errors += (out_size != N); 160 | nb_errors += CheckErrors(out_size, out, base, "FSC"); 161 | free(bits); 162 | } 163 | 164 | if (!skip_FSC8) { 165 | GetElapsed(&start, NULL); 166 | nb_errors = 167 | !FSCEncode(base8, N8, &bits, &bits_size, log_tab_size_8, method); 168 | if (nb_errors) { 169 | printf("FSC8 Encoding error!\n"); 170 | goto end; 171 | } 172 | S_FSC8 = 8.0 * bits_size / N; 173 | t_FSC8_enc = MS / GetElapsed(&tmp, &start); 174 | 175 | GetElapsed(&start, NULL); 176 | nb_errors = !FSCDecode(bits, bits_size, &out8, &out8_size); 177 | t_FSC8_dec = MS / GetElapsed(&tmp, &start); 178 | nb_errors += (out8_size != N8); 179 | nb_errors += CheckErrors(N8, out8, base8, "FSC8"); 180 | free(bits); 181 | } 182 | 183 | printf("%.7lf %.7lf %.7lf %.7lf %.7lf " 184 | " %3.1lf %3.1lf " 185 | " 
%3.1lf %3.1lf\n", 186 | P, S_FSC, S_FSC8, S0, S1, 187 | t_FSC_enc, t_FSC_dec, 188 | t_FSC8_enc, t_FSC8_dec); 189 | } 190 | printf("# 1 Proba|2 S_FSC |3 S_FSC8 |4 measure|5 theory " 191 | "|6 FSC enc|7 FSC dec" 192 | "|8 FSC8 c|9 FSC8 d\n"); 193 | 194 | end: 195 | free(base); 196 | free(out); 197 | free(out8); 198 | return (nb_errors != 0); 199 | } 200 | -------------------------------------------------------------------------------- /bits.c: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | //------------------------------------------------------------------------------ 15 | // 16 | // Bit reader/writer 17 | // 18 | // Author: Skal (pascal.massimino@gmail.com) 19 | 20 | #include "./bits.h" 21 | #include // for memcpy() 22 | 23 | #define MAX_BITS 16 // max number of bit we have to read or write 24 | 25 | //------------------------------------------------------------------------------ 26 | // endian-ness 27 | 28 | #if defined(__APPLE__) 29 | #include 30 | #define htole32 OSSwapHostToLittleInt32 31 | #define htole16 OSSwapHostToLittleInt16 32 | #define le16toh OSSwapLittleToHostInt16 33 | #define le32toh OSSwapLittleToHostInt32 34 | #else 35 | #include 36 | #endif 37 | 38 | #if (RBYTES == 4) 39 | #define RSWAP le32toh 40 | #else 41 | #define RSWAP le16toh 42 | #endif 43 | 44 | //------------------------------------------------------------------------------ 45 | // BitReader 46 | 47 | #define LBITS (sizeof(fsc_val_t) * 8) 48 | 49 | void FSCInitBitReader(FSCBitReader* const br, 50 | const uint8_t* const start, size_t length) { 51 | size_t i; 52 | assert(br != NULL); 53 | assert(start != NULL); 54 | 55 | br->buf_ = start; 56 | br->end_ = start + length; 57 | br->bits_ = 0; 58 | br->bit_pos_ = 0; 59 | br->eof_ = 0; 60 | for (i = 0; i < sizeof(br->bits_) && i < length; ++i) { 61 | br->bits_ |= ((fsc_val_t)(*br->buf_++)) << (8 * i); 62 | } 63 | } 64 | 65 | void FSCSetReadBufferPos(FSCBitReader* const br, const uint8_t* buf) { 66 | br->buf_ = buf; 67 | br->bits_ = 0; 68 | br->bit_pos_ = sizeof(br->bits_) * 8; 69 | br->eof_ = (buf > br->end_); 70 | } 71 | 72 | const uint8_t* FSCGetBytePos(FSCBitReader* const br) { 73 | return br->buf_; 74 | } 75 | const uint8_t* FSCGetByteEnd(FSCBitReader* const br) { 76 | return br->end_; 77 | } 78 | const uint8_t* FSCBitAlign(FSCBitReader* const br) { 79 | br->buf_ -= (LBITS - br->bit_pos_) >> 3; 80 | br->bit_pos_ = 0; 81 | br->bits_ = 0; 82 | br->eof_ |= (br->buf_ <= br->end_); 83 | return br->buf_; 84 | } 85 | 86 | void 
FSCDoFillBitWindow(FSCBitReader* const br) { 87 | if (br->buf_ + sizeof(br->bits_) < br->end_) { 88 | // read several bytes at a time without bswap 89 | br->bits_ >>= RBITS; 90 | br->bit_pos_ -= RBITS; 91 | br->bits_ |= (fsc_val_t)RSWAP(*(const fsc_val_t*)(br->buf_)) << (LBITS - RBITS); 92 | br->buf_ += RBYTES; 93 | return; 94 | } else { // finish with bytes 95 | while (br->bit_pos_ >= 8 && br->buf_ < br->end_) { 96 | br->bit_pos_ -= 8; 97 | br->bits_ >>= 8; 98 | br->bits_ |= ((fsc_val_t)(*br->buf_++)) << (LBITS - 8); 99 | } 100 | br->eof_ = (br->buf_ == br->end_) && (br->bit_pos_ >= LBITS); 101 | } 102 | } 103 | 104 | uint32_t FSCReadBits(FSCBitReader* const br, int nb) { 105 | assert(nb > 0 && nb <= RBITS); 106 | FSCFillBitWindow(br); 107 | const uint32_t ret = 108 | (uint32_t)(br->bits_ >> br->bit_pos_) & ((1 << nb) - 1); 109 | br->bit_pos_ += nb; 110 | return ret; 111 | } 112 | 113 | //------------------------------------------------------------------------------ 114 | // BitWriter 115 | 116 | static int SetSize(FSCBitWriter* const bw, size_t new_size) { 117 | if (new_size < 4096) new_size = 4096; 118 | uint8_t* const new_buf = (uint8_t*)malloc(new_size * sizeof(*new_buf)); 119 | if (new_buf == NULL) { 120 | bw->error_ = 1; 121 | return 0; 122 | } 123 | const size_t cur_size = bw->cur_ - bw->buf_; 124 | if (cur_size > 0) memcpy(new_buf, bw->buf_, cur_size * sizeof(*new_buf)); 125 | free(bw->buf_); 126 | bw->buf_ = new_buf; 127 | bw->cur_ = new_buf + cur_size; 128 | bw->end_ = bw->buf_ + new_size; 129 | return 1; 130 | } 131 | 132 | static int GrowSize(FSCBitWriter* const bw) { 133 | size_t new_size = (3 * (bw->end_ - bw->buf_)) >> 1; 134 | return SetSize(bw, new_size + 16384u); 135 | } 136 | 137 | static void CheckRoom(FSCBitWriter* const bw, int nb) { 138 | uint8_t* const cur = bw->cur_; 139 | uint8_t* const end = bw->end_; 140 | if (&cur[(nb + 7) >> 3] > end) GrowSize(bw); 141 | } 142 | 143 | int FSCBitWriterInit(FSCBitWriter* const bw, size_t expected_size) 
{ 144 | memset(bw, 0, sizeof(*bw)); 145 | return SetSize(bw, expected_size / sizeof(*bw->buf_)); 146 | } 147 | 148 | void FSCBitWriterFlush(FSCBitWriter* const bw) { 149 | CheckRoom(bw, bw->used_); 150 | while (bw->used_ > 0) { 151 | *bw->cur_++ = bw->bits_; 152 | bw->bits_ >>= 8; 153 | bw->used_ -= 8; 154 | } 155 | bw->used_ = 0; 156 | bw->bits_ = 0; 157 | } 158 | 159 | void FSCBitWriterDestroy(FSCBitWriter* const bw) { 160 | if (bw != NULL) { 161 | free(bw->buf_); 162 | memset(bw, 0, sizeof(*bw)); 163 | } 164 | } 165 | 166 | #if (defined(__x86_64__) || defined(_M_X64)) // 64 bits 167 | typedef uint32_t fsc_wval_t; 168 | #define WBYTES 4 169 | #define WSWAP htole32 170 | #else // 32 bits 171 | typedef uint16_t fsc_wval_t; 172 | #define WBYTES 2 173 | #define WSWAP htole16 174 | #endif 175 | #define WBITS (WBYTES * 8) 176 | 177 | void FSCWriteBits(FSCBitWriter* const bw, uint32_t bits, int nb) { 178 | assert(nb <= MAX_BITS); 179 | assert(bits < (1 << nb)); 180 | if (nb > 0) { 181 | bw->bits_ |= ((fsc_val_t)bits) << bw->used_; 182 | bw->used_ += nb; 183 | if (bw->used_ >= WBITS) { 184 | CheckRoom(bw, WBITS); 185 | *(fsc_wval_t*)bw->cur_ = WSWAP(bw->bits_); 186 | bw->cur_ += WBYTES; 187 | bw->bits_ >>= WBITS; 188 | bw->used_ -= WBITS; 189 | } 190 | } 191 | } 192 | 193 | int FSCAppend(FSCBitWriter* const bw, const uint8_t* const buf, size_t len) { 194 | FSCBitWriterFlush(bw); 195 | uint8_t* const new_end = bw->cur_ + len; 196 | if (bw->cur_ + len > bw->end_) { 197 | const size_t min_len = new_end - bw->buf_; 198 | size_t request = (bw->end_ - bw->buf_) * 2; // double buffer 199 | if (request < min_len) request = min_len; 200 | request = ((request >> 10) + 1) << 10; 201 | if (!SetSize(bw, request)) return 0; 202 | } 203 | memcpy(bw->cur_, buf, len); 204 | bw->cur_ += len; 205 | return 1; 206 | } 207 | 208 | //------------------------------------------------------------------------------ 209 | 
-------------------------------------------------------------------------------- /test.c: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | //------------------------------------------------------------------------------ 15 | // 16 | // Test proggy for FSC compression 17 | // 18 | // Generate various PDF and compress / decompress them 19 | // 20 | // Author: Skal (pascal.massimino@gmail.com) 21 | 22 | #include "./fsc_utils.h" 23 | 24 | //------------------------------------------------------------------------------ 25 | 26 | static int GeneratePdf(int type, int param, uint32_t pdf[256], 27 | FSCRandom* rg, int max_symbol) { 28 | int i; 29 | uint32_t total = 0; 30 | const double fparam = param / 16.0; 31 | for (i = 0; i < max_symbol; ++i) { 32 | const double x = (1. / 256.) * i; 33 | double v = 0; 34 | if (FSCRandomBits(rg, 8) < 254) { 35 | if (type == 0) v = 65536. / (1. + 512. * fparam * x); // ~1/x 36 | else if (type == 1) v = 65536. / (1. + fparam * 1024. * x * x); // ~1/x/x 37 | else if (type == 2) v = FSCRandomBits(rg, 8); // random 38 | else if (type == 3) { // exp 39 | v = 1000 * exp(-x * 64. * fparam); 40 | } else if (type == 4) { 41 | v = 1000 * exp(-log(1. + 512. 
* x) * fparam); // power 42 | } else { 43 | v = 256; // uniform 44 | } 45 | uint32_t V = (uint32_t)v; 46 | V += FSCRandomBits(rg, 4); // a little extra noise 47 | pdf[i] = V; 48 | total += V; 49 | } else { // force few 0's from time to time 50 | pdf[i] = 0; 51 | } 52 | } 53 | return total; 54 | } 55 | 56 | static void PrintPdf(const uint8_t* base, int size) { 57 | uint32_t counts[MAX_SYMBOLS]; 58 | FSCCountSymbols(base, size, counts); 59 | uint32_t max = 0; 60 | int max_symbol = 0; 61 | int i; 62 | for (i = 0; i < MAX_SYMBOLS; ++i) { 63 | if (counts[i] > max) max = counts[i]; 64 | if (counts[i]) max_symbol = i + 1; 65 | } 66 | 67 | for (i = 0; i < max_symbol; ++i) { 68 | int len = 80 * counts[i] / max; 69 | printf("#%d ", i); 70 | while (len-- > 0) printf("-"); 71 | printf("|\n"); 72 | } 73 | for (i = max_symbol; i < 256; ++i) { 74 | if (counts[i]) { 75 | fprintf(stderr, "Error for symbol #%d (%d)!!\n", i, counts[i]); 76 | exit(-1); 77 | } 78 | } 79 | } 80 | 81 | static void SavePdfToFile(const uint8_t* base, int size, const char* pdf_file) { 82 | FILE* const file = fopen(pdf_file, "wb"); 83 | if (file == NULL) { 84 | fprintf(stderr, "Error opening pdf output file [%s] !\n", pdf_file); 85 | } else { 86 | if (fwrite(base, size * sizeof(base[0]), 1, file) != 1) { 87 | fprintf(stderr, "Error while writing pdf output file\n"); 88 | } 89 | fclose(file); 90 | fprintf(stderr, "Saved pdf into file [%s].\n", pdf_file); 91 | } 92 | } 93 | 94 | //------------------------------------------------------------------------------ 95 | 96 | static void Help() { 97 | printf("usage: ./test [options] [size]\n"); 98 | printf("options:\n"); 99 | printf("-t : distribution type (in [0..5])\n"); 100 | printf("-p : distribution param (>=0)\n"); 101 | printf("-s : number of symbols (in [2..256]))\n"); 102 | printf("-l : max table size bits (<= LOG_TAB_SIZE)\n"); 103 | printf("-save : save input message to file\n"); 104 | printf("-d : print distribution\n"); 105 | printf("-f : message file 
name\n"); 106 | FSCPrintCodingOptions(); 107 | printf("-h : this help\n"); 108 | exit(0); 109 | } 110 | 111 | int main(int argc, const char* argv[]) { 112 | int N = 100000000; 113 | int pdf_type = 2; 114 | int pdf_param = 5; 115 | int max_symbol = MAX_SYMBOLS; 116 | int print_pdf = 0; 117 | int log_tab_size = LOG_TAB_SIZE; 118 | FSCCodingMethod method = CODING_METHOD_DEFAULT; 119 | const char* in_file = NULL; 120 | const char* pdf_file = NULL; 121 | int c; 122 | 123 | for (c = 1; c < argc; ++c) { 124 | if (!strcmp(argv[c], "-t") && c + 1 < argc) { 125 | pdf_type = atoi(argv[++c]); 126 | if (pdf_type < 0) pdf_type = 0; 127 | else if (pdf_type > 5) pdf_type = 5; 128 | } else if (!strcmp(argv[c], "-p") && c + 1 < argc) { 129 | pdf_param = atoi(argv[++c]); 130 | if (pdf_type < 0) pdf_param = 0; 131 | } else if (!strcmp(argv[c], "-s") && c + 1 < argc) { 132 | max_symbol = atoi(argv[++c]); 133 | if (max_symbol < 2) max_symbol = 2; 134 | else if (max_symbol > 256) max_symbol = 256; 135 | } else if (!strcmp(argv[c], "-l") && c + 1 < argc) { 136 | log_tab_size = atoi(argv[++c]); 137 | if (log_tab_size > LOG_TAB_SIZE) log_tab_size = LOG_TAB_SIZE; 138 | } else if (!strcmp(argv[c], "-f") && c + 1 < argc) { 139 | in_file = argv[++c]; 140 | } else if (FSCParseCodingMethodOpt(argv[c], &method)) { 141 | continue; 142 | } else if (!strcmp(argv[c], "-m") && c + 1 < argc) { 143 | method = (FSCCodingMethod)atoi(argv[++c]); 144 | } else if (!strcmp(argv[c], "-save") && c + 1 < argc) { 145 | pdf_file = argv[++c]; 146 | } else if (!strcmp(argv[c], "-d")) { 147 | print_pdf = 1; 148 | } else if (!strcmp(argv[c], "-h")) { 149 | Help(); 150 | } else { 151 | N = atoi(argv[c]); 152 | if (N <= 2) N = 2; 153 | } 154 | } 155 | uint8_t* base; 156 | FILE* file = NULL; 157 | if (in_file != NULL) { 158 | file = fopen(in_file, "rb"); 159 | if (file == NULL) { 160 | fprintf(stderr, "Error opening file %s!\n", in_file); 161 | exit(-1); 162 | } 163 | fseek(file, 0L, SEEK_END); 164 | N = ftell(file); 165 
| fseek(file, 0L, SEEK_SET); 166 | printf("Read File [%s] (%d bytes)\n", in_file, N); 167 | } 168 | 169 | base = (uint8_t*)malloc(N * sizeof(*base)); 170 | if (base == NULL) { 171 | fprintf(stderr, "Malloc(%d) failed!\n", N); 172 | exit(-1); 173 | } 174 | 175 | if (file != NULL) { 176 | N = fread(base, 1, N, file); 177 | fclose(file); 178 | } else { 179 | uint32_t pdf[256] = { 0 }; 180 | int i; 181 | FSCRandom r; 182 | FSCInitRandom(&r); 183 | const int total = GeneratePdf(pdf_type, pdf_param, pdf, &r, max_symbol); 184 | const int nb_bits = 1 + log2(total - 1); 185 | uint64_t cumul[256 + 1]; 186 | cumul[0] = 0; 187 | for (i = 1; i <= max_symbol; ++i) { 188 | cumul[i] = cumul[i - 1] + pdf[i - 1]; 189 | } 190 | for (i = 0; i < N; ++i) { 191 | base[i] = DrawSymbol(cumul, max_symbol, total, nb_bits, &r); 192 | } 193 | } 194 | printf("PDF generated OK (max symbol:%d).\n", max_symbol); 195 | if (print_pdf) { 196 | PrintPdf(base, N); 197 | printf("[Params: type=%d param=%d max_symbol=%d size=%d]\n", 198 | pdf_type, pdf_param, max_symbol, N); 199 | } 200 | if (pdf_file != NULL) { 201 | SavePdfToFile(base, N, pdf_file); 202 | } 203 | const double entropy = GetEntropy(base, N); 204 | 205 | int nb_errors = 0; 206 | uint8_t* out = NULL; 207 | size_t out_size = 0; 208 | // Encode 209 | uint8_t* bits = NULL; 210 | size_t bits_size = 0; 211 | MyClock start, tmp; 212 | GetElapsed(&start, NULL); 213 | int ok = FSCEncode(base, N, &bits, &bits_size, log_tab_size, method); 214 | double elapsed = GetElapsed(&tmp, &start); 215 | const double MS = 1.e-6 * N; // 8.e-6 * bits_size; 216 | const double reduction = 1. * bits_size / N; 217 | 218 | printf("Enc time: %.3f sec [%.2lf MS/s] (%ld bytes out, %d in).\n", 219 | elapsed, MS / elapsed, bits_size, N); 220 | printf("Entropy: %.4lf vs expected %.4lf " 221 | "(off by %.5lf bit/symbol [%.3lf%%])\n", 222 | reduction, entropy, reduction - entropy, 223 | 100. 
* (reduction - entropy) / entropy); 224 | 225 | if (!ok) { 226 | fprintf(stderr, "ERROR while encoding!\n"); 227 | nb_errors = 1; 228 | } else { // Decode 229 | GetElapsed(&start, NULL); 230 | ok = FSCDecode(bits, bits_size, &out, &out_size); 231 | elapsed = GetElapsed(&tmp, &start); 232 | printf("Dec time: %.3f sec [%.2lf MS/s].\n", elapsed, MS / elapsed); 233 | ok &= (out_size == N); 234 | 235 | if (!ok) { 236 | fprintf(stderr, "Decoding error!\n"); 237 | nb_errors = 1; 238 | } else { 239 | int i; 240 | for (i = 0; i < N; ++i) { 241 | nb_errors += (out[i] != base[i]); 242 | } 243 | printf("#%d errors\n", nb_errors); 244 | if (nb_errors) fprintf(stderr, "*** PROBLEM!! ***\n"); 245 | } 246 | } 247 | 248 | End: 249 | free(base); 250 | free(out); 251 | free(bits); 252 | return (nb_errors != 0); 253 | } 254 | -------------------------------------------------------------------------------- /histo.c: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | //------------------------------------------------------------------------------ 15 | // 16 | // Histograms / cumulative frequencies / spread functions 17 | // 18 | // Author: Skal (pascal.massimino@gmail.com) 19 | 20 | #include "./fsc.h" 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | //------------------------------------------------------------------------------ 27 | 28 | void FSCCountSymbols(const uint8_t* in, size_t in_size, 29 | uint32_t counts[MAX_SYMBOLS]) { 30 | size_t n; 31 | memset(counts, 0, MAX_SYMBOLS * sizeof(counts[0])); 32 | for (n = 0; n < in_size; ++n) ++counts[in[n]]; 33 | } 34 | 35 | //------------------------------------------------------------------------------ 36 | // Selection helper function 37 | 38 | static void SwapU32(uint32_t* const A, uint32_t* const B) { 39 | const uint32_t tmp = *A; 40 | *A = *B; 41 | *B = tmp; 42 | } 43 | static void CheckSwapU32(uint32_t* const A, uint32_t* const B) { 44 | assert(A <= B); 45 | if (A != B && *A < *B) SwapU32(A, B); 46 | } 47 | 48 | // select the Mth largest keys amongst N 49 | void Select(uint32_t* const keys, int M, int N) { 50 | if (M == N || N <= 1) return; // done 51 | int low = 0, hi = N - 1; 52 | while (1) { 53 | if (low + 1 >= hi) { // only 1 or 2 left 54 | if (low + 1 == hi) CheckSwapU32(keys + low, keys + low + 1); 55 | return; // done! 56 | } 57 | const int mid = (low + hi) >> 1; 58 | // sort low | mid | hi triplet of entries 59 | CheckSwapU32(keys + low, keys + hi); 60 | CheckSwapU32(keys + mid, keys + hi); 61 | CheckSwapU32(keys + low, keys + mid); 62 | // move mid in position low + 1 (will serve as pivot) 63 | SwapU32(keys + low + 1, keys + mid); 64 | const uint32_t pivot = keys[low + 1]; 65 | // and start loop over [low + 2, hi - 1] sub-range 66 | int i = low + 2; 67 | int j = hi - 1; 68 | while (1) { 69 | while (keys[i] > pivot) ++i; 70 | while (keys[j] < pivot) --j; 71 | if (j < i) break; // they crossed the streams! 
72 | SwapU32(keys + i, keys + j); 73 | } 74 | keys[low + 1] = keys[j]; // move pivot back to position 75 | keys[j] = pivot; 76 | // recurse down (only one branch) 77 | if (j >= M) { 78 | hi = j - 1; 79 | } else { 80 | low = j + 1; 81 | } 82 | } 83 | } 84 | 85 | //------------------------------------------------------------------------------ 86 | // Analyze counts[] and renormalize with Squeaky Wheel fix, so that 87 | // the total is rescaled to be equal to tab_size exactly. 88 | int FSCNormalizeCounts(uint32_t counts[MAX_SYMBOLS], int max_symbol, 89 | int log_tab_size) { 90 | const int tab_size = 1 << log_tab_size; 91 | uint64_t total = 0; 92 | int nb_symbols = 0; 93 | int n; 94 | int last_nz = 0; 95 | 96 | for (n = 0; n < max_symbol; ++n) { 97 | total += counts[n]; 98 | if (counts[n] > 0) { 99 | ++nb_symbols; 100 | last_nz = n + 1; 101 | } 102 | } 103 | if (nb_symbols < 1) return 0; // won't work 104 | if (log_tab_size < 1) return 0; 105 | if (nb_symbols > tab_size) return 0; 106 | max_symbol = last_nz; 107 | 108 | uint32_t keys[MAX_SYMBOLS]; 109 | int miss = tab_size; 110 | const float norm = 1.f * tab_size / total; 111 | int non_zero = 0; 112 | const float key_norm = (float)((1u << 24) / MAX_SYMBOLS); 113 | for (n = 0; n < max_symbol; ++n) { 114 | if (counts[n] > 0) { 115 | const float target = norm * counts[n]; 116 | counts[n] = (uint32_t)(target + .5); // round 117 | if (counts[n] == 0) counts[n] = 1; 118 | miss -= counts[n]; 119 | const uint32_t error = (uint32_t)(key_norm * (target - counts[n])); 120 | keys[non_zero++] = (error * MAX_SYMBOLS) + n; 121 | } 122 | } 123 | if (miss == 0) return max_symbol; 124 | 125 | if (miss > 0) { 126 | Select(keys, miss, non_zero); 127 | for (n = 0; n < miss; ++n) { 128 | ++counts[keys[n] % MAX_SYMBOLS]; 129 | } 130 | } else { 131 | // Overflow case. We need to decrease some counts, but need extra care 132 | // to not make any counts[] go to zero. 
So we just loop and shave off 133 | // the largest elements greater than 2 until we're good. It's garanteed 134 | // to terminate. 135 | non_zero = 0; 136 | const uint32_t cap_count = (1u << 23) - 1; // to avoid overflow 137 | for (n = 0; n < max_symbol; ++n) { 138 | if (counts[n] > 1) { 139 | const uint32_t c = (counts[n] > cap_count) ? cap_count : counts[n]; 140 | keys[non_zero++] = (c * MAX_SYMBOLS) + n; 141 | } 142 | } 143 | assert(non_zero > 0); 144 | miss = -miss; 145 | Select(keys, miss, non_zero); 146 | int to_fix = miss; 147 | while (to_fix > 0) { 148 | for (n = 0; n < miss && to_fix > 0; ++n) { 149 | const uint32_t idx = keys[n] % MAX_SYMBOLS; 150 | if (counts[idx] > 1) { 151 | --counts[idx]; 152 | --to_fix; 153 | } 154 | } 155 | } 156 | } 157 | return max_symbol; 158 | } 159 | 160 | //------------------------------------------------------------------------------ 161 | // Spread functions 162 | 163 | #define MAX_INSERT_ITERATION 0 // limit bucket-sort complexity (0=off) 164 | 165 | // insert with limited bucket sort 166 | #define INSERT(s, key) do { \ 167 | const double k = (key); \ 168 | const int b = (int)(k); \ 169 | if (b < tab_size) { \ 170 | const int S = (s); \ 171 | int16_t* p = &buckets[b]; \ 172 | int M = MAX_INSERT_ITERATION; \ 173 | while (M-- && *p != -1 && keys[*p] < k) { \ 174 | p = &next[*p]; \ 175 | } \ 176 | next[S] = *p; \ 177 | *p = S; \ 178 | keys[S] = k; \ 179 | } \ 180 | } while (0) 181 | 182 | int BuildSpreadTableBucket(int max_symbol, const uint32_t counts[], 183 | int log_tab_size, uint8_t symbols[]) { 184 | const int tab_size = 1 << log_tab_size; 185 | int s, n, pos; 186 | int16_t* buckets = NULL; // entry to linked list of bucket's symbol 187 | int16_t next[MAX_SYMBOLS]; // linked list of symbols in the same bucket 188 | double keys[MAX_SYMBOLS]; // key associated to each symbol 189 | buckets = (int16_t*)malloc(tab_size * sizeof(*buckets)); 190 | if (buckets == NULL) return 0; 191 | 192 | for (n = 0; n < tab_size; ++n) { 193 | 
buckets[n] = -1; // NIL 194 | } 195 | for (s = 0; s < max_symbol; ++s) { 196 | if (counts[s] > 0) { 197 | INSERT(s, 0.5 * tab_size / counts[s]); 198 | } 199 | } 200 | for (n = 0, pos = 0; n < tab_size && pos < tab_size; ++pos) { 201 | while (1) { 202 | const int s = buckets[pos]; 203 | if (s < 0) break; 204 | symbols[n++] = s; 205 | buckets[pos] = next[s]; // POP s 206 | INSERT(s, keys[s] + 1. * tab_size / counts[s]); 207 | } 208 | } 209 | // n < tab_size can happen due to rounding errors 210 | for (; n != tab_size; ++n) symbols[n] = symbols[n - 1]; 211 | free(buckets); 212 | return 1; 213 | } 214 | 215 | //------------------------------------------------------------------------------ 216 | 217 | static inline int ReverseBits(int i, int max_bits) { 218 | const int tab_size = 1 << max_bits; 219 | int v = 0, n = max_bits; 220 | while (n-- > 0) { 221 | v |= (i & 1) << n; 222 | i >>= 1; 223 | } 224 | return v; 225 | } 226 | 227 | int BuildSpreadTableReverse(int max_symbol, const uint32_t counts[], 228 | int log_tab_size, uint8_t symbols[]) { 229 | const int tab_size = 1 << log_tab_size; 230 | int s, n, pos; 231 | for (s = 0, pos = 0; s < max_symbol; ++s) { 232 | for (n = 0; n < counts[s]; ++n, ++pos) { 233 | symbols[ReverseBits(pos, log_tab_size)] = s; 234 | } 235 | } 236 | return 1; 237 | } 238 | 239 | //------------------------------------------------------------------------------ 240 | 241 | int BuildSpreadTableModulo(int max_symbol, const uint32_t counts[], 242 | int log_tab_size, uint8_t symbols[]) { 243 | const int tab_size = 1 << log_tab_size; 244 | const int kStep = ((tab_size >> 1) + (tab_size >> 3) + 1); 245 | int s, n, pos; 246 | for (s = 0, pos = 0; s < max_symbol; ++s) { 247 | for (n = 0; n < counts[s]; ++n, ++pos) { 248 | const int v = pos * kStep; 249 | const int slot = (v ^ CRYPTO_KEY) & (tab_size - 1); 250 | symbols[slot] = s; 251 | } 252 | } 253 | return 1; 254 | } 255 | 256 | 
//------------------------------------------------------------------------------ 257 | 258 | int BuildSpreadTablePack(int max_symbol, const uint32_t counts[], 259 | int log_tab_size, uint8_t symbols[]) { 260 | const int tab_size = 1 << log_tab_size; 261 | const int kStep = ((tab_size >> 1) + (tab_size >> 3) + 1); 262 | int s, n, pos; 263 | for (s = 0, pos = 0; s < max_symbol; ++s) { 264 | for (n = 0; n < counts[s]; ++n, ++pos) { 265 | symbols[pos] = s; 266 | } 267 | } 268 | return 1; 269 | } 270 | 271 | //------------------------------------------------------------------------------ 272 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2014 Google Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /bit_cmp.c: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | //------------------------------------------------------------------------------ 15 | // 16 | // Test proggy for comparing Arithmetic / binary ANS compression 17 | // 18 | 19 | #include "./fsc_utils.h" 20 | 21 | #define PROBA_BITS 16 22 | #define BITS 16 23 | typedef uint16_t ANSProba; 24 | typedef uint16_t ANSBaseW; // I/O words 25 | typedef uint32_t ANSStateW; // internal state 26 | 27 | #define PROBA_MAX (1ull << PROBA_BITS) 28 | #define PROBA_MASK (PROBA_MAX - 1) 29 | #define BITS_LIMIT ((ANSStateW)1 << BITS) 30 | #define BITS_MASK (BITS_LIMIT - 1) 31 | 32 | #define RECIPROCAL_BITS (16) 33 | #include "./divide.h" 34 | 35 | // #define USE_TABLE 36 | 37 | #ifdef USE_TABLE 38 | static ANSProba kSpreadTab[PROBA_MASK + 1]; 39 | void InitSpreadTable(ANSProba p0, ANSProba tab[], int inverse) { 40 | int r; 41 | FSCRandom rg; 42 | FSCInitRandom(&rg); 43 | if (inverse) { 44 | ANSProba tmp[PROBA_MASK + 1]; 45 | InitSpreadTable(p0, tmp, 0); 46 | for (r = 0; r <= PROBA_MASK; ++r) tab[tmp[r]] = r; 47 | return; 48 | } 49 | for (r = 0; r <= PROBA_MASK; ++r) { 50 | //tab[r] = (r < p0) ? 
(PROBA_MASK + 1 - p0 + r) : r - p0; 51 | const int K = (PROBA_MAX>>2) + (PROBA_MAX>>3) + 1; 52 | tab[(r * K) & PROBA_MASK] = r; 53 | } 54 | } 55 | #endif 56 | 57 | static size_t bANSEncode(const uint8_t* in, size_t in_size, 58 | uint8_t* const buf_start, uint8_t* const buf_end, 59 | ANSProba p0) { 60 | ANSStateW x; 61 | const ANSProba q0 = PROBA_MAX - p0; 62 | const ANSStateW threshold0 = BITS_LIMIT * p0; 63 | const ANSStateW threshold1 = BITS_LIMIT * q0; 64 | #if (RECIPROCAL_BITS >= 0) 65 | inv_t inv_p0, inv_q0; 66 | FSCInitDivide(p0, &inv_p0); 67 | FSCInitDivide(q0, &inv_q0); 68 | #endif 69 | #ifdef USE_TABLE 70 | InitSpreadTable(p0, kSpreadTab, 0); 71 | #endif 72 | ANSBaseW* buf = (ANSBaseW*)buf_end; 73 | assert(sizeof(ANSBaseW) * 8 == BITS); 74 | 75 | int i = in_size - 1; 76 | if (in_size <= BITS/8) { // special corner case for too-small input 77 | if (buf_end - in_size < buf_start) return 0; // error 78 | memcpy(buf_end - in_size, in, in_size); 79 | return in_size; 80 | } 81 | // We encode the first few bytes into initial state. 82 | x = 1ull; 83 | while (x < BITS_LIMIT) { 84 | x = (x << 8) | in[i]; 85 | --i; 86 | } 87 | // encode the rest... (in reverse) 88 | for (; i >= 0; --i) { 89 | if (x >= (in[i] ? 
threshold1 : threshold0)) { 90 | if (buf <= (ANSBaseW*)buf_start) return 0; // error 91 | *--buf = x & BITS_MASK; 92 | x >>= BITS; 93 | } 94 | #if (RECIPROCAL_BITS >= 0) 95 | // x is decomposed as: x = k.p0 + r 96 | // x' = k.L + r , where k = x/p0 = (x * inv_p0) >> FIX 97 | // x' = k.L + x - k.p0 = k.(L-p0) + x = k.q0 + x 98 | // and similarly: x' = x + (x / q0) * p0 + p0 for the other symbol 99 | if (in[i]) { 100 | const ANSStateW q = FSCDivide(x, inv_q0); 101 | x += q * p0 + p0; 102 | } else { 103 | const ANSStateW q = FSCDivide(x, inv_p0); 104 | x += q * q0 + 0; 105 | } 106 | #else 107 | #ifndef USE_TABLE 108 | if (in[i]) { 109 | // here, gcc-x86 is faster because x/q0 and x%q0 are a single instruction 110 | x = ((x / q0) << PROBA_BITS) + (x % q0) + p0; 111 | } else { 112 | x = ((x / p0) << PROBA_BITS) + (x % p0); 113 | } 114 | #else 115 | if (in[i]) { 116 | x = ((x / q0) << PROBA_BITS) + kSpreadTab[(x % q0) + p0]; 117 | } else { 118 | x = ((x / p0) << PROBA_BITS) + kSpreadTab[(x % p0) + 0]; 119 | } 120 | #endif 121 | #endif 122 | } 123 | if (buf - 2 < (ANSBaseW*)buf_start) { 124 | printf("BUFFER ERROR!\n"); 125 | return 0; 126 | } 127 | *--buf = (x >> 0) & BITS_MASK; 128 | *--buf = (x >> BITS) & BITS_MASK; 129 | 130 | return buf_end - (uint8_t*)buf; 131 | } 132 | 133 | static int bANSDecode(const ANSBaseW* ptr, 134 | uint8_t* out, size_t in_size, ANSProba p0) { 135 | ANSStateW x = ((ANSStateW)ptr[0] << BITS) | (ANSStateW)ptr[1]; 136 | ptr += 2; 137 | const ANSProba q0 = PROBA_MAX - p0; 138 | int i; 139 | #ifdef USE_TABLE 140 | InitSpreadTable(p0, kSpreadTab, 1); 141 | #endif 142 | if (in_size <= 4) { 143 | memcpy(out, ptr, in_size); 144 | return 1; 145 | } 146 | in_size -= BITS / 8; // few last bytes are encoded in the final state 147 | for (i = 0; i < in_size; ++i) { 148 | if (x < PROBA_MAX) { 149 | x = (x << BITS) | *ptr++; // decode forward 150 | } 151 | #ifndef USE_TABLE 152 | const ANSProba xfrac = x & PROBA_MASK; 153 | #else 154 | const ANSProba xfrac = 
kSpreadTab[x & PROBA_MASK]; 155 | #endif 156 | out[i] = (xfrac >= p0); 157 | if (xfrac < p0) { 158 | x = p0 * (x >> PROBA_BITS) + xfrac; 159 | } else { 160 | x = q0 * (x >> PROBA_BITS) + xfrac - p0; 161 | } 162 | } 163 | while (x != 1ull) { 164 | out[i++] = x & 0xff; 165 | x >>= 8; 166 | } 167 | return 1; 168 | } 169 | 170 | //------------------------------------------------------------------------------ 171 | 172 | static size_t bArithEncode(const uint8_t* in, size_t in_size, 173 | uint8_t* const buf_start, uint8_t* const buf_end, 174 | ANSProba p0) { 175 | ANSStateW low = 0; 176 | ANSStateW hi = ~0; 177 | ANSBaseW* buf = (ANSBaseW*)buf_start; 178 | int i; 179 | for (i = 0; i < in_size; ++i) { 180 | const ANSStateW diff = hi - low; 181 | #if (2 * PROBA_BITS + BITS > 64) 182 | ANSStateW split = low + (diff >> PROBA_BITS) * p0; 183 | split += ((diff & PROBA_MASK) * p0) >> PROBA_BITS; 184 | #else 185 | const ANSStateW split = low + ((uint64_t)diff * p0 >> PROBA_BITS); 186 | #endif 187 | if (!in[i]) { 188 | hi = split; 189 | } else { 190 | low = split + 1; 191 | } 192 | if ((low ^ hi) < BITS_LIMIT) { 193 | if (buf >= (ANSBaseW*)buf_end) return 0; // error 194 | *buf++ = hi >> BITS; 195 | low <<= BITS; 196 | hi <<= BITS; 197 | hi |= BITS_MASK; 198 | } 199 | } 200 | if (buf + 1 > (ANSBaseW*)buf_end) return 0; // error 201 | *buf++ = hi >> BITS; 202 | hi <<= BITS; 203 | *buf++ = hi >> BITS; 204 | 205 | const size_t size = (uint8_t*)buf - buf_start; 206 | return size; 207 | } 208 | 209 | static int bArithDecode(const ANSBaseW* ptr, 210 | uint8_t* out, size_t in_size, ANSProba p0) { 211 | 212 | ANSStateW low = 0; 213 | ANSStateW hi = ~0; 214 | ANSStateW x = *ptr++; 215 | x = (x << BITS) | *ptr++; 216 | 217 | int i; 218 | for (i = 0; i < in_size; ++i) { 219 | const ANSStateW diff = hi - low; 220 | #if (2 * PROBA_BITS + BITS > 64) 221 | ANSStateW split = low + (diff >> PROBA_BITS) * p0; 222 | split += ((diff & PROBA_MASK) * p0) >> PROBA_BITS; 223 | #else 224 | const 
ANSStateW split = low + ((uint64_t)diff * p0 >> PROBA_BITS); 225 | #endif 226 | out[i] = (x > split); 227 | if (!out[i]) { 228 | hi = split; 229 | } else { 230 | low = split + 1; 231 | } 232 | if ((low ^ hi) < BITS_LIMIT) { 233 | x = (x << BITS) | *ptr++; 234 | low <<= BITS; 235 | hi <<= BITS; 236 | hi |= BITS_MASK; 237 | } 238 | } 239 | return 1; 240 | } 241 | 242 | //------------------------------------------------------------------------------ 243 | 244 | static int CheckErrors(size_t N, const uint8_t out[], const uint8_t base[], 245 | const char* name) { 246 | int nb_errors = 0; 247 | int i; 248 | for (i = 0; i < N; ++i) { 249 | nb_errors += (out[i] != base[i]); 250 | } 251 | if (nb_errors) { 252 | printf("%s Decoding errors! (%d)\n", name, nb_errors); 253 | for (i = 0; i < (N > 40 ? 40 : N); ++i) { 254 | printf("[%d/%d]%c", out[i], base[i], " *"[out[i] != base[i]]); 255 | } 256 | printf("\n"); 257 | } 258 | return nb_errors; 259 | } 260 | 261 | static void Generate(uint8_t* in, size_t size, ANSProba p0, FSCRandom* rg) { 262 | int i; 263 | for (i = 0; i < size; ++i) { 264 | ANSProba k = 0; 265 | int b = 0; 266 | while (b < PROBA_BITS) { 267 | int B = PROBA_BITS - b; 268 | if (B > 16) B = 16; 269 | k = (k << B) | FSCRandomBits(rg, B); 270 | b += B; 271 | } 272 | in[i] = (k >= p0); 273 | } 274 | } 275 | 276 | //------------------------------------------------------------------------------ 277 | 278 | void Help() { 279 | printf("usage: ./bit_cmp [options] [size]\n"); 280 | printf("-h : this help\n"); 281 | exit(0); 282 | } 283 | 284 | int main(int argc, const char* argv[]) { 285 | int N = 100000; 286 | int L = 16; 287 | int nb_errors = 0; 288 | int pmin = 1, pmax = 255; 289 | int c; 290 | 291 | for (c = 1; c < argc; ++c) { 292 | if (!strcmp(argv[c], "-h")) { 293 | Help(); 294 | } else if (!strcmp(argv[c], "-l") && c + 1 < argc) { 295 | L = atoi(argv[++c]); 296 | } else if (!strcmp(argv[c], "-p") && c + 1 < argc) { 297 | pmin = pmax = atoi(argv[++c]); 298 | } else 
{ 299 | N = atoi(argv[c]); 300 | if (N <= 2) N = 2; 301 | } 302 | } 303 | const double MS = 1.e-6 * N; 304 | 305 | uint8_t* const base = (uint8_t*)malloc(2 * N * sizeof(*base)); 306 | uint8_t* const out = base + N; 307 | if (base == NULL) return 0; 308 | const size_t kExtraBytes = 32; 309 | const size_t total_size = (N + kExtraBytes + 7) & ~7; 310 | uint8_t* const bits = (uint8_t*)malloc(total_size); 311 | if (bits == NULL) goto End; 312 | uint8_t* const bits_end = bits + total_size; 313 | size_t bits_size = 0; 314 | 315 | FSCRandom r; 316 | FSCInitRandom(&r); 317 | 318 | int p; 319 | for (p = pmin; (p <= pmax) && (nb_errors == 0); ++p) { 320 | const ANSProba p0 = (ANSProba)((double)p * PROBA_MAX / 256.); 321 | MyClock start, tmp; 322 | double S_ANS = 0., S_AC = 0.; 323 | double t_ANS_enc = 0., t_ANS_dec = 0.; 324 | double t_AC_enc = 0., t_AC_dec = 0.; 325 | int i; 326 | 327 | Generate(base, N, p0, &r); 328 | const double S1 = 8. * GetEntropy(base, N); 329 | 330 | { 331 | GetElapsed(&start, NULL); 332 | bits_size = bANSEncode(base, N, bits, bits_end, p0); 333 | if (bits_size == 0) { 334 | printf("ANS Encoding error!\n"); 335 | goto End; 336 | } 337 | S_ANS = 8.0 * bits_size / N; 338 | t_ANS_enc = MS / GetElapsed(&tmp, &start); 339 | 340 | GetElapsed(&start, NULL); 341 | 342 | nb_errors = !bANSDecode((ANSBaseW*)(bits_end - bits_size), out, N, p0); 343 | t_ANS_dec = MS / GetElapsed(&tmp, &start); 344 | nb_errors += CheckErrors(N, out, base, "ANS"); 345 | } 346 | 347 | { 348 | GetElapsed(&start, NULL); 349 | bits_size = bArithEncode(base, N, bits, bits_end, p0); 350 | if (bits_size == 0) { 351 | printf("Arith Encoding error!\n"); 352 | goto End; 353 | } 354 | S_AC = 8.0 * bits_size / N; 355 | t_AC_enc = MS / GetElapsed(&tmp, &start); 356 | 357 | GetElapsed(&start, NULL); 358 | 359 | nb_errors = !bArithDecode((ANSBaseW*)bits, out, N, p0); 360 | t_AC_dec = MS / GetElapsed(&tmp, &start); 361 | nb_errors += CheckErrors(N, out, base, "AC"); 362 | } 363 | 364 | 
printf("%.7lf %.7lf %.7lf %.7lf " 365 | " %3.1lf %3.1lf " 366 | " %3.1lf %3.1lf\n", 367 | 1. * p0 / PROBA_MAX, S_ANS, S_AC, S1, 368 | t_ANS_enc, t_ANS_dec, 369 | t_AC_enc, t_AC_dec); 370 | } 371 | printf("# 1 Proba|2 S_ANS |3 S_AC |4 entropy" 372 | "|5 ANS enc|6 ANS dec" 373 | "|7 AC enc |8 AC dec\n"); 374 | 375 | End: 376 | free(base); 377 | free(bits); 378 | return (nb_errors != 0); 379 | } 380 | -------------------------------------------------------------------------------- /fsc_dec.c: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Set of callbacks implementing one coding method on the decoder side.
// One entry per FSCCodingMethod is stored in kDecMethods[].
typedef struct {
  // Reads the coding parameters from the bitstream and fills counts[].
  // Called first by FSCInit(). Returns 0 on error.
  FSCReadParamsFunc read_params;
  // Decodes 'size' bytes into 'out'. Called once per block by
  // FSCDecompress(). Returns 0 when the bit reader reached end-of-stream.
  FSCGetBlockFunc get_block;
  // Builds the decoding tables from counts[]. Called by FSCInit() right
  // after read_params(). Returns 0 on error.
  FSCBuildTables build_tables;
  // Spread function used by BuildStateTable() to lay the symbols out over
  // the state table. NULL for the methods that don't use BuildStateTable().
  FSCBuildSpreadTableFunc spread;
} DecMethods;
LOG_TAB_SIZE=12 80 | 81 | Symbol symbols_[MAX_SYMBOLS]; 82 | uint8_t map_[MAX_TAB_SIZE]; 83 | AliasTable alias_; 84 | }; 85 | 86 | //------------------------------------------------------------------------------ 87 | // State table building 88 | 89 | static int SymbolsInit(FSCDecoder* dec, 90 | const uint32_t counts[], int max_symbol) { 91 | uint32_t start = 0; 92 | int s; 93 | if (max_symbol > MAX_SYMBOLS || max_symbol <= 0) return 0; 94 | for (s = 0; s < max_symbol; ++s) { 95 | const uint32_t freq = counts[s]; 96 | dec->symbols_[s].start_ = start & 0xffff; 97 | dec->symbols_[s].freq_ = freq; 98 | start += freq; 99 | } 100 | return (start == (1 << dec->log_tab_size_)); 101 | } 102 | 103 | //------------------------------------------------------------------------------ 104 | 105 | static int BuildSymbolMap(FSCDecoder* dec, 106 | const uint32_t counts[], int max_symbol) { 107 | if (!SymbolsInit(dec, counts, max_symbol)) return 0; 108 | uint32_t start = 0; 109 | int s; 110 | for (s = 0; s < max_symbol; ++s) { 111 | const uint32_t freq = counts[s]; 112 | int i; 113 | for (i = 0; i < freq; ++i) dec->map_[start++] = s; 114 | } 115 | return 1; 116 | } 117 | 118 | static int BuildStateTableW(FSCDecoder* dec, const uint32_t counts[]) { 119 | return BuildSymbolMap(dec, counts, dec->max_symbol_); 120 | } 121 | 122 | static uint8_t NextSymbol(const FSCDecoder* const dec, FSCStateW* const state) { 123 | uint32_t rank; 124 | const uint32_t r = (*state) & (MAX_TAB_SIZE - 1); 125 | const uint8_t s = dec->map_[r]; 126 | rank = r - dec->symbols_[s].start_; 127 | const int freq = dec->symbols_[s].freq_; 128 | *state = freq * ((*state) >> MAX_LOG_TAB_SIZE) + rank; 129 | return s; 130 | } 131 | 132 | //------------------------------------------------------------------------------ 133 | 134 | static int BuildStateTableAliasW(FSCDecoder* dec, const uint32_t counts[]) { 135 | return SymbolsInit(dec, counts, dec->max_symbol_) && 136 | AliasInit(dec->alias_, counts, dec->max_symbol_); 137 
// Returns floor(log2(v)), i.e. the index of the highest bit set in 'v'.
// By convention returns 0 for v == 0 (the previous implementation shifted by
// a negative amount in that case, and evaluated the signed expression
// '1 << 31' for every input, both of which are undefined behavior).
static int Log2(uint32_t v) {
  int s = 0;
  while (v > 1) {
    v >>= 1;
    ++s;
  }
  return s;
}
192 | 193 | return 1; 194 | } 195 | 196 | //------------------------------------------------------------------------------ 197 | // Decoding loop 198 | 199 | static int GetBlock(FSCDecoder* dec, uint8_t* out, int size, FSCBitReader* br) { 200 | const FSCState* state = dec->tab_; // state_idx=0 at start 201 | int next_nb_bits = dec->log_tab_size_; 202 | int n; 203 | for (n = 0; n < size; ++n) { 204 | FSCFillBitWindow(br); 205 | state += FSCSeeBits(br) & ((1 << next_nb_bits) - 1); 206 | FSCDiscardBits(br, next_nb_bits); 207 | *out++ = state->symbol_; 208 | next_nb_bits = state->len_; 209 | state += state->next_; 210 | } 211 | return !br->eof_; 212 | } 213 | 214 | //------------------------------------------------------------------------------ 215 | 216 | #define RENORMALIZE_STATE(state) do { \ 217 | if ((state) < FSC_MAX) { \ 218 | if (buf < buf_end) { \ 219 | (state) = ((state) << FSC_BITS) | (*buf++); \ 220 | } else { \ 221 | lbr.eof_ |= 1; \ 222 | } \ 223 | } \ 224 | } while (0) 225 | 226 | static int GetBlockW1(FSCDecoder* dec, uint8_t* out, int size, 227 | FSCBitReader* br) { 228 | FSCBitReader lbr = *br; // it's faster to make a local copy 229 | const FSCType* buf = (const FSCType*)FSCBitAlign(&lbr); 230 | const FSCType* const buf_end = (const FSCType*)FSCGetByteEnd(&lbr); 231 | const Symbol* const syms = dec->symbols_; 232 | 233 | lbr.eof_ = (buf == buf_end); 234 | if (lbr.eof_) goto End; 235 | const FSCType* buf0 = buf; 236 | FSCStateW state = *buf++; 237 | 238 | int n; 239 | for (n = 0; n < size - FSC_BITS / 8; ++n) { 240 | RENORMALIZE_STATE(state); 241 | if (lbr.eof_) break; 242 | out[n] = NextSymbol(dec, &state); 243 | } 244 | RENORMALIZE_STATE(state); 245 | FSCSetReadBufferPos(&lbr, (const uint8_t*)buf); 246 | // The trailing bytes are encoded in the final state's lower bytes. 
247 | while (state != 1 && n < size) { 248 | out[n++] = state & 0xff; 249 | state >>= 8; 250 | } 251 | End: 252 | *br = lbr; 253 | return !br->eof_; 254 | } 255 | 256 | static int GetBlockW2(FSCDecoder* dec, uint8_t* out, int size, 257 | FSCBitReader* br) { 258 | FSCBitReader lbr = *br; // it's faster to make a local copy 259 | const FSCType* buf = (const FSCType*)FSCBitAlign(&lbr); 260 | const FSCType* const buf_end = (const FSCType*)FSCGetByteEnd(&lbr); 261 | const Symbol* const syms = dec->symbols_; 262 | lbr.eof_ = (buf == buf_end); 263 | if (lbr.eof_) goto End; 264 | FSCStateW state1 = *buf++; 265 | FSCStateW state0 = (size > 1) ? (*buf++) : 0; 266 | 267 | int n; 268 | const int size_limit = (size - 2 * (FSC_BITS / 8)) & ~1; 269 | for (n = 0; n < size_limit; n += 2) { 270 | RENORMALIZE_STATE(state1); 271 | RENORMALIZE_STATE(state0); 272 | if (lbr.eof_) break; 273 | out[n + 0] = NextSymbol(dec, &state1); 274 | out[n + 1] = NextSymbol(dec, &state0); 275 | } 276 | RENORMALIZE_STATE(state1); 277 | RENORMALIZE_STATE(state0); 278 | if (size & 1) { 279 | RENORMALIZE_STATE(state1); 280 | if (!lbr.eof_) out[n++] = NextSymbol(dec, &state1); 281 | } 282 | 283 | FSCSetReadBufferPos(&lbr, (const uint8_t*)buf); 284 | // The trailing bytes are encoded in the final state's lower bytes. 
285 | while (state1 != 1 && n < size) { 286 | out[n++] = state1 & 0xff; 287 | state1 >>= 8; 288 | } 289 | while (state0 != 1 && n < size) { 290 | out[n++] = state0 & 0xff; 291 | state0 >>= 8; 292 | } 293 | 294 | End: 295 | *br = lbr; 296 | return !br->eof_; 297 | } 298 | 299 | static int GetBlockW4(FSCDecoder* dec, uint8_t* out, int size, 300 | FSCBitReader* br) { 301 | FSCBitReader lbr = *br; // it's faster to make a local copy 302 | const FSCType* buf = (const FSCType*)FSCBitAlign(&lbr); 303 | const FSCType* const buf_end = (const FSCType*)FSCGetByteEnd(&lbr); 304 | const Symbol* const syms = dec->symbols_; 305 | FSCStateW states[4]; 306 | lbr.eof_ = (buf == buf_end); 307 | if (lbr.eof_) goto End; 308 | int r; 309 | for (r = 0; r < 4; ++r) { 310 | states[r] = (size > 0) ? *buf++ : 0; 311 | } 312 | 313 | int n; 314 | for (n = 0; n < (size & ~3); n += 4) { 315 | RENORMALIZE_STATE(states[0]); 316 | RENORMALIZE_STATE(states[1]); 317 | RENORMALIZE_STATE(states[2]); 318 | RENORMALIZE_STATE(states[3]); 319 | if (lbr.eof_) break; 320 | out[n + 0] = NextSymbol(dec, &states[0]); 321 | out[n + 1] = NextSymbol(dec, &states[1]); 322 | out[n + 2] = NextSymbol(dec, &states[2]); 323 | out[n + 3] = NextSymbol(dec, &states[3]); 324 | } 325 | RENORMALIZE_STATE(states[0]); 326 | RENORMALIZE_STATE(states[1]); 327 | RENORMALIZE_STATE(states[2]); 328 | RENORMALIZE_STATE(states[3]); 329 | for (; n < size; ++n) { 330 | RENORMALIZE_STATE(states[n & 3]); 331 | if (!lbr.eof_) out[n] = NextSymbol(dec, &states[n & 3]); 332 | RENORMALIZE_STATE(states[n & 3]); 333 | } 334 | FSCSetReadBufferPos(&lbr, (const uint8_t*)buf); 335 | End: 336 | *br = lbr; 337 | return !br->eof_; 338 | } 339 | 340 | static int GetBlockAliasW1(FSCDecoder* dec, uint8_t* out, int size, 341 | FSCBitReader* br) { 342 | FSCBitReader lbr = *br; // it's faster to make a local copy 343 | const FSCType* buf = (const FSCType*)FSCBitAlign(&lbr); 344 | const FSCType* const buf_end = (const FSCType*)FSCGetByteEnd(&lbr); 345 | const 
Symbol* const syms = dec->symbols_; 346 | lbr.eof_ = (buf == buf_end); 347 | if (lbr.eof_) goto End; 348 | FSCStateW state = *buf++; 349 | 350 | int n; 351 | for (n = 0; n < size; ++n) { 352 | RENORMALIZE_STATE(state); 353 | if (lbr.eof_) break; 354 | out[n] = NextSymbolAlias(dec, &state); 355 | } 356 | RENORMALIZE_STATE(state); 357 | FSCSetReadBufferPos(&lbr, (const uint8_t*)buf); 358 | End: 359 | *br = lbr; 360 | return !br->eof_; 361 | } 362 | 363 | static int GetBlockAliasW2(FSCDecoder* dec, uint8_t* out, int size, 364 | FSCBitReader* br) { 365 | FSCBitReader lbr = *br; // it's faster to make a local copy 366 | const FSCType* buf = (const FSCType*)FSCBitAlign(&lbr); 367 | const FSCType* const buf_end = (const FSCType*)FSCGetByteEnd(&lbr); 368 | const Symbol* const syms = dec->symbols_; 369 | lbr.eof_ = (buf == buf_end); 370 | if (lbr.eof_) goto End; 371 | FSCStateW state1 = (*buf++); 372 | FSCStateW state0 = (size > 1) ? (*buf++) : 0; 373 | 374 | int n; 375 | for (n = 0; n + 1 < size; n += 2) { 376 | RENORMALIZE_STATE(state1); 377 | RENORMALIZE_STATE(state0); 378 | if (lbr.eof_) break; 379 | out[n + 0] = NextSymbolAlias(dec, &state1); 380 | out[n + 1] = NextSymbolAlias(dec, &state0); 381 | } 382 | RENORMALIZE_STATE(state0); 383 | if (size & 1) { 384 | RENORMALIZE_STATE(state1); 385 | if (!lbr.eof_) out[n++] = NextSymbolAlias(dec, &state1); 386 | RENORMALIZE_STATE(state0); 387 | } 388 | FSCSetReadBufferPos(&lbr, (const uint8_t*)buf); 389 | End: 390 | *br = lbr; 391 | return !br->eof_; 392 | } 393 | 394 | //------------------------------------------------------------------------------ 395 | // Header 396 | 397 | static int ReadSequence(uint32_t seq[], int len, int sparse, int nb_bits, 398 | FSCBitReader* br) { 399 | uint32_t total = 1 << nb_bits; 400 | uint32_t half = total >> 1; 401 | int i; 402 | if (sparse == 2) sparse = FSCReadBits(br, 1); 403 | for (i = 0; i < len - 1; ++i ) { 404 | uint16_t c; 405 | if (sparse && !FSCReadBits(br, 1)) { 406 | seq[i] = 0; 407 
| continue; 408 | } 409 | c = FSCReadBits(br, nb_bits); 410 | seq[i] = c; 411 | if (total < c) return 0; // normalization problem 412 | total -= c; 413 | if (total < half) { 414 | --nb_bits; 415 | half >>= 1; 416 | } 417 | } 418 | seq[len - 1] = total; // remaining part 419 | return 1; 420 | } 421 | 422 | static int ReadHeader(FSCDecoder* dec, FSCBitReader* br, uint32_t counts[TAB_SIZE]) { 423 | const int log_tab_size = dec->log_tab_size_; 424 | const uint32_t tab_size = 1 << log_tab_size; 425 | const int max_symbol = 1 + FSCReadBits(br, 8); 426 | dec->max_symbol_ = max_symbol; 427 | dec->unique_symbol_ = -1; 428 | 429 | if (max_symbol < HDR_SYMBOL_LIMIT) { // Use method #1 for small alphabet 430 | if (!ReadSequence(counts, max_symbol, 2, log_tab_size, br)) { 431 | return 0; 432 | } 433 | } else { // Use more complex method #2 for large alphabet 434 | const int hlen = 1 + FSCReadBits(br, 5); 435 | uint32_t bHisto[LOG_TAB_SIZE + 1]; 436 | uint8_t bins[MAX_SYMBOLS] = { 0 }; 437 | if (hlen == 32) { // sparse case 438 | int i; 439 | for (i = 0; i < max_symbol - 1; ++i) counts[i] = 0; 440 | counts[max_symbol - 1] = tab_size; 441 | } else { 442 | if (!ReadSequence(bHisto, hlen, 2, TAB_HDR_BITS, br)) { 443 | return 0; 444 | } 445 | { 446 | FSCDecoder dec2; 447 | memset(&dec2, 0, sizeof(dec2)); 448 | dec2.log_tab_size_ = TAB_HDR_BITS; 449 | dec2.max_symbol_ = hlen; 450 | dec2.method_ = CODING_METHOD_BUCKET; 451 | dec2.methods_ = kDecMethods[dec2.method_]; 452 | if (hlen > log_tab_size) return 0; 453 | if (!dec2.methods_.build_tables(&dec2, bHisto)) { 454 | fprintf(stderr, "Sub-Decoder initialization failed!\n"); 455 | return 0; 456 | } 457 | dec2.methods_.get_block(&dec2, bins, max_symbol - 1, br); 458 | } 459 | { 460 | int i; 461 | uint32_t total = tab_size; 462 | for (i = 0; i < max_symbol - 1; ++i) { 463 | const int b = bins[i]; 464 | const int residue = (b > 0) ? 
FSCReadBits(br, b) : 0; 465 | const int c = (1 << b) | residue; 466 | counts[i] = c - 1; 467 | if (total < counts[i]) return 0; // normalization error 468 | total -= counts[i]; 469 | } 470 | counts[max_symbol - 1] = total; // remaining part 471 | } 472 | } 473 | } 474 | return !br->eof_; 475 | } 476 | 477 | static int ReadParams(FSCDecoder* dec, FSCBitReader* br, 478 | uint32_t counts[MAX_SYMBOLS]) { 479 | dec->log_tab_size_ = LOG_TAB_SIZE - FSCReadBits(br, 4); 480 | return ReadHeader(dec, br, counts); 481 | } 482 | 483 | static int ReadParamsW(FSCDecoder* dec, FSCBitReader* br, 484 | uint32_t counts[MAX_SYMBOLS]) { 485 | dec->log_tab_size_ = MAX_LOG_TAB_SIZE; 486 | return ReadHeader(dec, br, counts); 487 | } 488 | 489 | //------------------------------------------------------------------------------ 490 | // corner case of only-one-symbol 491 | 492 | static int GetBlockUnique(FSCDecoder* dec, uint8_t* out, int size, 493 | FSCBitReader* br) { 494 | memset(out, dec->unique_symbol_, size); 495 | return 1; 496 | } 497 | 498 | static int ReadParamsUnique(FSCDecoder* dec, FSCBitReader* br, 499 | uint32_t counts[MAX_SYMBOLS]) { 500 | memset(counts, 0, MAX_SYMBOLS * sizeof(counts[0])); 501 | dec->unique_symbol_ = FSCReadBits(br, 8); 502 | dec->max_symbol_ = dec->unique_symbol_ + 1; 503 | dec->log_tab_size_ = MAX_LOG_TAB_SIZE; 504 | return !br->eof_; 505 | } 506 | 507 | static int BuildTableUnique(FSCDecoder* dec, const uint32_t counts[]) { 508 | (void)dec; 509 | (void)counts; 510 | return 1; 511 | } 512 | 513 | //------------------------------------------------------------------------------ 514 | 515 | static const DecMethods kDecMethods[CODING_METHOD_LAST] = { 516 | { ReadParams, GetBlock, BuildStateTable, BuildSpreadTableBucket }, 517 | { ReadParams, GetBlock, BuildStateTable, BuildSpreadTableReverse }, 518 | { ReadParams, GetBlock, BuildStateTable, BuildSpreadTableModulo }, 519 | { ReadParams, GetBlock, BuildStateTable, BuildSpreadTablePack }, 520 | 521 | { 
ReadParamsW, GetBlockW1, BuildStateTableW, NULL }, 522 | { ReadParamsW, GetBlockW2, BuildStateTableW, NULL }, 523 | { ReadParamsW, GetBlockAliasW1, BuildStateTableAliasW, NULL }, 524 | { ReadParamsW, GetBlockAliasW2, BuildStateTableAliasW, NULL }, 525 | 526 | { ReadParamsW, GetBlockW4, BuildStateTableW, NULL }, 527 | 528 | { ReadParamsUnique, GetBlockUnique, BuildTableUnique, NULL }, 529 | }; 530 | 531 | //------------------------------------------------------------------------------ 532 | 533 | FSCDecoder* FSCInit(const uint8_t* input, size_t len) { 534 | FSCDecoder* dec = (FSCDecoder*)calloc(1, sizeof(*dec)); 535 | if (dec == NULL) return NULL; 536 | 537 | FSCInitBitReader(&dec->br_, input, len); 538 | dec->unique_symbol_ = -1; 539 | dec->out_size_ = 0; 540 | int i; 541 | for (i = 0; i < 8 && FSCReadBits(&dec->br_, 1); ++i) { 542 | dec->out_size_ |= FSCReadBits(&dec->br_, 8) << (8 * i); 543 | } 544 | 545 | dec->method_ = (FSCCodingMethod)FSCReadBits(&dec->br_, 4); 546 | if (dec->method_ >= CODING_METHOD_LAST) goto Error; 547 | dec->methods_ = kDecMethods[dec->method_]; 548 | 549 | uint32_t counts[MAX_SYMBOLS]; 550 | if (!dec->methods_.read_params(dec, &dec->br_, counts) || 551 | !dec->methods_.build_tables(dec, counts)) { 552 | Error: 553 | dec->status_ = FSC_ERROR; 554 | } else { 555 | dec->status_ = FSC_OK; 556 | } 557 | return dec; 558 | } 559 | 560 | int FSCIsOk(FSCDecoder* dec) { 561 | return (dec != NULL) && (dec->status_ != FSC_ERROR); 562 | } 563 | 564 | void FSCDelete(FSCDecoder* dec) { 565 | free(dec); 566 | } 567 | 568 | int FSCDecompress(FSCDecoder* dec, uint8_t** out, size_t* out_size) { 569 | if (dec == NULL || out == NULL || out_size == NULL) return 0; 570 | size_t size = dec->out_size_; 571 | int need_allocate = (*out == NULL); 572 | if (need_allocate) { 573 | *out = (uint8_t*)malloc(size * sizeof(*out)); 574 | if (*out == NULL) return 0; 575 | *out_size = size; 576 | } else { 577 | if (*out_size < size) return 0; // not enough room 578 | } 579 | 
580 | uint8_t* ptr = *out; 581 | FSCGetBlockFunc get_block = dec->methods_.get_block; 582 | while (size > 0 && dec->status_ == FSC_OK) { 583 | const int next_size = (size > BLOCK_SIZE) ? BLOCK_SIZE : (int)size; 584 | if (!get_block(dec, ptr, next_size, &dec->br_)) { 585 | dec->status_ = FSC_EOF; 586 | break; 587 | } 588 | ptr += next_size; 589 | size -= next_size; 590 | } 591 | if (dec->status_ == FSC_ERROR) { 592 | if (need_allocate) { 593 | free(*out); 594 | *out = 0; 595 | *out_size = 0; 596 | } 597 | return 0; 598 | } 599 | return 1; 600 | } 601 | 602 | //------------------------------------------------------------------------------ 603 | 604 | int FSCDecode(const uint8_t* in, size_t in_size, uint8_t** out, size_t* size) { 605 | FSCDecoder* const dec = FSCInit(in, in_size); 606 | if (dec == NULL || out == NULL || size == NULL) return 0; 607 | const int ok = FSCDecompress(dec, out, size) && FSCIsOk(dec); 608 | FSCDelete(dec); 609 | return ok; 610 | } 611 | 612 | //------------------------------------------------------------------------------ 613 | -------------------------------------------------------------------------------- /fsc_enc.c: -------------------------------------------------------------------------------- 1 | //Copyright 2014 The FSC Authors. All Rights Reserved. 2 | // 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Per-symbol transform used by PutBlock() to go from the current state to
// the next one. Filled by BuildTables() for every symbol with a non-zero
// count 'cnt'.
typedef struct {
  // Added to (state >> nb_bits) to index states_[]. BuildTables() sets it to
  // 'pos - cnt', where 'pos' is the start of the symbol's segment in states_[].
  int32_t offset_;
  // States >= wrap_ emit one extra bit. Set to cnt << (1 + nb_bits_).
  uint16_t wrap_;
  // Base number of bits emitted for this symbol:
  // log_tab_size - Log2Ceil(cnt).
  uint8_t nb_bits_;
} transf_t;
// Returns ceil(log2(v)) for v > 1, and 0 for v <= 1.
// Not a critical function: computed as the bit-length of 'v - 1'.
static int Log2Ceil(uint32_t v) {
  int bits = 0;
  uint32_t rem;
  if (v <= 1) return 0;
  for (rem = v - 1; rem != 0; rem >>= 1) ++bits;
  return bits;
}
130 | 131 | symbols = (uint8_t*)malloc(tab_size * sizeof(*symbols)); 132 | if (symbols == NULL) return 0; 133 | 134 | // Prepare map from symbol to state 135 | if (!enc->methods_.spread(max_symbol, counts, log_tab_size, symbols)) { 136 | free(symbols); 137 | return 0; 138 | } 139 | for (pos = 0; pos < tab_size; ++pos) { 140 | const uint8_t s = symbols[pos]; 141 | tab[state[s]++] = pos + tab_size; 142 | } 143 | free(symbols); 144 | return max_symbol; 145 | } 146 | 147 | #if defined(USE_INV_DIV) 148 | 149 | // As mentioned by Ryg: 150 | // Alverson 1991: "Integer Division Using Reciprocals" 151 | // http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.33.1710 152 | #define MULT_SHIFT (8 * sizeof(FSCStateW) + FSC_BITS) 153 | #define DIV_BY_MULT(A, B) (((A) * (B)) >> MULT_SHIFT) 154 | 155 | void EncodeDividers(Symbol syms[], int max_symbol) { 156 | int s; 157 | for (s = 0; s < max_symbol; ++s) { 158 | Symbol* const sym = &syms[s]; 159 | const uint32_t freq = sym->freq_; 160 | sym->imult_ = (1u << MAX_LOG_TAB_SIZE) - freq; 161 | if (freq > 0) { 162 | sym->mult_ = ((1ull << MULT_SHIFT) + freq - 1) / freq; 163 | } else { 164 | sym->mult_ = 0; // shouldn't be needed 165 | } 166 | } 167 | } 168 | #endif 169 | 170 | static int BuildTablesW(FSCEncoder* const enc, const uint32_t counts[]) { 171 | int s; 172 | uint64_t start = 0; 173 | const int log_tab_size = enc->log_tab_size_; 174 | const int tab_size = 1 << log_tab_size; 175 | const int max_symbol = enc->max_symbol_; 176 | 177 | for (s = 0; s < max_symbol; ++s) { 178 | enc->symbols_[s].start_ = start; 179 | enc->symbols_[s].freq_ = counts[s]; 180 | start += counts[s]; 181 | } 182 | if (start != tab_size) return 0; // not normalized? 
// Scans counts[0..max_symbol) and returns the index of the only symbol with
// a non-zero count, or -1 if there is no such single symbol (either several
// symbols are used, or none at all).
// An all-zero histogram used to return 'max_symbol', which tripped the
// assert or, with NDEBUG, was reported as an out-of-range unique symbol.
static int IsUniqueSymbol(int max_symbol, const uint32_t counts[]) {
  int unique = -1;
  int i;
  for (i = 0; i < max_symbol; ++i) {
    if (counts[i] == 0) continue;
    if (unique >= 0) return -1;   // more than one symbol
    unique = i;
  }
  return unique;
}
enc->max_symbol_: %d\n", enc->max_symbol_); 231 | return 0; 232 | } 233 | 234 | enc->unique_symbol_ = IsUniqueSymbol(max_symbol, counts); 235 | assert(enc->unique_symbol_ < max_symbol); 236 | if (enc->unique_symbol_ >= 0) { 237 | method = CODING_METHOD_UNIQUE; 238 | } 239 | if (enc->max_symbol_ > (1 << log_tab_size)) return 0; 240 | 241 | enc->method_ = method; 242 | enc->methods_ = kEncMethods[method]; 243 | return enc->methods_.build_tables(enc, counts); 244 | } 245 | 246 | // ----------------------------------------------------------------------------- 247 | // Coding loop 248 | 249 | typedef struct { // for delayed bitstream writing 250 | uint16_t val_; 251 | uint8_t nb_bits_; 252 | } token_t; 253 | 254 | static void PutBlock(const FSCEncoder* enc, const uint8_t* in, int size, 255 | FSCBitWriter* bw) { 256 | token_t tokens[BLOCK_SIZE]; 257 | const transf_t* const transforms = enc->transforms_; 258 | const uint16_t* const states = enc->states_; 259 | const int log_tab_size = enc->log_tab_size_; 260 | const int tab_size = 1 << log_tab_size; 261 | int state = tab_size; 262 | int k; 263 | for (k = size - 1; k >= 0; --k) { 264 | const transf_t* const transf = &transforms[in[k]]; 265 | const int extra_bit = (state >= transf->wrap_); 266 | const int nb_bits = transf->nb_bits_ + extra_bit; 267 | tokens[k].nb_bits_ = nb_bits; 268 | tokens[k].val_ = state & ((1 << nb_bits) - 1); 269 | state = states[(state >> nb_bits) + transf->offset_]; 270 | } 271 | // Direction reversal 272 | FSCWriteBits(bw, state & (tab_size - 1), log_tab_size); 273 | for (k = 0; k < size - 1; ++k) { // no need to write the last token 274 | FSCWriteBits(bw, tokens[k].val_, tokens[k].nb_bits_); 275 | } 276 | } 277 | 278 | // ----------------------------------------------------------------------------- 279 | 280 | #define FLUSH_STATE(state, limit) do { \ 281 | if ((state) >= (limit)) { \ 282 | output[--pos] = (FSCType)((state) & FSC_BITS_MASK); \ 283 | (state) >>= FSC_BITS; \ 284 | } \ 285 | } while 
(0) 286 | 287 | #if defined(USE_INV_DIV) 288 | // Alternative version, which is a little slower than below: 289 | // const FSCStateW R = state - q * freq; // <- that's 'state % freq' 290 | // state = (q << MAX_LOG_TAB_SIZE) + R + start; 291 | #define RENORMALIZE_STATE(state, s) do { \ 292 | const uint32_t start = (s)->start_; \ 293 | const uint32_t q = DIV_BY_MULT(state, s->mult_); \ 294 | state = q * s->imult_ + start + state; \ 295 | } while (0) 296 | #else 297 | // reference calculation 298 | #define RENORMALIZE_STATE(state, s) do { \ 299 | const uint32_t freq = (s)->freq_, start = (s)->start_; \ 300 | state = ((state / freq) << MAX_LOG_TAB_SIZE) + (state % freq) + start; \ 301 | } while (0) 302 | // slower version: 303 | // (state / freq) * ((1 << MAX_LOG_TAB_SIZE) - freq) + state + start; 304 | #endif // USE_INV_DIV 305 | 306 | // with ALIAS: 307 | #if defined(USE_INV_DIV) 308 | #define RENORMALIZE_STATE_ALIAS(state, s) do { \ 309 | const uint32_t freq = (s)->freq_, start = (s)->start_; \ 310 | const uint32_t q = DIV_BY_MULT(state, s->mult_); \ 311 | const uint32_t R = state - q * freq; /* <- that's 'state % freq' */ \ 312 | state = (q << MAX_LOG_TAB_SIZE) + enc->alias_map_[R + start]; \ 313 | } while (0) 314 | #else 315 | #define RENORMALIZE_STATE_ALIAS(state, s) do { \ 316 | const uint32_t freq = (s)->freq_, start = (s)->start_; \ 317 | state = ((state / freq) << MAX_LOG_TAB_SIZE) \ 318 | + enc->alias_map_[(state % freq) + start]; \ 319 | } while (0) 320 | #endif // USE_INV_DIV 321 | 322 | static int DoPutBlockW1(const FSCEncoder* enc, const uint8_t* in, int size, 323 | FSCType output[BLOCK_SIZE]) { 324 | const FSCStateW norm = (FSC_MAX >> MAX_LOG_TAB_SIZE) << FSC_BITS; 325 | int pos = BLOCK_SIZE; 326 | FSCStateW state = 1; 327 | int k = size; 328 | // We encode the first few bytes into initial state. 
329 | while (state < FSC_MAX && k > 0) { 330 | state = (state << 8) | in[--k]; 331 | } 332 | assert(enc->log_tab_size_ == MAX_LOG_TAB_SIZE); 333 | while (k > 0) { 334 | const Symbol* const s = &enc->symbols_[in[--k]]; 335 | FLUSH_STATE(state, norm * s->freq_); 336 | RENORMALIZE_STATE(state, s); 337 | } 338 | FLUSH_STATE(state, 0); 339 | FLUSH_STATE(state, 0); 340 | return pos; 341 | } 342 | 343 | static int DoPutBlockW2(const FSCEncoder* enc, const uint8_t* in, int size, 344 | FSCType output[BLOCK_SIZE]) { 345 | int pos = BLOCK_SIZE; 346 | FSCStateW state0 = 1, state1 = 1; 347 | const FSCStateW norm = (FSC_MAX >> MAX_LOG_TAB_SIZE) << FSC_BITS; 348 | int k = size; 349 | assert(enc->log_tab_size_ == MAX_LOG_TAB_SIZE); 350 | // We encode the first few bytes into initial states. 351 | while (state0 < FSC_MAX && k > 0) { 352 | state0 = (state0 << 8) | in[--k]; 353 | } 354 | while (state1 < FSC_MAX && k > 0) { 355 | state1 = (state1 << 8) | in[--k]; 356 | } 357 | if (k & 1) { 358 | const Symbol* const s1 = &enc->symbols_[in[--k]]; 359 | FLUSH_STATE(state1, norm * s1->freq_); 360 | RENORMALIZE_STATE(state1, s1); 361 | } 362 | while (k > 0) { 363 | const Symbol* const s0 = &enc->symbols_[in[--k]]; 364 | const Symbol* const s1 = &enc->symbols_[in[--k]]; 365 | FLUSH_STATE(state0, norm * s0->freq_); 366 | FLUSH_STATE(state1, norm * s1->freq_); 367 | RENORMALIZE_STATE(state0, s0); 368 | RENORMALIZE_STATE(state1, s1); 369 | } 370 | FLUSH_STATE(state0, 0); 371 | FLUSH_STATE(state1, 0); 372 | if (size > 1) { 373 | FLUSH_STATE(state0, 0); 374 | FLUSH_STATE(state1, 0); 375 | } 376 | return pos; 377 | } 378 | 379 | static int DoPutBlockW4(const FSCEncoder* enc, const uint8_t* in, int size, 380 | FSCType output[BLOCK_SIZE]) { 381 | int pos = BLOCK_SIZE; 382 | FSCStateW states[4] = { FSC_MAX, FSC_MAX, FSC_MAX, FSC_MAX }; 383 | const FSCStateW norm = (FSC_MAX >> MAX_LOG_TAB_SIZE) << FSC_BITS; 384 | int k = size; 385 | int r = size & 3; 386 | assert(enc->log_tab_size_ == 
MAX_LOG_TAB_SIZE); 387 | while (r-- > 0) { 388 | const Symbol* const s = &enc->symbols_[in[--k]]; 389 | FLUSH_STATE(states[3 - r], norm * s->freq_); 390 | RENORMALIZE_STATE(states[3 - r], s); 391 | } 392 | while (k > 0) { 393 | const Symbol* const s0 = &enc->symbols_[in[--k]]; 394 | const Symbol* const s1 = &enc->symbols_[in[--k]]; 395 | const Symbol* const s2 = &enc->symbols_[in[--k]]; 396 | const Symbol* const s3 = &enc->symbols_[in[--k]]; 397 | FLUSH_STATE(states[0], norm * s0->freq_); 398 | FLUSH_STATE(states[1], norm * s1->freq_); 399 | FLUSH_STATE(states[2], norm * s2->freq_); 400 | FLUSH_STATE(states[3], norm * s3->freq_); 401 | RENORMALIZE_STATE(states[0], s0); 402 | RENORMALIZE_STATE(states[1], s1); 403 | RENORMALIZE_STATE(states[2], s2); 404 | RENORMALIZE_STATE(states[3], s3); 405 | } 406 | for (r = 0; r < 4; ++r) { 407 | FLUSH_STATE(states[r], 0); 408 | } 409 | for (r = 0; r < 4; ++r) { 410 | if (size > 0) FLUSH_STATE(states[r], 0); 411 | } 412 | return pos; 413 | } 414 | 415 | // Generic N-states interleaving function (slow) 416 | #if 0 417 | #define NB_STATES 8 418 | static int DoPutBlockWN(const FSCEncoder* enc, const uint8_t* in, int size, 419 | FSCType output[BLOCK_SIZE]) { 420 | int pos = BLOCK_SIZE; 421 | FSCStateW states[NB_STATES]; 422 | const FSCStateW norm = (FSC_MAX >> MAX_LOG_TAB_SIZE) << FSC_BITS; 423 | int k = size; 424 | int r; 425 | assert(enc->log_tab_size_ == MAX_LOG_TAB_SIZE); 426 | for (r = 0; r < NB_STATES; ++r) { 427 | states[r] = FSC_MAX; 428 | } 429 | r = size & (NB_STATES - 1); 430 | while (r-- > 0) { 431 | const Symbol* const s = &enc->symbols_[in[--k]]; 432 | FLUSH_STATE(states[NB_STATES - 1 - r], norm * s->freq_); 433 | RENORMALIZE_STATE(states[NB_STATES - 1 - r], s); 434 | } 435 | while (k > 0) { 436 | for (r = 0; r < NB_STATES; ++r) { 437 | const Symbol* const s = &enc->symbols_[in[--k]]; 438 | FLUSH_STATE(states[r], norm * s->freq_); 439 | RENORMALIZE_STATE(states[r], s); 440 | } 441 | } 442 | for (r = 0; r < 2 * 
NB_STATES; ++r) { 443 | FLUSH_STATE(states[r & (NB_STATES - 1)], 0); 444 | } 445 | return pos; 446 | } 447 | #endif 448 | 449 | // ----------------------------------------------------------------------------- 450 | 451 | static int DoPutBlockAliasW1(const FSCEncoder* enc, const uint8_t* in, int size, 452 | FSCType output[BLOCK_SIZE]) { 453 | int pos = BLOCK_SIZE; 454 | FSCStateW state = FSC_MAX; 455 | const FSCStateW norm = (FSC_MAX >> MAX_LOG_TAB_SIZE) << FSC_BITS; 456 | int k = size; 457 | assert(enc->log_tab_size_ == MAX_LOG_TAB_SIZE); 458 | while (k > 0) { 459 | const Symbol* const s = &enc->symbols_[in[--k]]; 460 | FLUSH_STATE(state, norm * s->freq_); 461 | RENORMALIZE_STATE_ALIAS(state, s); 462 | } 463 | FLUSH_STATE(state, 0); 464 | FLUSH_STATE(state, 0); 465 | return pos; 466 | } 467 | 468 | static int DoPutBlockAliasW2(const FSCEncoder* enc, const uint8_t* in, int size, 469 | FSCType output[BLOCK_SIZE]) { 470 | int pos = BLOCK_SIZE; 471 | FSCStateW state0 = FSC_MAX, state1 = FSC_MAX; 472 | const FSCStateW norm = (FSC_MAX >> MAX_LOG_TAB_SIZE) << FSC_BITS; 473 | int k = size; 474 | assert(enc->log_tab_size_ == MAX_LOG_TAB_SIZE); 475 | if (k & 1) { 476 | const Symbol* const s1 = &enc->symbols_[in[--k]]; 477 | FLUSH_STATE(state1, norm * s1->freq_); 478 | RENORMALIZE_STATE_ALIAS(state1, s1); 479 | } 480 | while (k > 0) { 481 | const Symbol* const s0 = &enc->symbols_[in[--k]]; 482 | const Symbol* const s1 = &enc->symbols_[in[--k]]; 483 | FLUSH_STATE(state0, norm * s0->freq_); 484 | FLUSH_STATE(state1, norm * s1->freq_); 485 | RENORMALIZE_STATE_ALIAS(state0, s0); 486 | RENORMALIZE_STATE_ALIAS(state1, s1); 487 | } 488 | FLUSH_STATE(state0, 0); 489 | FLUSH_STATE(state1, 0); 490 | FLUSH_STATE(state0, 0); 491 | FLUSH_STATE(state1, 0); 492 | return pos; 493 | } 494 | 495 | // ----------------------------------------------------------------------------- 496 | 497 | #define PUT_BLOCK_WRAPPER(FUNC_NAME, CALL) \ 498 | static void FUNC_NAME(const FSCEncoder* enc, const 
uint8_t* in, int size, \ 499 | FSCBitWriter* const bw) { \ 500 | FSCType output[BLOCK_SIZE]; \ 501 | assert(size <= BLOCK_SIZE); \ 502 | const int pos = CALL(enc, in, size, output); \ 503 | assert(pos >= 0); \ 504 | FSCAppend(bw, (const uint8_t*)&output[pos], \ 505 | (BLOCK_SIZE - pos) * sizeof(output[0])); \ 506 | } 507 | 508 | PUT_BLOCK_WRAPPER(PutBlockW1, DoPutBlockW1) 509 | PUT_BLOCK_WRAPPER(PutBlockW2, DoPutBlockW2) 510 | PUT_BLOCK_WRAPPER(PutBlockW4, DoPutBlockW4) 511 | PUT_BLOCK_WRAPPER(PutBlockAliasW1, DoPutBlockAliasW1) 512 | PUT_BLOCK_WRAPPER(PutBlockAliasW2, DoPutBlockAliasW2) 513 | 514 | // ----------------------------------------------------------------------------- 515 | // Coding 516 | 517 | static int SparseIsBetter(const uint32_t seq[], int len, int nb_bits) { 518 | uint32_t total = 1 << nb_bits; 519 | uint32_t half = total >> 1; 520 | int i; 521 | int saved_bits = -(len - 1); 522 | for (i = 0; i < len - 1; ++i) { 523 | const uint32_t c = seq[i]; 524 | if (c == 0) saved_bits += nb_bits; 525 | total -= c; 526 | if (total < half) { 527 | --nb_bits; 528 | half >>= 1; 529 | } 530 | } 531 | return (saved_bits > 0); 532 | } 533 | 534 | static int WriteSequence(const uint32_t seq[], int len, int sparse, int nb_bits, 535 | FSCBitWriter* const bw) { 536 | uint32_t total = 1 << nb_bits; 537 | uint32_t half = total >> 1; 538 | int i; 539 | int total_bits = 0; 540 | if (sparse == 2) { 541 | sparse = SparseIsBetter(seq, len, nb_bits); 542 | FSCWriteBits(bw, sparse, 1); 543 | } 544 | for (i = 0; i < len - 1; ++i) { 545 | const uint32_t c = seq[i]; 546 | if (sparse) { 547 | FSCWriteBits(bw, c > 0, 1); 548 | total_bits += 1; 549 | if (c == 0) continue; 550 | } 551 | FSCWriteBits(bw, c, nb_bits); 552 | total_bits += nb_bits; 553 | total -= c; 554 | if (total < half) { 555 | --nb_bits; 556 | half >>= 1; 557 | } 558 | } 559 | if (total != seq[len - 1]) return -1; // verify normalization 560 | return total_bits; 561 | } 562 | 563 | // Write the distribution table as 
header 564 | static int WriteHeader(FSCEncoder* const enc, const uint32_t counts[MAX_SYMBOLS], 565 | FSCBitWriter* bw) { 566 | const int max_symbol = enc->max_symbol_; 567 | const int log_tab_size = enc->log_tab_size_; 568 | uint32_t tab_size = 1u << log_tab_size; 569 | 570 | assert(enc->unique_symbol_ < 0); 571 | assert(max_symbol > 1); 572 | FSCWriteBits(bw, max_symbol - 1, 8); 573 | 574 | if (max_symbol < HDR_SYMBOL_LIMIT) { // Method #1 for small alphabet 575 | if (WriteSequence(counts, max_symbol, 2, log_tab_size, bw) < 0) { 576 | return 0; 577 | } 578 | } else { // Method #2 for large alphabet 579 | int ok = 0; 580 | int i; 581 | uint8_t bins[MAX_SYMBOLS]; 582 | uint32_t* const bHisto = 583 | (uint32_t*)calloc(sizeof(*bHisto), log_tab_size + 1); 584 | uint16_t bits[MAX_SYMBOLS]; 585 | if (bHisto == NULL) return 0; 586 | // Decompose into prefix and suffix 587 | { 588 | uint32_t total = tab_size; 589 | for (i = 0; i < max_symbol; ++i) { 590 | const int c = counts[i] + 1; 591 | int bin, b; 592 | for (bin = 0, b = c; b != 1; ++bin) { b >>= 1; } 593 | if (bin > log_tab_size) goto Error; 594 | bins[i] = bin; // prefix 595 | bits[i] = c - (1 << bin); // suffix 596 | ++bHisto[bin]; // record prefix distribution 597 | if (total < counts[i]) goto Error; 598 | total -= counts[i]; 599 | } 600 | if (total != 0) goto Error; // Unnormalized distribution!? 601 | } 602 | if (bHisto[0] == max_symbol - 1) { // only one symbol? 
603 | FSCWriteBits(bw, 32 - 1, 5); // special marker for sparse case 604 | } else { // Compress the prefix sequence using a sub-encoder 605 | FSCEncoder enc2; 606 | if (!EncoderInit(&enc2, bHisto, log_tab_size + 1, 607 | TAB_HDR_BITS, CODING_METHOD_BUCKET)) { 608 | fprintf(stderr, "Sub-Encoder initialization failed!\n"); 609 | goto Error; 610 | } 611 | const int hlen = enc2.max_symbol_; 612 | FSCWriteBits(bw, hlen - 1, 5); 613 | if (WriteSequence(bHisto, hlen, 2, TAB_HDR_BITS, bw) < 0) { 614 | fprintf(stderr, "Error during WriteSequence()!\n"); 615 | goto Error; 616 | } 617 | enc2.methods_.put_block(&enc2, bins, max_symbol - 1, bw); 618 | // Write the suffix sequence 619 | for (i = 0; i < max_symbol - 1; ++i) { 620 | FSCWriteBits(bw, bits[i], bins[i]); 621 | } 622 | } 623 | ok = 1; 624 | Error: 625 | free(bHisto); 626 | if (!ok) return 0; 627 | } 628 | End: 629 | return !bw->error_; 630 | } 631 | 632 | static int WriteParams(FSCEncoder* const enc, const uint32_t counts[MAX_SYMBOLS], 633 | FSCBitWriter* const bw) { 634 | FSCWriteBits(bw, LOG_TAB_SIZE - enc->log_tab_size_, 4); 635 | return WriteHeader(enc, counts, bw); 636 | } 637 | 638 | static int WriteParamsW(FSCEncoder* const enc, const uint32_t counts[MAX_SYMBOLS], 639 | FSCBitWriter* const bw) { 640 | return WriteHeader(enc, counts, bw); 641 | } 642 | 643 | // ----------------------------------------------------------------------------- 644 | 645 | static int WriteParamsUnique(FSCEncoder* const enc, const uint32_t counts[MAX_SYMBOLS], 646 | FSCBitWriter* const bw) { 647 | (void)counts; 648 | assert(enc->unique_symbol_ >= 0 && enc->unique_symbol_ < enc->max_symbol_); 649 | FSCWriteBits(bw, enc->unique_symbol_, 8); 650 | return !bw->error_; 651 | } 652 | 653 | static int BuildTablesUnique(FSCEncoder* const enc, const uint32_t counts[]) { 654 | (void)enc; 655 | (void)counts; 656 | return 1; 657 | } 658 | 659 | static void PutBlockUnique(const FSCEncoder* enc, const uint8_t* in, int size, 660 | FSCBitWriter* const 
bw) { 661 | (void)enc; 662 | (void)in; 663 | (void)size; 664 | (void)bw; 665 | } 666 | 667 | // ----------------------------------------------------------------------------- 668 | // Simulation and comparison against ideal case 669 | 670 | #ifdef SHOW_SIMULATION 671 | static void SimulateCoding(const FSCEncoder* enc, const uint32_t counts[], 672 | const uint8_t* message, size_t size, int tab_size) { 673 | const int max_symbol = enc->max_symbol_; 674 | int s, N; 675 | const transf_t* const transforms = enc->transforms_; 676 | const uint16_t* const states = enc->states_; 677 | int state = tab_size; 678 | double S0 = 0., S1 = 0.; // theoretical entropy 679 | { 680 | const double norm = 1. / tab_size; 681 | for (s = 0; s < max_symbol; ++s) { 682 | if (counts[s] > 0) { 683 | const double p = norm * counts[s]; 684 | S0 += -p * log(p); 685 | } 686 | } 687 | S0 /= 8. * log(2.); 688 | } 689 | 690 | { 691 | uint32_t real_counts[MAX_SYMBOLS]; 692 | FSCCountSymbols(message, size, real_counts); 693 | const double real_norm = 1. / size; 694 | for (N = 0; N < size; ++N) { 695 | S1 += -log(real_norm * real_counts[message[N]]); 696 | } 697 | S1 /= size * 8. * log(2.); 698 | } 699 | 700 | size_t bits = 0; // count overhead too? 701 | for (N = size - 1; N >= 0; --N) { 702 | const transf_t* const transf = &transforms[message[N]]; 703 | const int nb_bits = transf->nb_bits_ + (state >= transf->wrap_); 704 | bits += nb_bits; 705 | state = states[(state >> nb_bits) + transf->offset_]; 706 | } 707 | printf("ENTROPY:\n"); 708 | printf(" Simulated: %.2lf%% (imperfect coder, real message)\n", 709 | 100. * bits / (size * 8.)); 710 | printf(" Real: %.2lf%% (perfect approx. coder, real message)\n", 711 | 100. * S1); 712 | printf(" Theory: %.2lf%% (perfect approx. coder, perfect message)\n", 713 | 100. 
* S0); 714 | } 715 | #endif 716 | 717 | // ----------------------------------------------------------------------------- 718 | // Entry point 719 | 720 | static const EncMethods kEncMethods[CODING_METHOD_LAST] = { 721 | { WriteParams, PutBlock, BuildTables, BuildSpreadTableBucket }, 722 | { WriteParams, PutBlock, BuildTables, BuildSpreadTableReverse }, 723 | { WriteParams, PutBlock, BuildTables, BuildSpreadTableModulo }, 724 | { WriteParams, PutBlock, BuildTables, BuildSpreadTablePack }, 725 | 726 | { WriteParamsW, PutBlockW1, BuildTablesW, NULL }, 727 | { WriteParamsW, PutBlockW2, BuildTablesW, NULL }, 728 | { WriteParamsW, PutBlockAliasW1, BuildTablesAliasW, NULL }, 729 | { WriteParamsW, PutBlockAliasW2, BuildTablesAliasW, NULL }, 730 | { WriteParamsW, PutBlockW4, BuildTablesW, NULL }, 731 | 732 | { WriteParamsUnique, PutBlockUnique, BuildTablesUnique, NULL }, 733 | }; 734 | 735 | static int Encode(const uint8_t* in, size_t size, 736 | uint32_t counts[MAX_SYMBOLS], 737 | uint8_t** out, size_t* out_size, int log_tab_size, 738 | FSCCodingMethod method) { 739 | int ok = 0; 740 | FSCEncoder enc; 741 | FSCBitWriter bw; 742 | 743 | if (!FSCBitWriterInit(&bw, size >> 8)) return 0; 744 | 745 | if (!EncoderInit(&enc, counts, 0, log_tab_size, method)) { 746 | fprintf(stderr, "Error during EncoderInit() call\n"); 747 | goto end; 748 | } 749 | size_t val = size; 750 | while (val) { 751 | FSCWriteBits(&bw, 1, 1); 752 | FSCWriteBits(&bw, val & 0xff, 8); 753 | val >>= 8; 754 | } 755 | FSCWriteBits(&bw, 0, 1); 756 | 757 | FSCWriteBits(&bw, enc.method_, 4); 758 | if (!enc.methods_.write_params(&enc, counts, &bw)) { 759 | fprintf(stderr, "Error during WriteParams() call\n"); 760 | goto end; 761 | } 762 | #ifdef SHOW_SIMULATION 763 | SimulateCoding(&enc, counts, in, size, 1 << log_tab_size); 764 | #endif 765 | 766 | FSCPutBlockFunc put_block = enc.methods_.put_block; 767 | while (size > 0) { 768 | const int next = (size > BLOCK_SIZE) ? 
BLOCK_SIZE : size; 769 | put_block(&enc, in, next, &bw); 770 | in += next; 771 | size -= next; 772 | } 773 | FSCBitWriterFlush(&bw); 774 | ok = !bw.error_; 775 | 776 | end: 777 | if (ok) { 778 | *out = FSCBitWriterFinish(&bw); 779 | *out_size = FSCBitWriterNumBytes(&bw); 780 | } else { 781 | FSCBitWriterDestroy(&bw); 782 | } 783 | return ok; 784 | } 785 | 786 | int FSCEncode(const uint8_t* in, size_t in_size, 787 | uint8_t** out, size_t* out_size, int log_tab_size, 788 | FSCCodingMethod method) { 789 | uint32_t counts[MAX_SYMBOLS]; 790 | FSCCountSymbols(in, in_size, counts); 791 | return Encode(in, in_size, counts, out, out_size, log_tab_size, method); 792 | } 793 | 794 | // ----------------------------------------------------------------------------- 795 | --------------------------------------------------------------------------------