├── .travis.yml
├── CMakeLists.txt
├── LICENSE
├── Makefile
├── README.md
├── include
    ├── bpacking.h
    ├── common.h
    ├── compression.h
    ├── turbocompression.h
    ├── turbopacking32.h
    ├── turbopacking64.h
    └── util.h
├── sampledata.txt
├── scripts
    ├── turbopacking32.py
    └── turbopacking64.py
└── src
    ├── bpacking.cpp
    └── test.cpp


/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: cpp  # Travis CI configuration for a C++ project
 2 | sudo: false
 3 | compiler:
 4 |   - clang  # clang is the only compiler listed for CI builds
 5 | 
 6 | branches:
 7 |   only:
 8 |     - master  # CI is restricted to the master branch
 9 | 
10 | script: mkdir -p build && cd build && cmake .. && make VERBOSE=1 && make test  # out-of-source CMake build, then run the tests registered in CMakeLists.txt
11 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | # Build script for the FrameOfReference static library and its "unit" test
 3 | # executable.
 4 | #
 5 | # 2.8.7 remains the minimum required version; the "...3.13" suffix only opts in
 6 | # to newer policy defaults so that recent CMake releases do not warn about (or
 7 | # reject) compatibility with versions older than 3.5.
 8 | cmake_minimum_required(VERSION 2.8.7...3.13)
 9 | 
10 | # Default to an optimized build when the user did not choose a build type.
11 | if(NOT CMAKE_BUILD_TYPE)
12 |   message(STATUS "No build type selected, default to Release")
13 |   set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
14 | endif()
15 | message(STATUS "CMAKE_BUILD_TYPE: " ${CMAKE_BUILD_TYPE})
16 | 
17 | project(FrameOfReference CXX C)
18 | set(PROJECT_URL "https://github.com/lemire/FrameOfReference")
19 | set(PROJECT_DESCRIPTION "C++ library to pack and unpack vectors of integers having a small range of values using a technique called Frame of Reference")
20 | 
21 | # set(CMAKE_CXX_STANDARD 11) would be ideal, but it does not work on all
22 | # platforms, so the language level is requested through compiler flags.
23 | # Append to CMAKE_CXX_FLAGS instead of overwriting it so that flags supplied by
24 | # the user or a toolchain file (e.g. -DCMAKE_CXX_FLAGS=...) are preserved.
25 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11")
26 | 
27 | # Public headers. Listing them on the library target makes them show up in IDE
28 | # projects; they are not compiled on their own.
29 | set(HEADERS
30 |     include/bpacking.h
31 |     include/common.h
32 |     include/compression.h
33 |     include/turbocompression.h
34 |     include/turbopacking32.h
35 |     include/turbopacking64.h
36 |     include/util.h
37 | )
38 | 
39 | # library target
40 | include_directories(include)
41 | add_library(FrameOfReference STATIC
42 |     src/bpacking.cpp
43 |     ${HEADERS}
44 | )
45 | 
46 | enable_testing()
47 | 
48 | # Test/benchmark driver; the source is named with its extension so CMake does
49 | # not have to guess it.
50 | add_executable(unit src/test.cpp)
51 | target_link_libraries(unit FrameOfReference)
52 | 
53 | # The NAME/COMMAND signature resolves the "unit" target to the built executable
54 | # for every generator and configuration. The sample data file is passed as the
55 | # single argument, quoted so a source path containing spaces stays one argument.
56 | add_test(NAME unit COMMAND unit "${PROJECT_SOURCE_DIR}/sampledata.txt")
57 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
203 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .SUFFIXES:
 2 | #
 3 | .SUFFIXES: .cpp .o .c .h
 4 | # Set YOURCXX to the path of a C++11 compatible compiler (e.g. "make YOURCXX=clang++").
 5 | # A CXX given explicitly on the command line or in the environment takes precedence.
 6 | ifeq ($(INTEL), 1)
 7 | # if you wish to use the Intel compiler, please do "make INTEL=1".
 8 |     YOURCXX ?= /opt/intel/bin/icpc
 9 | ifeq ($(DEBUG),1)
10 |     CXXFLAGS =  -std=c++11 -O3 -Wall -ansi  -DDEBUG=1 -D_GLIBCXX_DEBUG   -ggdb
11 | else
12 |     CXXFLAGS =  -std=c++11 -O3 -Wall -ansi  -DNDEBUG=1  -ggdb
13 | endif # debug
14 | else #intel
15 |     YOURCXX ?= g++
16 | ifeq ($(DEBUG),1)
17 |     CXXFLAGS =   -std=c++11 -Weffc++ -pedantic -ggdb -DDEBUG=1 -D_GLIBCXX_DEBUG -Wall -Wextra  -Wcast-align
18 | else
19 |     CXXFLAGS =  -std=c++11 -Weffc++ -pedantic -O3 -Wall -Wextra  -Wcast-align
20 | endif #debug
21 | endif #intel
22 | 
23 | # The recipes below invoke $(CXX). make predefines CXX, so unless it is pointed
24 | # at YOURCXX the compiler selected above (including the Intel one) is never used.
25 | # Only the built-in default (or an undefined CXX, as with "make -R") is replaced.
26 | ifneq ($(filter default undefined,$(origin CXX)),)
27 |     CXX = $(YOURCXX)
28 | endif
29 | 
30 | # Header list used as a prerequisite of every compiled target; expanded once by
31 | # make itself, without spawning a shell.
32 | HEADERS := $(wildcard include/*.h)
33 | 
34 | # "all" and "clean" do not produce files with those names.
35 | .PHONY: all clean
36 | 
37 | all: test
38 | 	@echo "type ./test sampledata.txt"
39 | 	@echo "if you have OpenMP support, type make testmp"
40 | 
41 | # bpacking.h includes common.h, so rebuild the object when any header changes.
42 | bpacking.o: $(HEADERS) src/bpacking.cpp
43 | 	$(CXX) $(CXXFLAGS) -c src/bpacking.cpp -Iinclude
44 | 
45 | # OpenMP build of the benchmark; the Intel compiler takes a different OpenMP switch.
46 | testmp: $(HEADERS) src/test.cpp bpacking.o
47 | ifeq ($(INTEL), 1)
48 | 	$(CXX)  -openmp -lpthread -D_OPENMP $(CXXFLAGS) -o testmp src/test.cpp bpacking.o  -Iinclude
49 | else
50 | 	$(CXX)  -fopenmp -lpthread $(CXXFLAGS) -o testmp src/test.cpp bpacking.o  -Iinclude
51 | endif
52 | 
53 | # Single-threaded benchmark executable (run as "./test sampledata.txt").
54 | test: $(HEADERS) src/test.cpp bpacking.o
55 | 	$(CXX)  $(CXXFLAGS) -o test src/test.cpp bpacking.o  -Iinclude
56 | 
57 | clean:
58 | 	rm -f *.o test testmp
59 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Frame of Reference (FOR) C++ library
  2 | 
  3 | ## What is this?
  4 | 
  5 | C++ library to pack and unpack vectors of integers having a small
  6 | range of values using a technique called Frame of Reference (Goldstein et al. 1998).
  7 | It should run fast even though it is written in simple C++.
  8 | 
  9 | Code from this library is part of [Apache Arrow](https://github.com/apache/arrow)
 10 | and [Apache Impala](https://github.com/cloudera/Impala).
 11 | 
 12 | ## Code usage :
 13 | 
 14 | Given an array of 32-bit integers, you can compress it as follows:
 15 | 
 16 | ```C
 17 | #include "compression.h"
 18 | 
 19 | ...
 20 | 
 21 | uint32_t * inputdata = ... // length values
 22 | uint32_t * compresseddata = ... // enough data
 23 | uint32_t *out = compress(inputdata, length, compresseddata);
 24 | // compressed data lies between compresseddata and out
 25 | uint32_t nvalue = 0;
 26 | uint32_t * recoverydata = ... // available buffer with at least length elements
 27 | uncompress(compresseddata, recoverydata, nvalue);
 28 | // nvalue will be equal to length
 29 | ```
 30 | 
 31 | There is a similar API with ``turbocompress`` and ``turbouncompress`` with the difference
 32 | that ``compresseddata`` uses an ``uint8_t`` pointer type.
 33 | 
 34 | ```C
 35 | #include "turbocompression.h"
 36 | 
 37 | ...
 38 | 
 39 | uint32_t * inputdata = ... // length values
 40 | uint8_t * compresseddata = ... // enough data
 41 | uint8_t *out = turbocompress(inputdata, length, compresseddata);
 42 | // compressed data lies between compresseddata and out
 43 | uint32_t nvalue = 0;
 44 | uint32_t * recoverydata = ... // available buffer with at least length elements
 45 | turbouncompress(compresseddata, recoverydata, nvalue);
 46 | // nvalue will be equal to length
 47 | ```
 48 | 
 49 | We can also compress 64-bit arrays:
 50 | 
 51 | ```C
 52 | #include "turbocompression.h"
 53 | 
 54 | ...
 55 | 
 56 | uint64_t * inputdata = ... // length values
 57 | uint8_t * compresseddata = ... // enough data
 58 | uint8_t *out = turbocompress64(inputdata, length, compresseddata);
 59 | // compressed data lies between compresseddata and out
 60 | uint32_t nvalue = 0;
 61 | uint64_t * recoverydata = ... // available buffer with at least length elements
 62 | turbouncompress64(compresseddata, recoverydata, nvalue);
 63 | // nvalue will be equal to length
 64 | ```
 65 | 
 66 | ##  Usage (with Makefile)
 67 | 
 68 | To run a simple benchmark, do
 69 | 
 70 |      make
 71 |      ./test sampledata.txt
 72 | 
 73 | where sampledata.txt is a text data file with one integer per line.
 74 | 
 75 | For a parallelized version, type
 76 | 
 77 |      make testmp
 78 |      ./testmp sampledata.txt
 79 | 
 80 | This requires OpenMP support however.
 81 | 
 82 | 
 83 | ##  Building (with CMake under macOS and Linux)
 84 | 
 85 | You need to have ``cmake`` installed and available as a command.
 86 | 
 87 |      mkdir release
 88 |      cd release
 89 |      cmake ..
 90 |      make
 91 |      make test
 92 | 
 93 | ## Building (Visual Studio under Windows)
 94 | 
 95 | We are assuming that you have a common Windows PC with at least Visual Studio 2015, and an x64 processor.
 96 | 
 97 | To build with at least Visual Studio 2015 from the command line:
 98 | - Grab the FrameOfReference code from GitHub, e.g., by cloning it using [GitHub Desktop](https://desktop.github.com/).
 99 | - Install [CMake](https://cmake.org/download/). When you install it, make sure to ask that ``cmake`` be made available from the command line.
100 | - Create a subdirectory within FrameOfReference, such as ``VisualStudio``.
101 | - Using a shell, go to this newly created directory. For example, within GitHub Desktop, you can right-click on  ``FrameOfReference`` in your GitHub repository list, and select ``Open in Git Shell``, then type ``cd VisualStudio`` in the newly created shell.
102 | - Type ``cmake -DCMAKE_GENERATOR_PLATFORM=x64 ..`` in the shell while in the ``VisualStudio`` directory.
103 | - This last command created a Visual Studio solution file in the newly created directory (e.g., ``FrameOfReference.sln``). Open this file in Visual Studio. You should now be able to build the project and run the tests. For example, in the ``Solution Explorer`` window (available from the ``View`` menu), right-click ``ALL_BUILD`` and select ``Build``. To test the code, still in the ``Solution Explorer`` window, right-click ``RUN_TESTS`` and select ``Build``.
104 | 
105 | To build with at least Visual Studio 2017 directly in the IDE:
106 | - Grab the FrameOfReference code from GitHub, e.g., by cloning it using [GitHub Desktop](https://desktop.github.com/).
107 | - Select the ``Visual C++ tools for CMake`` optional component when installing the C++ Development Workload within Visual Studio.
108 | - Within Visual Studio use ``File > Open > Folder...`` to open the FrameOfReference folder.
109 | - Right click on CMakeLists.txt in the parent directory within ``Solution Explorer`` and select ``Build`` to build the project.
110 | - For testing, in the Standard toolbar, drop the ``Select Startup Item...`` menu and choose one of the tests. Run the test by pressing the button to the left of the dropdown.
111 | 
112 | 
113 | ## Requirements:
114 | 
115 | This was tested with GNU G++ and clang++. After suitable adjustments, it should
116 | build under most C++ compilers.
117 | 
118 | ## Other relevant libraries
119 | 
120 | * Fast decoder for VByte-compressed integers https://github.com/lemire/MaskedVByte
121 | * Fast integer compression in C using StreamVByte https://github.com/lemire/streamvbyte
122 | * FastPFOR is a C++ research library well suited to compress unsorted arrays: https://github.com/lemire/FastPFor
123 | * SIMDCompressionAndIntersection is a C++ research library well suited for sorted arrays (differential coding)
124 | and computing intersections: https://github.com/lemire/SIMDCompressionAndIntersection
125 | 
126 | ## References
127 | 
128 | * Daniel Lemire, Nathan Kurz, Christoph Rupp, Stream VByte: Faster Byte-Oriented Integer Compression, Information Processing Letters (to appear) https://arxiv.org/abs/1709.08990
129 | * Goldstein J, Ramakrishnan R, Shaft U. Compressing relations and indexes. Proceedings of the Fourteenth International Conference on Data Engineering, ICDE ’98, IEEE Computer Society: Washington, DC, USA, 1998; 370–379.
130 | * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second through vectorization, Software Practice & Experience 45 (1), 2015.  http://arxiv.org/abs/1209.2137 http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract
131 | * Daniel Lemire, Leonid Boytsov, Nathan Kurz, SIMD Compression and the Intersection of Sorted Integers, Software Practice & Experience 46 (6), 2016. http://arxiv.org/abs/1401.6399
132 | * Jeff Plaisance, Nathan Kurz, Daniel Lemire, Vectorized VByte Decoding, International Symposium on Web Algorithms 2015, 2015. http://arxiv.org/abs/1503.07387
133 | * Wayne Xin Zhao, Xudong Zhang, Daniel Lemire, Dongdong Shan, Jian-Yun Nie, Hongfei Yan, Ji-Rong Wen, A General SIMD-based Approach to Accelerating Compression Algorithms, ACM Transactions on Information Systems 33 (3), 2015. http://arxiv.org/abs/1502.01916
134 | * Jianguo Wang, Chunbin Lin, Yannis Papakonstantinou, Steven Swanson, An Experimental Study of Bitmap Compression vs. Inverted List Compression, SIGMOD 2017 http://db.ucsd.edu/wp-content/uploads/2017/03/sidm338-wangA.pdf
135 | 


--------------------------------------------------------------------------------
/include/bpacking.h:
--------------------------------------------------------------------------------
  1 | /**
  2 | * Prototypes for the packN_8 / unpackN_8 and packN_16 / unpackN_16 routines; each takes (base, in, out) and returns a uint32_t pointer.
  3 | * This code is released under the
  4 | * Apache License Version 2.0 http://www.apache.org/licenses/.
  5 | * (c) Daniel Lemire 2013
  6 | */
  7 | #ifndef BPACKING
  8 | #define BPACKING
  9 | /* NOTE(review): N presumably is the bit width and the _8/_16 suffix the number of values handled per call -- confirm against src/bpacking.cpp. */
 10 | #include "common.h"
 11 | uint32_t * pack1_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 12 | 
 13 | uint32_t * pack2_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 14 | 
 15 | uint32_t * pack3_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 16 | 
 17 | uint32_t * pack4_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 18 | 
 19 | uint32_t * pack5_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 20 | 
 21 | uint32_t * pack6_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 22 | 
 23 | uint32_t * pack7_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 24 | 
 25 | uint32_t * pack8_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 26 | 
 27 | uint32_t * pack9_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 28 | 
 29 | uint32_t * pack10_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 30 | 
 31 | uint32_t * pack11_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 32 | 
 33 | uint32_t * pack12_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 34 | 
 35 | uint32_t * pack13_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 36 | 
 37 | uint32_t * pack14_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 38 | 
 39 | uint32_t * pack15_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 40 | 
 41 | uint32_t * pack16_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 42 | 
 43 | uint32_t * pack17_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 44 | 
 45 | uint32_t * pack18_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 46 | 
 47 | uint32_t * pack19_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 48 | 
 49 | uint32_t * pack20_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 50 | 
 51 | uint32_t * pack21_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 52 | 
 53 | uint32_t * pack22_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 54 | 
 55 | uint32_t * pack23_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 56 | 
 57 | uint32_t * pack24_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 58 | 
 59 | uint32_t * pack25_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 60 | 
 61 | uint32_t * pack26_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 62 | 
 63 | uint32_t * pack27_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 64 | 
 65 | uint32_t * pack28_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 66 | 
 67 | uint32_t * pack29_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 68 | 
 69 | uint32_t * pack30_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 70 | 
 71 | uint32_t * pack31_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 72 | 
 73 | uint32_t * pack32_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 74 | 
 75 | 
 76 | uint32_t * unpack1_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 77 | 
 78 | 
 79 | uint32_t * unpack2_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 80 | 
 81 | 
 82 | uint32_t * unpack3_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 83 | 
 84 | 
 85 | uint32_t * unpack4_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 86 | 
 87 | 
 88 | uint32_t * unpack5_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 89 | 
 90 | 
 91 | uint32_t * unpack6_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 92 | 
 93 | 
 94 | uint32_t * unpack7_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 95 | 
 96 | 
 97 | uint32_t * unpack8_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
 98 | 
 99 | 
100 | uint32_t * unpack9_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
101 | 
102 | 
103 | uint32_t * unpack10_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
104 | 
105 | 
106 | uint32_t * unpack11_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
107 | 
108 | 
109 | uint32_t * unpack12_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
110 | 
111 | 
112 | uint32_t * unpack13_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
113 | 
114 | 
115 | uint32_t * unpack14_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
116 | 
117 | 
118 | uint32_t * unpack15_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
119 | 
120 | 
121 | uint32_t * unpack16_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
122 | 
123 | 
124 | uint32_t * unpack17_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
125 | 
126 | 
127 | uint32_t * unpack18_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
128 | 
129 | 
130 | uint32_t * unpack19_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
131 | 
132 | 
133 | uint32_t * unpack20_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
134 | 
135 | 
136 | uint32_t * unpack21_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
137 | 
138 | 
139 | uint32_t * unpack22_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
140 | 
141 | 
142 | uint32_t * unpack23_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
143 | 
144 | 
145 | uint32_t * unpack24_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
146 | 
147 | 
148 | uint32_t * unpack25_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
149 | 
150 | 
151 | uint32_t * unpack26_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
152 | 
153 | 
154 | uint32_t * unpack27_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
155 | 
156 | 
157 | uint32_t * unpack28_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
158 | 
159 | 
160 | uint32_t * unpack29_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
161 | 
162 | 
163 | uint32_t * unpack30_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
164 | 
165 | 
166 | uint32_t * unpack31_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
167 | 
168 | 
169 | uint32_t * unpack32_8( uint32_t base,  uint32_t *   in, uint32_t *    out);
170 | // pack<b>_16 prototypes: entry b of the pack16 table below; compress() in compression.h hands each call 16 input values and resumes writing at the returned pointer.
171 | uint32_t *pack1_16(uint32_t base, uint32_t *in, uint32_t *out);
172 | 
173 | uint32_t *pack2_16(uint32_t base, uint32_t *in, uint32_t *out);
174 | 
175 | uint32_t *pack3_16(uint32_t base, uint32_t *in, uint32_t *out);
176 | 
177 | uint32_t *pack4_16(uint32_t base, uint32_t *in, uint32_t *out);
178 | 
179 | uint32_t *pack5_16(uint32_t base, uint32_t *in, uint32_t *out);
180 | 
181 | uint32_t *pack6_16(uint32_t base, uint32_t *in, uint32_t *out);
182 | 
183 | uint32_t *pack7_16(uint32_t base, uint32_t *in, uint32_t *out);
184 | 
185 | uint32_t *pack8_16(uint32_t base, uint32_t *in, uint32_t *out);
186 | 
187 | uint32_t *pack9_16(uint32_t base, uint32_t *in, uint32_t *out);
188 | 
189 | uint32_t *pack10_16(uint32_t base, uint32_t *in, uint32_t *out);
190 | 
191 | uint32_t *pack11_16(uint32_t base, uint32_t *in, uint32_t *out);
192 | 
193 | uint32_t *pack12_16(uint32_t base, uint32_t *in, uint32_t *out);
194 | 
195 | uint32_t *pack13_16(uint32_t base, uint32_t *in, uint32_t *out);
196 | 
197 | uint32_t *pack14_16(uint32_t base, uint32_t *in, uint32_t *out);
198 | 
199 | uint32_t *pack15_16(uint32_t base, uint32_t *in, uint32_t *out);
200 | 
201 | uint32_t *pack16_16(uint32_t base, uint32_t *in, uint32_t *out);
202 | 
203 | uint32_t *pack17_16(uint32_t base, uint32_t *in, uint32_t *out);
204 | 
205 | uint32_t *pack18_16(uint32_t base, uint32_t *in, uint32_t *out);
206 | 
207 | uint32_t *pack19_16(uint32_t base, uint32_t *in, uint32_t *out);
208 | 
209 | uint32_t *pack20_16(uint32_t base, uint32_t *in, uint32_t *out);
210 | 
211 | uint32_t *pack21_16(uint32_t base, uint32_t *in, uint32_t *out);
212 | 
213 | uint32_t *pack22_16(uint32_t base, uint32_t *in, uint32_t *out);
214 | 
215 | uint32_t *pack23_16(uint32_t base, uint32_t *in, uint32_t *out);
216 | 
217 | uint32_t *pack24_16(uint32_t base, uint32_t *in, uint32_t *out);
218 | 
219 | uint32_t *pack25_16(uint32_t base, uint32_t *in, uint32_t *out);
220 | 
221 | uint32_t *pack26_16(uint32_t base, uint32_t *in, uint32_t *out);
222 | 
223 | uint32_t *pack27_16(uint32_t base, uint32_t *in, uint32_t *out);
224 | 
225 | uint32_t *pack28_16(uint32_t base, uint32_t *in, uint32_t *out);
226 | 
227 | uint32_t *pack29_16(uint32_t base, uint32_t *in, uint32_t *out);
228 | 
229 | uint32_t *pack30_16(uint32_t base, uint32_t *in, uint32_t *out);
230 | 
231 | uint32_t *pack31_16(uint32_t base, uint32_t *in, uint32_t *out);
232 | 
233 | uint32_t *pack32_16(uint32_t base, uint32_t *in, uint32_t *out);
234 | 
235 | // unpack<b>_16 prototypes: entry b of the unpack16 table below; uncompress() in compression.h gives each call room for 16 outputs and resumes reading at the returned pointer.
236 | uint32_t *unpack1_16(uint32_t base, uint32_t *in, uint32_t *out);
237 | 
238 | 
239 | uint32_t *unpack2_16(uint32_t base, uint32_t *in, uint32_t *out);
240 | 
241 | 
242 | uint32_t *unpack3_16(uint32_t base, uint32_t *in, uint32_t *out);
243 | 
244 | 
245 | uint32_t *unpack4_16(uint32_t base, uint32_t *in, uint32_t *out);
246 | 
247 | 
248 | uint32_t *unpack5_16(uint32_t base, uint32_t *in, uint32_t *out);
249 | 
250 | 
251 | uint32_t *unpack6_16(uint32_t base, uint32_t *in, uint32_t *out);
252 | 
253 | 
254 | uint32_t *unpack7_16(uint32_t base, uint32_t *in, uint32_t *out);
255 | 
256 | 
257 | uint32_t *unpack8_16(uint32_t base, uint32_t *in, uint32_t *out);
258 | 
259 | 
260 | uint32_t *unpack9_16(uint32_t base, uint32_t *in, uint32_t *out);
261 | 
262 | 
263 | uint32_t *unpack10_16(uint32_t base, uint32_t *in, uint32_t *out);
264 | 
265 | 
266 | uint32_t *unpack11_16(uint32_t base, uint32_t *in, uint32_t *out);
267 | 
268 | 
269 | uint32_t *unpack12_16(uint32_t base, uint32_t *in, uint32_t *out);
270 | 
271 | 
272 | uint32_t *unpack13_16(uint32_t base, uint32_t *in, uint32_t *out);
273 | 
274 | 
275 | uint32_t *unpack14_16(uint32_t base, uint32_t *in, uint32_t *out);
276 | 
277 | 
278 | uint32_t *unpack15_16(uint32_t base, uint32_t *in, uint32_t *out);
279 | 
280 | 
281 | uint32_t *unpack16_16(uint32_t base, uint32_t *in, uint32_t *out);
282 | 
283 | 
284 | uint32_t *unpack17_16(uint32_t base, uint32_t *in, uint32_t *out);
285 | 
286 | 
287 | uint32_t *unpack18_16(uint32_t base, uint32_t *in, uint32_t *out);
288 | 
289 | 
290 | uint32_t *unpack19_16(uint32_t base, uint32_t *in, uint32_t *out);
291 | 
292 | 
293 | uint32_t *unpack20_16(uint32_t base, uint32_t *in, uint32_t *out);
294 | 
295 | 
296 | uint32_t *unpack21_16(uint32_t base, uint32_t *in, uint32_t *out);
297 | 
298 | 
299 | uint32_t *unpack22_16(uint32_t base, uint32_t *in, uint32_t *out);
300 | 
301 | 
302 | uint32_t *unpack23_16(uint32_t base, uint32_t *in, uint32_t *out);
303 | 
304 | 
305 | uint32_t *unpack24_16(uint32_t base, uint32_t *in, uint32_t *out);
306 | 
307 | 
308 | uint32_t *unpack25_16(uint32_t base, uint32_t *in, uint32_t *out);
309 | 
310 | 
311 | uint32_t *unpack26_16(uint32_t base, uint32_t *in, uint32_t *out);
312 | 
313 | 
314 | uint32_t *unpack27_16(uint32_t base, uint32_t *in, uint32_t *out);
315 | 
316 | 
317 | uint32_t *unpack28_16(uint32_t base, uint32_t *in, uint32_t *out);
318 | 
319 | 
320 | uint32_t *unpack29_16(uint32_t base, uint32_t *in, uint32_t *out);
321 | 
322 | 
323 | uint32_t *unpack30_16(uint32_t base, uint32_t *in, uint32_t *out);
324 | 
325 | 
326 | uint32_t *unpack31_16(uint32_t base, uint32_t *in, uint32_t *out);
327 | 
328 | 
329 | uint32_t *unpack32_16(uint32_t base, uint32_t *in, uint32_t *out);
330 | // pack<b>_32 prototypes: entry b of the pack32 table below; compress() in compression.h hands each call 32 input values and resumes writing at the returned pointer.
331 | uint32_t *pack1_32(uint32_t base, uint32_t *in, uint32_t *out);
332 | 
333 | uint32_t *pack2_32(uint32_t base, uint32_t *in, uint32_t *out);
334 | 
335 | uint32_t *pack3_32(uint32_t base, uint32_t *in, uint32_t *out);
336 | 
337 | uint32_t *pack4_32(uint32_t base, uint32_t *in, uint32_t *out);
338 | 
339 | uint32_t *pack5_32(uint32_t base, uint32_t *in, uint32_t *out);
340 | 
341 | uint32_t *pack6_32(uint32_t base, uint32_t *in, uint32_t *out);
342 | 
343 | uint32_t *pack7_32(uint32_t base, uint32_t *in, uint32_t *out);
344 | 
345 | uint32_t *pack8_32(uint32_t base, uint32_t *in, uint32_t *out);
346 | 
347 | uint32_t *pack9_32(uint32_t base, uint32_t *in, uint32_t *out);
348 | 
349 | uint32_t *pack10_32(uint32_t base, uint32_t *in, uint32_t *out);
350 | 
351 | uint32_t *pack11_32(uint32_t base, uint32_t *in, uint32_t *out);
352 | 
353 | uint32_t *pack12_32(uint32_t base, uint32_t *in, uint32_t *out);
354 | 
355 | uint32_t *pack13_32(uint32_t base, uint32_t *in, uint32_t *out);
356 | 
357 | uint32_t *pack14_32(uint32_t base, uint32_t *in, uint32_t *out);
358 | 
359 | uint32_t *pack15_32(uint32_t base, uint32_t *in, uint32_t *out);
360 | 
361 | uint32_t *pack16_32(uint32_t base, uint32_t *in, uint32_t *out);
362 | 
363 | uint32_t *pack17_32(uint32_t base, uint32_t *in, uint32_t *out);
364 | 
365 | uint32_t *pack18_32(uint32_t base, uint32_t *in, uint32_t *out);
366 | 
367 | uint32_t *pack19_32(uint32_t base, uint32_t *in, uint32_t *out);
368 | 
369 | uint32_t *pack20_32(uint32_t base, uint32_t *in, uint32_t *out);
370 | 
371 | uint32_t *pack21_32(uint32_t base, uint32_t *in, uint32_t *out);
372 | 
373 | uint32_t *pack22_32(uint32_t base, uint32_t *in, uint32_t *out);
374 | 
375 | uint32_t *pack23_32(uint32_t base, uint32_t *in, uint32_t *out);
376 | 
377 | uint32_t *pack24_32(uint32_t base, uint32_t *in, uint32_t *out);
378 | 
379 | uint32_t *pack25_32(uint32_t base, uint32_t *in, uint32_t *out);
380 | 
381 | uint32_t *pack26_32(uint32_t base, uint32_t *in, uint32_t *out);
382 | 
383 | uint32_t *pack27_32(uint32_t base, uint32_t *in, uint32_t *out);
384 | 
385 | uint32_t *pack28_32(uint32_t base, uint32_t *in, uint32_t *out);
386 | 
387 | uint32_t *pack29_32(uint32_t base, uint32_t *in, uint32_t *out);
388 | 
389 | uint32_t *pack30_32(uint32_t base, uint32_t *in, uint32_t *out);
390 | 
391 | uint32_t *pack31_32(uint32_t base, uint32_t *in, uint32_t *out);
392 | 
393 | uint32_t *pack32_32(uint32_t base, uint32_t *in, uint32_t *out);
394 | 
395 | // unpack<b>_32 prototypes: entry b of the unpack32 table below; uncompress() in compression.h calls it once per 32 outputs and steps the input by b words per call.
396 | uint32_t *unpack1_32(uint32_t base, uint32_t *in, uint32_t *out);
397 | 
398 | 
399 | uint32_t *unpack2_32(uint32_t base, uint32_t *in, uint32_t *out);
400 | 
401 | 
402 | uint32_t *unpack3_32(uint32_t base, uint32_t *in, uint32_t *out);
403 | 
404 | 
405 | uint32_t *unpack4_32(uint32_t base, uint32_t *in, uint32_t *out);
406 | 
407 | 
408 | uint32_t *unpack5_32(uint32_t base, uint32_t *in, uint32_t *out);
409 | 
410 | 
411 | uint32_t *unpack6_32(uint32_t base, uint32_t *in, uint32_t *out);
412 | 
413 | 
414 | uint32_t *unpack7_32(uint32_t base, uint32_t *in, uint32_t *out);
415 | 
416 | 
417 | uint32_t *unpack8_32(uint32_t base, uint32_t *in, uint32_t *out);
418 | 
419 | 
420 | uint32_t *unpack9_32(uint32_t base, uint32_t *in, uint32_t *out);
421 | 
422 | 
423 | uint32_t *unpack10_32(uint32_t base, uint32_t *in, uint32_t *out);
424 | 
425 | 
426 | uint32_t *unpack11_32(uint32_t base, uint32_t *in, uint32_t *out);
427 | 
428 | 
429 | uint32_t *unpack12_32(uint32_t base, uint32_t *in, uint32_t *out);
430 | 
431 | 
432 | uint32_t *unpack13_32(uint32_t base, uint32_t *in, uint32_t *out);
433 | 
434 | 
435 | uint32_t *unpack14_32(uint32_t base, uint32_t *in, uint32_t *out);
436 | 
437 | 
438 | uint32_t *unpack15_32(uint32_t base, uint32_t *in, uint32_t *out);
439 | 
440 | 
441 | uint32_t *unpack16_32(uint32_t base, uint32_t *in, uint32_t *out);
442 | 
443 | 
444 | uint32_t *unpack17_32(uint32_t base, uint32_t *in, uint32_t *out);
445 | 
446 | 
447 | uint32_t *unpack18_32(uint32_t base, uint32_t *in, uint32_t *out);
448 | 
449 | 
450 | uint32_t *unpack19_32(uint32_t base, uint32_t *in, uint32_t *out);
451 | 
452 | 
453 | uint32_t *unpack20_32(uint32_t base, uint32_t *in, uint32_t *out);
454 | 
455 | 
456 | uint32_t *unpack21_32(uint32_t base, uint32_t *in, uint32_t *out);
457 | 
458 | 
459 | uint32_t *unpack22_32(uint32_t base, uint32_t *in, uint32_t *out);
460 | 
461 | 
462 | uint32_t *unpack23_32(uint32_t base, uint32_t *in, uint32_t *out);
463 | 
464 | 
465 | uint32_t *unpack24_32(uint32_t base, uint32_t *in, uint32_t *out);
466 | 
467 | 
468 | uint32_t *unpack25_32(uint32_t base, uint32_t *in, uint32_t *out);
469 | 
470 | 
471 | uint32_t *unpack26_32(uint32_t base, uint32_t *in, uint32_t *out);
472 | 
473 | 
474 | uint32_t *unpack27_32(uint32_t base, uint32_t *in, uint32_t *out);
475 | 
476 | 
477 | uint32_t *unpack28_32(uint32_t base, uint32_t *in, uint32_t *out);
478 | 
479 | 
480 | uint32_t *unpack29_32(uint32_t base, uint32_t *in, uint32_t *out);
481 | 
482 | 
483 | uint32_t *unpack30_32(uint32_t base, uint32_t *in, uint32_t *out);
484 | 
485 | 
486 | uint32_t *unpack31_32(uint32_t base, uint32_t *in, uint32_t *out);
487 | 
488 | 
489 | uint32_t *unpack32_32(uint32_t base, uint32_t *in, uint32_t *out);
490 | 
491 | // Common signature of every packer/unpacker declared in this header: (base value, input pointer, output pointer) -> pointer.
492 | typedef uint32_t *(*packfnc)(uint32_t, uint32_t *, uint32_t *);
493 | // Packer used for bit width 0 (index 0 of the pack tables): nothing is stored, so the output position is returned untouched.
494 | static uint32_t *nullpacker(uint32_t /* base */, uint32_t * /* in */, uint32_t *out) {
495 |     return out; // zero words written
496 | }
497 | // Unpacker used for bit width 0 on 8-value blocks: every output equals base and no input is consumed.
498 | static uint32_t *nullunpacker8(uint32_t base, uint32_t *in, uint32_t *out) {
499 |     for (uint32_t *end = out + 8; out != end; ++out) {
500 |         *out = base;
501 |     }
502 |     return in;
503 | }
504 | // Unpacker used for bit width 0 on 16-value blocks: all 16 outputs equal base and no input is consumed.
505 | static uint32_t *nullunpacker16(uint32_t base, uint32_t *in, uint32_t *out) {
506 |     for (int k = 0; k < 16; ++k) { // 16, not 8: uncompress() advances out by 16 after each call
507 |         out[k] = base;
508 |     }
509 |     return in;
510 | }
511 | // Unpacker used for bit width 0 on 32-value blocks: all 32 outputs equal base and no input is consumed.
512 | static uint32_t *nullunpacker32(uint32_t base, uint32_t *in, uint32_t *out) {
513 |     for (int k = 0; k < 32; ++k) { // 32, not 8: uncompress() writes block k at out + 32*k
514 |         out[k] = base;
515 |     }
516 |     return in;
517 | }
518 | 
519 | 
520 | // Unpackers for 8-value blocks, looked up as unpack8[b] with b in 0..32; index 0 is the constant-fill unpacker.
521 | static const packfnc unpack8[33] = { nullunpacker8, unpack1_8,
522 |     unpack2_8,
523 |     unpack3_8,
524 |     unpack4_8,
525 |     unpack5_8,
526 |     unpack6_8,
527 |     unpack7_8,
528 |     unpack8_8,
529 |     unpack9_8,
530 |     unpack10_8,
531 |     unpack11_8,
532 |     unpack12_8,
533 |     unpack13_8,
534 |     unpack14_8,
535 |     unpack15_8,
536 |     unpack16_8,
537 |     unpack17_8,
538 |     unpack18_8,
539 |     unpack19_8,
540 |     unpack20_8,
541 |     unpack21_8,
542 |     unpack22_8,
543 |     unpack23_8,
544 |     unpack24_8,
545 |     unpack25_8,
546 |     unpack26_8,
547 |     unpack27_8,
548 |     unpack28_8,
549 |     unpack29_8,
550 |     unpack30_8,
551 |     unpack31_8,
552 |     unpack32_8
553 | };
554 | // Packers for 8-value blocks, looked up as pack8[b] with b in 0..32; index 0 stores nothing.
555 | static const packfnc pack8[33] = { nullpacker, pack1_8,
556 |     pack2_8,
557 |     pack3_8,
558 |     pack4_8,
559 |     pack5_8,
560 |     pack6_8,
561 |     pack7_8,
562 |     pack8_8,
563 |     pack9_8,
564 |     pack10_8,
565 |     pack11_8,
566 |     pack12_8,
567 |     pack13_8,
568 |     pack14_8,
569 |     pack15_8,
570 |     pack16_8,
571 |     pack17_8,
572 |     pack18_8,
573 |     pack19_8,
574 |     pack20_8,
575 |     pack21_8,
576 |     pack22_8,
577 |     pack23_8,
578 |     pack24_8,
579 |     pack25_8,
580 |     pack26_8,
581 |     pack27_8,
582 |     pack28_8,
583 |     pack29_8,
584 |     pack30_8,
585 |     pack31_8,
586 |     pack32_8
587 | };
588 | 
589 | // Unpackers for 16-value blocks, looked up as unpack16[b] with b in 0..32; index 0 is the constant-fill unpacker.
590 | static const packfnc unpack16[33] = { nullunpacker16, unpack1_16,
591 |     unpack2_16,
592 |     unpack3_16,
593 |     unpack4_16,
594 |     unpack5_16,
595 |     unpack6_16,
596 |     unpack7_16,
597 |     unpack8_16,
598 |     unpack9_16,
599 |     unpack10_16,
600 |     unpack11_16,
601 |     unpack12_16,
602 |     unpack13_16,
603 |     unpack14_16,
604 |     unpack15_16,
605 |     unpack16_16,
606 |     unpack17_16,
607 |     unpack18_16,
608 |     unpack19_16,
609 |     unpack20_16,
610 |     unpack21_16,
611 |     unpack22_16,
612 |     unpack23_16,
613 |     unpack24_16,
614 |     unpack25_16,
615 |     unpack26_16,
616 |     unpack27_16,
617 |     unpack28_16,
618 |     unpack29_16,
619 |     unpack30_16,
620 |     unpack31_16,
621 |     unpack32_16
622 | };
623 | 
624 | // Packers for 16-value blocks, looked up as pack16[b] with b in 0..32; index 0 stores nothing.
625 | static const packfnc pack16[33] = { nullpacker, pack1_16,
626 |     pack2_16,
627 |     pack3_16,
628 |     pack4_16,
629 |     pack5_16,
630 |     pack6_16,
631 |     pack7_16,
632 |     pack8_16,
633 |     pack9_16,
634 |     pack10_16,
635 |     pack11_16,
636 |     pack12_16,
637 |     pack13_16,
638 |     pack14_16,
639 |     pack15_16,
640 |     pack16_16,
641 |     pack17_16,
642 |     pack18_16,
643 |     pack19_16,
644 |     pack20_16,
645 |     pack21_16,
646 |     pack22_16,
647 |     pack23_16,
648 |     pack24_16,
649 |     pack25_16,
650 |     pack26_16,
651 |     pack27_16,
652 |     pack28_16,
653 |     pack29_16,
654 |     pack30_16,
655 |     pack31_16,
656 |     pack32_16
657 | };
658 | // Unpackers for 32-value blocks, looked up as unpack32[b] with b in 0..32; index 0 is the constant-fill unpacker.
659 | static const packfnc unpack32[33] = { nullunpacker32, unpack1_32,
660 |     unpack2_32,
661 |     unpack3_32,
662 |     unpack4_32,
663 |     unpack5_32,
664 |     unpack6_32,
665 |     unpack7_32,
666 |     unpack8_32,
667 |     unpack9_32,
668 |     unpack10_32,
669 |     unpack11_32,
670 |     unpack12_32,
671 |     unpack13_32,
672 |     unpack14_32,
673 |     unpack15_32,
674 |     unpack16_32,
675 |     unpack17_32,
676 |     unpack18_32,
677 |     unpack19_32,
678 |     unpack20_32,
679 |     unpack21_32,
680 |     unpack22_32,
681 |     unpack23_32,
682 |     unpack24_32,
683 |     unpack25_32,
684 |     unpack26_32,
685 |     unpack27_32,
686 |     unpack28_32,
687 |     unpack29_32,
688 |     unpack30_32,
689 |     unpack31_32,
690 |     unpack32_32
691 | };
692 | // Packers for 32-value blocks, looked up as pack32[b] with b in 0..32; index 0 stores nothing.
693 | static const packfnc pack32[33] = { nullpacker, pack1_32,
694 |     pack2_32,
695 |     pack3_32,
696 |     pack4_32,
697 |     pack5_32,
698 |     pack6_32,
699 |     pack7_32,
700 |     pack8_32,
701 |     pack9_32,
702 |     pack10_32,
703 |     pack11_32,
704 |     pack12_32,
705 |     pack13_32,
706 |     pack14_32,
707 |     pack15_32,
708 |     pack16_32,
709 |     pack17_32,
710 |     pack18_32,
711 |     pack19_32,
712 |     pack20_32,
713 |     pack21_32,
714 |     pack22_32,
715 |     pack23_32,
716 |     pack24_32,
717 |     pack25_32,
718 |     pack26_32,
719 |     pack27_32,
720 |     pack28_32,
721 |     pack29_32,
722 |     pack30_32,
723 |     pack31_32,
724 |     pack32_32
725 | };
726 | #endif
727 | 


--------------------------------------------------------------------------------
/include/common.h:
--------------------------------------------------------------------------------
 1 | /**
 2 | *
 3 | * This code is released under the
 4 | * Apache License Version 2.0 http://www.apache.org/licenses/.
 5 | * (c) Daniel Lemire 2013
 6 | */
 7 | 
 8 | #ifndef COMMON_H_
 9 | #define COMMON_H_
10 | 
11 | 
12 | #include <string.h>
13 | #include <stdlib.h>
14 | #include <iso646.h> // mostly for Microsoft compilers
15 | #include <limits.h>
16 | #include <stdint.h> // part of Visual Studio 2010 and better
17 | 
18 | #include <cassert>
19 | #include <iostream>
20 | #include <vector>
21 | #include <string>
22 | #include <stdexcept>
23 | #include <cstddef>
24 | #include <algorithm>
25 | #include <sstream>
26 | #endif /* COMMON_H_ */
27 | 


--------------------------------------------------------------------------------
/include/compression.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * compression.h
 3 |  *
 4 |  *  Created on: Oct 28, 2013
 5 |  *      Author: lemire
 6 |  */
 7 | 
 8 | #ifndef COMPRESSION_H_
 9 | #define COMPRESSION_H_
10 | 
11 | #include "bpacking.h"
12 | #include "util.h"
13 | 
14 | /**
15 | * Compresses "length" values from "in" into "out" and returns a pointer just past the compressed stream.
16 | * Layout: number of values, minimal value, maximal value, packed blocks of 32/16/8 values, then up to 7 raw values.
17 | */
18 | inline uint32_t * compress(uint32_t * in, uint32_t length, uint32_t * out) {
19 |     *out++ = length;
20 |     if(length == 0) return out; // an empty input is encoded as the length word alone
21 |     // one pass over the input finds the range [m, M]; m is handed to the packers as "base"
22 |     uint32_t m = in[0];
23 |     uint32_t M = in[0];
24 |     for(uint32_t i = 1; i < length; ++i) {
25 |         if(in[i]>M) M=in[i];
26 |         if(in[i]<m) m=in[i];
27 |     }
28 |     const int b = bits(static_cast<uint32_t>(M-m)); // bit width of the range, used as table index
29 |     *out++ = m;
30 |     *out++ = M;
31 |     // k counts the values consumed so far; the guards test "length-k" because
32 |     // "k+32" would wrap around in uint32_t when length is within 32 of UINT32_MAX
33 |     uint32_t k = 0;
34 |     for(; length-k>=32; k+=32,in+=32) {
35 |         out = pack32[b](m,in,out);
36 |     }
37 |     for(; length-k>=16; k+=16,in+=16) {
38 |         out = pack16[b](m,in,out);
39 |     }
40 |     for(; length-k>=8; k+=8,in+=8) {
41 |         out = pack8[b](m,in,out);
42 |     }
43 |     // fewer than 8 values remain: they are copied verbatim instead of being packed
44 |     for(; k<length; ++k,in++,out++) {
45 |         out[0] = in [0];
46 |     }
47 |     return out;
48 | }
49 | 
50 | /*
51 | * Uncompresses the frame-of-reference data found in "in".
52 | * Layout: number of values, minimal value, maximal value, packed blocks of 32/16/8 values, then up to 7 raw values.
53 | * The 'nvalue' variable receives the number of decoded values (its initial value is ignored).
54 | * The decoded values are stored in "out".
55 | * Returns a pointer to the end of the compressed input stream.
56 | */
57 | inline uint32_t * uncompress(uint32_t * in, uint32_t * out, uint32_t & nvalue) {
58 |     nvalue = *in++;
59 |     if(nvalue == 0) return in; // an empty stream holds only the length word
60 |     const uint32_t m = *in++;
61 |     const uint32_t M = *in++;
62 |     const int b = bits(static_cast<uint32_t>(M-m)); // same table index that compress() derived from the range
63 |     const uint32_t nblocks = nvalue/32; // number of full 32-value blocks
64 |     // block k is read at in+b*k (b words per 32-value block) and written at out+32*k,
65 |     // so the iterations do not depend on each other and may run in parallel under OpenMP
66 | #ifdef _OPENMP
67 |     #pragma omp parallel for
68 | #endif
69 |     for(uint32_t k = 0; k<nblocks; ++k) {
70 |         unpack32[b](m,in+b*k,out+32*k);
71 |     }
72 |     out = out + nblocks*32;
73 |     in = in + nblocks*b;
74 |     // the guards below test "nvalue-k": "k+16" would wrap in uint32_t when nvalue is within 16 of UINT32_MAX
75 |     for(uint32_t k=nvalue/32*32; nvalue-k>=16; k+=16,out+=16) {
76 |         in = unpack16[b](m,in,out);
77 |     }
78 |     for(uint32_t k=nvalue/16*16; nvalue-k>=8; k+=8,out+=8) {
79 |         in = unpack8[b](m,in,out);
80 |     }
81 |     // the last nvalue%8 values were stored verbatim by compress()
82 |     for(uint32_t k=nvalue/8*8; k<nvalue; ++k,in++,out++) {
83 |         out[0] = in [0];
84 |     }
85 |     return in;
86 | }
87 | 
88 | #endif /* COMPRESSION_H_ */
89 | 


--------------------------------------------------------------------------------
/include/turbocompression.h:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #ifndef TURBOCOMPRESSION_H_
  4 | #define TURBOCOMPRESSION_H_
  5 | 
  6 | #include "turbopacking32.h"
  7 | #include "turbopacking64.h"
  8 | 
  9 | #include "util.h"
 10 | 
 11 | /**
 12 | * "turbo" FOR packing uses an optimization to bit packing due to
 13 | * TurboPFor: we use 64-bit words as much as possible.
 14 | *
 15 | */
 16 | 
 17 | /**
 18 | * Compresses "length" values from "in" to "out" and returns a pointer to the end
 19 | * of the compressed stream.
 20 | * The format is "number of values, minimal value, maximal value, followed by
 21 | * packed data"; the three header fields are 4-byte values copied with memcpy.
 22 | *
 23 | * Values are packed in blocks of 32; the final "length % 32" values are copied
 24 | * uncompressed after the packed blocks. Thus using this code on arrays smaller
 25 | * than 32 is wasteful. (The original note says this limitation will be removed
 26 | * in the future.)
 27 | */
 28 | inline uint8_t *turbocompress(const uint32_t *in, uint32_t length,
 29 |                               uint8_t *out) {
 30 |   memcpy(out, &length, sizeof(length));
 31 |   out += sizeof(length);
 32 |   if (length == 0)
 33 |     return out; // an empty array is encoded as the length field alone
 34 |   uint32_t m = in[0];
 35 |   uint32_t M = in[0];
 36 |   for (uint32_t i = 1; i < length; ++i) {
 37 |     if (in[i] > M)
 38 |       M = in[i];
 39 |     if (in[i] < m)
 40 |       m = in[i];
 41 |   }
 42 |   const int b = bits(static_cast<uint32_t>(M - m)); // index into funcForPackArr
 43 |   memcpy(out, &m, sizeof(m));
 44 |   out += sizeof(m);
 45 |   memcpy(out, &M, sizeof(M));
 46 |   out += sizeof(M);
 47 |   uint32_t k = 0; // values packed so far; in/out are passed by address to the packer
 48 |   for (; length - k >= 32; k += 32) { // "k + 32 <= length" could wrap in uint32_t
 49 |     funcForPackArr[b](m, &in, &out);
 50 |   }
 51 |   // the remaining (length - k) < 32 values are stored raw
 52 |   memcpy(out, in, (length - k) * sizeof(uint32_t));
 53 |   out += (length - k) * sizeof(uint32_t);
 54 |   return out;
 55 | }
 56 | 
 57 | /*
 58 | * Uncompresses the frame-of-reference data found in "in".
 59 | * The format is "number of values, minimal value, maximal value, followed by
 60 | * packed data"; the three header fields are 4-byte values read with memcpy.
 61 | * The 'nvalue' variable receives the number of decoded values (initial value is
 62 | * ignored).
 63 | * The values are stored in "out".
 64 | * Returns a pointer to the end of the compressed input stream.
 65 | */
 66 | inline const uint8_t *turbouncompress(const uint8_t *in, uint32_t *out,
 67 |                                       uint32_t &nvalue) {
 68 |   memcpy(&nvalue, in, sizeof(nvalue));
 69 |   in += sizeof(nvalue);
 70 |   if (nvalue == 0)
 71 |     return in;
 72 |   uint32_t m, M;
 73 |   memcpy(&m, in, sizeof(m));
 74 |   in += sizeof(m);
 75 |   memcpy(&M, in, sizeof(M));
 76 |   in += sizeof(M);
 77 |   const int b = bits(static_cast<uint32_t>(M - m)); // index into funcForUnpackArr
 78 | #ifdef _OPENMP
 79 | #pragma omp parallel for
 80 | #endif
 81 |   for (uint32_t k = 0; k < nvalue / 32; ++k) {
 82 |     // each block gets private cursors so the iterations stay independent;
 83 |     // the offset uses size_t because "32 * b * k" wraps in 32 bits from 2^27 values at b = 32
 84 |     const uint8_t *input = in + static_cast<size_t>(4) * b * k; // 32 values * b bits / 8
 85 |     uint32_t *output = out + k * 32;
 86 |     funcForUnpackArr[b](m, &input, &output);
 87 |   }
 88 |   in = in + static_cast<size_t>(4) * b * (nvalue / 32);
 89 |   out = out + 32 * (nvalue / 32);
 90 | 
 91 |   // the last nvalue % 32 values were stored raw by turbocompress()
 92 |   uint32_t leftover = nvalue - nvalue / 32 * 32;
 93 |   memcpy(out, in, leftover * sizeof(uint32_t));
 94 |   in += leftover * sizeof(uint32_t);
 95 |   return in;
 96 | }
 97 | 
 98 | 
 99 | 
100 | /**
101 | * Compresses "length" 64-bit values from "in" to "out" and returns a pointer to
102 | * the end of the compressed stream.
103 | * The format is "number of values (4 bytes), minimal value, maximal value
104 | * (8 bytes each), followed by packed data".
105 | *
106 | * Values are packed in blocks of 32; the final "length % 32" values are copied
107 | * uncompressed after the packed blocks. Thus using this code on arrays smaller
108 | * than 32 is wasteful. (The original note says this limitation will be removed
109 | * in the future.)
110 | */
111 | inline uint8_t *turbocompress64(const uint64_t *in, uint32_t length,
112 |                               uint8_t *out) {
113 |   memcpy(out, &length, sizeof(length));
114 |   out += sizeof(length);
115 |   if (length == 0)
116 |     return out; // an empty array is encoded as the length field alone
117 |   uint64_t m = in[0];
118 |   uint64_t M = in[0];
119 |   for (uint32_t i = 1; i < length; ++i) {
120 |     if (in[i] > M)
121 |       M = in[i];
122 |     if (in[i] < m)
123 |       m = in[i];
124 |   }
125 |   const int b = bits64(static_cast<uint64_t>(M - m)); // index into funcForPackArr64
126 |   memcpy(out, &m, sizeof(m));
127 |   out += sizeof(m);
128 |   memcpy(out, &M, sizeof(M));
129 |   out += sizeof(M);
130 | 
131 |   uint32_t k = 0; // values packed so far; in/out are passed by address to the packer
132 |   for (; length - k >= 32; k += 32) { // "k + 32 <= length" could wrap in uint32_t
133 |     funcForPackArr64[b](m, &in, &out);
134 |   }
135 |   // the remaining (length - k) < 32 values are stored raw
136 |   memcpy(out, in, (length - k) * sizeof(uint64_t));
137 |   out += (length - k) * sizeof(uint64_t);
138 |   return out;
139 | }
140 | 
141 | /*
142 | * Uncompresses the 64-bit frame-of-reference data found in "in".
143 | * The format is "number of values (4 bytes), minimal value, maximal value
144 | * (8 bytes each), followed by packed data".
145 | * The 'nvalue' variable receives the number of decoded values (initial value is
146 | * ignored).
147 | * The values are stored in "out".
148 | * Returns a pointer to the end of the compressed input stream.
149 | */
150 | inline const uint8_t *turbouncompress64(const uint8_t *in, uint64_t *out,
151 |                                       uint32_t &nvalue) {
152 |   memcpy(&nvalue, in, sizeof(nvalue));
153 |   in += sizeof(nvalue);
154 |   if (nvalue == 0)
155 |     return in;
156 |   uint64_t m, M;
157 |   memcpy(&m, in, sizeof(m));
158 |   in += sizeof(m);
159 |   memcpy(&M, in, sizeof(M));
160 |   in += sizeof(M);
161 |   const int b = bits64(static_cast<uint64_t>(M - m)); // index into funcForUnpackArr64
162 | #ifdef _OPENMP
163 | #pragma omp parallel for
164 | #endif
165 |   for (uint32_t k = 0; k < nvalue / 32; ++k) {
166 |     // each block gets private cursors so the iterations stay independent;
167 |     // the offset uses size_t because "32 * b * k" wraps in 32 bits from 2^26 values at b = 64
168 |     const uint8_t *input = in + static_cast<size_t>(4) * b * k; // 32 values * b bits / 8
169 |     uint64_t *output = out + k * 32;
170 |     funcForUnpackArr64[b](m, &input, &output);
171 |   }
172 |   in = in + static_cast<size_t>(4) * b * (nvalue / 32);
173 |   out = out + 32 * (nvalue / 32);
174 | 
175 |   // the last nvalue % 32 values were stored raw by turbocompress64()
176 |   uint32_t leftover = nvalue - nvalue / 32 * 32;
177 |   memcpy(out, in, leftover * sizeof(uint64_t));
178 |   in += leftover * sizeof(uint64_t);
179 |   return in;
180 | }
181 | 
182 | 
183 | #endif /* TURBOCOMPRESSION_H_ */
184 | 


--------------------------------------------------------------------------------
/include/util.h:
--------------------------------------------------------------------------------
 1 | #ifndef FRAMEOFREFERENCE_INCLUDE_UTIL_H
 2 | #define FRAMEOFREFERENCE_INCLUDE_UTIL_H
 3 | #include <iso646.h> // mostly for Microsoft compilers
 4 | #ifdef _MSC_VER
 5 | /* Microsoft C/C++-compatible compiler */
 6 | #include <intrin.h>
 7 | 
 8 | #ifndef __clang__ // if one compiles with MSVC *with* clang, then these intrinsics are defined!!!
 9 | // sadly there is no way to check whether we are missing these intrinsics specifically.
10 | 
11 | /* wrappers for Visual Studio built-ins that look like gcc built-ins */
12 | /* index of the lowest set bit of input_num; the result is unspecified when input_num is zero */
13 | static inline int __builtin_ctzll(unsigned long long input_num) {
14 | 	unsigned long pos;
15 | #ifdef _WIN64 // a single 64-bit scan is available
16 | 	_BitScanForward64(&pos, input_num);
17 | #else // 32-bit Windows: fall back to the high half only when the low half is empty
18 | 	if ((uint32_t)input_num == 0) {
19 | 		_BitScanForward(&pos, (uint32_t)(input_num >> 32));
20 | 		pos += 32; // bits of the high half are numbered from 32
21 | 	}
22 | 	else {
23 | 		_BitScanForward(&pos, (uint32_t)input_num);
24 | 	}
25 | #endif
26 | 	return (int)pos;
27 | }
28 | 
29 | /* number of leading zero bits of a 64-bit value; the result is unspecified when input_num is zero */
30 | static inline int __builtin_clzll(unsigned long long input_num) {
31 | 	unsigned long index;
32 | #ifdef _WIN64 // highly recommended!!!
33 | 	_BitScanReverse64(&index, input_num);
34 | #else // if we must support 32-bit Windows
35 | 	if ((input_num >> 32) != 0) { // the highest set bit lies in the upper 32-bit half
36 | 		_BitScanReverse(&index, (uint32_t)(input_num >> 32));
37 | 		index += 32; // bit positions of the upper half start at 32
38 | 	}
39 | 	else {
40 | 		_BitScanReverse(&index, (uint32_t)(input_num));
41 | 	}
42 | #endif
43 | 	return 63 - index;
44 | }
45 | 
46 | /* number of leading zero bits of a 32-bit value; the result is unspecified when input_num is zero */
47 | static inline int __builtin_clz(int input_num) {
48 | 	unsigned long top; // receives the position of the highest set bit
49 | 	_BitScanReverse(&top, input_num);
50 | 	return (int)(31 - top);
51 | }
52 | 
53 | /* number of set bits in input_num */
54 | static inline int __builtin_popcountll(unsigned long long input_num) {
55 | #ifdef _WIN64 // a single 64-bit population count
56 | 	return (int)__popcnt64(input_num);
57 | #else // 32-bit Windows: add the counts of the two 32-bit halves
58 | 	return (int)(__popcnt((uint32_t)(input_num >> 32)) + __popcnt((uint32_t)input_num));
59 | #endif
60 | }
61 | /* gcc-style name for the MSVC __assume(0) hint: marks this point as never reached */
62 | static inline void __builtin_unreachable() {
63 | 	__assume(0);
64 | }
65 | #endif
66 | #endif
67 | 
68 | 
69 | #include <stdint.h> // part of Visual Studio 2010 and better
70 | 
71 | // bits needed to store v: 0 when v is 0, otherwise the index of the top set bit plus one
72 | static inline uint32_t bits(const uint32_t v) {
73 |     return v != 0 ? 32 - __builtin_clz(v) : 0;
74 | }
75 | 
76 | 
77 | 
78 | // 64-bit variant: bits needed to store v, 0 when v is 0
79 | static inline uint32_t bits64(const uint64_t v) {
80 |     return v != 0 ? 64 - __builtin_clzll(v) : 0;
81 | }
82 | 
83 | #endif // FRAMEOFREFERENCE_INCLUDE_UTIL_H
84 | 


--------------------------------------------------------------------------------
/sampledata.txt:
--------------------------------------------------------------------------------
   1 | 4106
   2 | 4481
   3 | 4192
   4 | 4420
   5 | 4478
   6 | 4088
   7 | 4338
   8 | 4320
   9 | 4353
  10 | 4253
  11 | 4129
  12 | 4284
  13 | 4430
  14 | 4093
  15 | 4329
  16 | 4293
  17 | 4366
  18 | 4064
  19 | 4124
  20 | 4142
  21 | 4435
  22 | 4472
  23 | 4497
  24 | 4423
  25 | 4357
  26 | 4498
  27 | 4034
  28 | 4386
  29 | 4419
  30 | 4184
  31 | 4228
  32 | 4302
  33 | 4168
  34 | 4080
  35 | 4317
  36 | 4056
  37 | 4104
  38 | 4037
  39 | 4397
  40 | 4001
  41 | 4300
  42 | 4346
  43 | 4176
  44 | 4203
  45 | 4095
  46 | 4128
  47 | 4145
  48 | 4348
  49 | 4174
  50 | 4403
  51 | 4004
  52 | 4157
  53 | 4015
  54 | 4119
  55 | 4321
  56 | 4350
  57 | 4216
  58 | 4375
  59 | 4394
  60 | 4238
  61 | 4265
  62 | 4484
  63 | 4245
  64 | 4025
  65 | 4248
  66 | 4136
  67 | 4049
  68 | 4221
  69 | 4478
  70 | 4058
  71 | 4031
  72 | 4133
  73 | 4165
  74 | 4448
  75 | 4008
  76 | 4451
  77 | 4434
  78 | 4404
  79 | 4164
  80 | 4065
  81 | 4268
  82 | 4255
  83 | 4386
  84 | 4245
  85 | 4414
  86 | 4219
  87 | 4123
  88 | 4339
  89 | 4071
  90 | 4114
  91 | 4027
  92 | 4074
  93 | 4001
  94 | 4123
  95 | 4313
  96 | 4339
  97 | 4394
  98 | 4383
  99 | 4045
 100 | 4069
 101 | 4398
 102 | 4083
 103 | 4207
 104 | 4340
 105 | 4499
 106 | 4264
 107 | 4233
 108 | 4410
 109 | 4487
 110 | 4202
 111 | 4486
 112 | 4325
 113 | 4102
 114 | 4094
 115 | 4044
 116 | 4025
 117 | 4097
 118 | 4287
 119 | 4348
 120 | 4230
 121 | 4046
 122 | 4440
 123 | 4034
 124 | 4487
 125 | 4259
 126 | 4016
 127 | 4106
 128 | 4320
 129 | 4344
 130 | 4175
 131 | 4087
 132 | 4070
 133 | 4080
 134 | 4095
 135 | 4381
 136 | 4262
 137 | 4053
 138 | 4033
 139 | 4103
 140 | 4040
 141 | 4148
 142 | 4312
 143 | 4424
 144 | 4149
 145 | 4111
 146 | 4419
 147 | 4012
 148 | 4113
 149 | 4118
 150 | 4458
 151 | 4018
 152 | 4088
 153 | 4356
 154 | 4389
 155 | 4408
 156 | 4135
 157 | 4457
 158 | 4214
 159 | 4143
 160 | 4155
 161 | 4250
 162 | 4388
 163 | 4424
 164 | 4128
 165 | 4446
 166 | 4225
 167 | 4109
 168 | 4438
 169 | 4498
 170 | 4351
 171 | 4060
 172 | 4038
 173 | 4366
 174 | 4240
 175 | 4020
 176 | 4080
 177 | 4243
 178 | 4420
 179 | 4299
 180 | 4428
 181 | 4326
 182 | 4072
 183 | 4263
 184 | 4192
 185 | 4315
 186 | 4492
 187 | 4129
 188 | 4026
 189 | 4112
 190 | 4100
 191 | 4024
 192 | 4254
 193 | 4418
 194 | 4413
 195 | 4279
 196 | 4249
 197 | 4073
 198 | 4398
 199 | 4086
 200 | 4282
 201 | 4405
 202 | 4061
 203 | 4279
 204 | 4373
 205 | 4142
 206 | 4236
 207 | 4278
 208 | 4376
 209 | 4103
 210 | 4266
 211 | 4157
 212 | 4117
 213 | 4243
 214 | 4276
 215 | 4409
 216 | 4245
 217 | 4379
 218 | 4039
 219 | 4419
 220 | 4383
 221 | 4443
 222 | 4284
 223 | 4490
 224 | 4332
 225 | 4405
 226 | 4330
 227 | 4208
 228 | 4121
 229 | 4419
 230 | 4259
 231 | 4195
 232 | 4246
 233 | 4365
 234 | 4469
 235 | 4115
 236 | 4467
 237 | 4121
 238 | 4262
 239 | 4191
 240 | 4082
 241 | 4233
 242 | 4495
 243 | 4497
 244 | 4188
 245 | 4398
 246 | 4280
 247 | 4467
 248 | 4006
 249 | 4198
 250 | 4216
 251 | 4216
 252 | 4342
 253 | 4320
 254 | 4229
 255 | 4068
 256 | 4222
 257 | 4497
 258 | 4052
 259 | 4069
 260 | 4355
 261 | 4441
 262 | 4457
 263 | 4327
 264 | 4271
 265 | 4164
 266 | 4271
 267 | 4083
 268 | 4269
 269 | 4394
 270 | 4424
 271 | 4035
 272 | 4013
 273 | 4336
 274 | 4206
 275 | 4463
 276 | 4147
 277 | 4268
 278 | 4270
 279 | 4389
 280 | 4489
 281 | 4498
 282 | 4032
 283 | 4053
 284 | 4165
 285 | 4321
 286 | 4410
 287 | 4180
 288 | 4033
 289 | 4077
 290 | 4074
 291 | 4224
 292 | 4103
 293 | 4021
 294 | 4179
 295 | 4223
 296 | 4432
 297 | 4335
 298 | 4387
 299 | 4090
 300 | 4246
 301 | 4133
 302 | 4499
 303 | 4023
 304 | 4148
 305 | 4280
 306 | 4429
 307 | 4007
 308 | 4037
 309 | 4453
 310 | 4075
 311 | 4023
 312 | 4305
 313 | 4298
 314 | 4303
 315 | 4422
 316 | 4209
 317 | 4409
 318 | 4009
 319 | 4301
 320 | 4493
 321 | 4478
 322 | 4482
 323 | 4240
 324 | 4404
 325 | 4057
 326 | 4449
 327 | 4103
 328 | 4388
 329 | 4131
 330 | 4215
 331 | 4169
 332 | 4262
 333 | 4347
 334 | 4366
 335 | 4274
 336 | 4403
 337 | 4335
 338 | 4223
 339 | 4442
 340 | 4376
 341 | 4469
 342 | 4246
 343 | 4476
 344 | 4022
 345 | 4366
 346 | 4355
 347 | 4283
 348 | 4160
 349 | 4349
 350 | 4275
 351 | 4013
 352 | 4167
 353 | 4382
 354 | 4453
 355 | 4100
 356 | 4346
 357 | 4124
 358 | 4475
 359 | 4134
 360 | 4396
 361 | 4479
 362 | 4223
 363 | 4117
 364 | 4141
 365 | 4184
 366 | 4271
 367 | 4361
 368 | 4140
 369 | 4368
 370 | 4360
 371 | 4415
 372 | 4448
 373 | 4170
 374 | 4382
 375 | 4045
 376 | 4163
 377 | 4210
 378 | 4243
 379 | 4363
 380 | 4255
 381 | 4092
 382 | 4304
 383 | 4048
 384 | 4288
 385 | 4305
 386 | 4494
 387 | 4457
 388 | 4349
 389 | 4266
 390 | 4274
 391 | 4333
 392 | 4037
 393 | 4009
 394 | 4345
 395 | 4282
 396 | 4142
 397 | 4341
 398 | 4334
 399 | 4202
 400 | 4436
 401 | 4392
 402 | 4054
 403 | 4363
 404 | 4027
 405 | 4490
 406 | 4134
 407 | 4495
 408 | 4391
 409 | 4057
 410 | 4312
 411 | 4136
 412 | 4223
 413 | 4022
 414 | 4104
 415 | 4169
 416 | 4005
 417 | 4115
 418 | 4163
 419 | 4275
 420 | 4018
 421 | 4382
 422 | 4387
 423 | 4011
 424 | 4374
 425 | 4289
 426 | 4239
 427 | 4356
 428 | 4457
 429 | 4318
 430 | 4187
 431 | 4177
 432 | 4154
 433 | 4002
 434 | 4291
 435 | 4244
 436 | 4245
 437 | 4289
 438 | 4197
 439 | 4229
 440 | 4262
 441 | 4148
 442 | 4118
 443 | 4406
 444 | 4163
 445 | 4381
 446 | 4406
 447 | 4131
 448 | 4314
 449 | 4301
 450 | 4232
 451 | 4270
 452 | 4496
 453 | 4484
 454 | 4392
 455 | 4047
 456 | 4238
 457 | 4341
 458 | 4203
 459 | 4203
 460 | 4149
 461 | 4046
 462 | 4374
 463 | 4494
 464 | 4241
 465 | 4264
 466 | 4483
 467 | 4131
 468 | 4244
 469 | 4266
 470 | 4080
 471 | 4192
 472 | 4140
 473 | 4129
 474 | 4242
 475 | 4393
 476 | 4381
 477 | 4139
 478 | 4107
 479 | 4267
 480 | 4129
 481 | 4111
 482 | 4316
 483 | 4380
 484 | 4214
 485 | 4311
 486 | 4148
 487 | 4230
 488 | 4420
 489 | 4430
 490 | 4440
 491 | 4012
 492 | 4117
 493 | 4289
 494 | 4233
 495 | 4242
 496 | 4341
 497 | 4215
 498 | 4351
 499 | 4216
 500 | 4400
 501 | 4224
 502 | 4253
 503 | 4141
 504 | 4474
 505 | 4212
 506 | 4088
 507 | 4263
 508 | 4229
 509 | 4142
 510 | 4301
 511 | 4042
 512 | 4274
 513 | 4361
 514 | 4312
 515 | 4492
 516 | 4052
 517 | 4214
 518 | 4375
 519 | 4177
 520 | 4390
 521 | 4251
 522 | 4116
 523 | 4235
 524 | 4355
 525 | 4008
 526 | 4208
 527 | 4141
 528 | 4366
 529 | 4434
 530 | 4277
 531 | 4383
 532 | 4411
 533 | 4435
 534 | 4110
 535 | 4303
 536 | 4071
 537 | 4187
 538 | 4061
 539 | 4132
 540 | 4225
 541 | 4374
 542 | 4210
 543 | 4397
 544 | 4482
 545 | 4298
 546 | 4431
 547 | 4371
 548 | 4221
 549 | 4361
 550 | 4396
 551 | 4044
 552 | 4027
 553 | 4428
 554 | 4001
 555 | 4148
 556 | 4211
 557 | 4149
 558 | 4372
 559 | 4023
 560 | 4104
 561 | 4186
 562 | 4423
 563 | 4303
 564 | 4184
 565 | 4373
 566 | 4213
 567 | 4194
 568 | 4083
 569 | 4125
 570 | 4268
 571 | 4017
 572 | 4098
 573 | 4291
 574 | 4248
 575 | 4114
 576 | 4309
 577 | 4294
 578 | 4089
 579 | 4358
 580 | 4202
 581 | 4187
 582 | 4235
 583 | 4381
 584 | 4286
 585 | 4108
 586 | 4208
 587 | 4481
 588 | 4174
 589 | 4399
 590 | 4098
 591 | 4222
 592 | 4094
 593 | 4005
 594 | 4318
 595 | 4210
 596 | 4277
 597 | 4153
 598 | 4151
 599 | 4058
 600 | 4332
 601 | 4451
 602 | 4326
 603 | 4126
 604 | 4420
 605 | 4407
 606 | 4181
 607 | 4289
 608 | 4405
 609 | 4229
 610 | 4279
 611 | 4391
 612 | 4478
 613 | 4339
 614 | 4273
 615 | 4073
 616 | 4228
 617 | 4430
 618 | 4363
 619 | 4363
 620 | 4409
 621 | 4184
 622 | 4090
 623 | 4284
 624 | 4424
 625 | 4213
 626 | 4487
 627 | 4193
 628 | 4090
 629 | 4305
 630 | 4419
 631 | 4411
 632 | 4427
 633 | 4180
 634 | 4168
 635 | 4092
 636 | 4217
 637 | 4496
 638 | 4102
 639 | 4151
 640 | 4262
 641 | 4196
 642 | 4180
 643 | 4158
 644 | 4162
 645 | 4493
 646 | 4059
 647 | 4394
 648 | 4272
 649 | 4421
 650 | 4274
 651 | 4278
 652 | 4091
 653 | 4248
 654 | 4003
 655 | 4216
 656 | 4116
 657 | 4256
 658 | 4308
 659 | 4095
 660 | 4146
 661 | 4192
 662 | 4082
 663 | 4022
 664 | 4478
 665 | 4314
 666 | 4334
 667 | 4318
 668 | 4005
 669 | 4337
 670 | 4365
 671 | 4161
 672 | 4323
 673 | 4368
 674 | 4390
 675 | 4054
 676 | 4267
 677 | 4377
 678 | 4268
 679 | 4423
 680 | 4324
 681 | 4032
 682 | 4457
 683 | 4408
 684 | 4373
 685 | 4130
 686 | 4220
 687 | 4420
 688 | 4496
 689 | 4409
 690 | 4001
 691 | 4388
 692 | 4063
 693 | 4176
 694 | 4275
 695 | 4404
 696 | 4490
 697 | 4010
 698 | 4470
 699 | 4268
 700 | 4109
 701 | 4171
 702 | 4410
 703 | 4144
 704 | 4375
 705 | 4347
 706 | 4133
 707 | 4197
 708 | 4441
 709 | 4467
 710 | 4171
 711 | 4060
 712 | 4260
 713 | 4036
 714 | 4194
 715 | 4250
 716 | 4285
 717 | 4032
 718 | 4433
 719 | 4186
 720 | 4405
 721 | 4454
 722 | 4002
 723 | 4120
 724 | 4264
 725 | 4472
 726 | 4134
 727 | 4279
 728 | 4045
 729 | 4413
 730 | 4084
 731 | 4441
 732 | 4241
 733 | 4010
 734 | 4151
 735 | 4389
 736 | 4398
 737 | 4333
 738 | 4165
 739 | 4295
 740 | 4140
 741 | 4450
 742 | 4045
 743 | 4278
 744 | 4072
 745 | 4389
 746 | 4295
 747 | 4342
 748 | 4305
 749 | 4385
 750 | 4183
 751 | 4476
 752 | 4123
 753 | 4267
 754 | 4120
 755 | 4279
 756 | 4124
 757 | 4424
 758 | 4073
 759 | 4324
 760 | 4014
 761 | 4460
 762 | 4402
 763 | 4109
 764 | 4183
 765 | 4468
 766 | 4251
 767 | 4333
 768 | 4158
 769 | 4279
 770 | 4148
 771 | 4317
 772 | 4336
 773 | 4351
 774 | 4059
 775 | 4073
 776 | 4226
 777 | 4362
 778 | 4454
 779 | 4090
 780 | 4302
 781 | 4053
 782 | 4429
 783 | 4323
 784 | 4436
 785 | 4495
 786 | 4118
 787 | 4031
 788 | 4129
 789 | 4015
 790 | 4483
 791 | 4054
 792 | 4226
 793 | 4285
 794 | 4027
 795 | 4474
 796 | 4038
 797 | 4121
 798 | 4440
 799 | 4279
 800 | 4180
 801 | 4090
 802 | 4268
 803 | 4395
 804 | 4149
 805 | 4483
 806 | 4314
 807 | 4254
 808 | 4379
 809 | 4210
 810 | 4141
 811 | 4394
 812 | 4114
 813 | 4434
 814 | 4043
 815 | 4001
 816 | 4258
 817 | 4429
 818 | 4050
 819 | 4071
 820 | 4329
 821 | 4221
 822 | 4146
 823 | 4176
 824 | 4206
 825 | 4310
 826 | 4048
 827 | 4151
 828 | 4496
 829 | 4254
 830 | 4094
 831 | 4140
 832 | 4076
 833 | 4230
 834 | 4200
 835 | 4270
 836 | 4396
 837 | 4486
 838 | 4354
 839 | 4191
 840 | 4356
 841 | 4052
 842 | 4006
 843 | 4490
 844 | 4300
 845 | 4268
 846 | 4005
 847 | 4298
 848 | 4243
 849 | 4440
 850 | 4058
 851 | 4289
 852 | 4030
 853 | 4464
 854 | 4367
 855 | 4056
 856 | 4018
 857 | 4232
 858 | 4199
 859 | 4475
 860 | 4074
 861 | 4387
 862 | 4102
 863 | 4236
 864 | 4042
 865 | 4348
 866 | 4418
 867 | 4171
 868 | 4060
 869 | 4035
 870 | 4138
 871 | 4155
 872 | 4289
 873 | 4364
 874 | 4190
 875 | 4293
 876 | 4346
 877 | 4399
 878 | 4106
 879 | 4486
 880 | 4071
 881 | 4018
 882 | 4385
 883 | 4223
 884 | 4282
 885 | 4281
 886 | 4442
 887 | 4468
 888 | 4168
 889 | 4481
 890 | 4068
 891 | 4453
 892 | 4054
 893 | 4169
 894 | 4240
 895 | 4297
 896 | 4119
 897 | 4136
 898 | 4023
 899 | 4106
 900 | 4048
 901 | 4362
 902 | 4415
 903 | 4373
 904 | 4198
 905 | 4336
 906 | 4262
 907 | 4449
 908 | 4164
 909 | 4075
 910 | 4393
 911 | 4480
 912 | 4094
 913 | 4171
 914 | 4243
 915 | 4141
 916 | 4216
 917 | 4247
 918 | 4317
 919 | 4272
 920 | 4376
 921 | 4028
 922 | 4259
 923 | 4078
 924 | 4076
 925 | 4189
 926 | 4436
 927 | 4225
 928 | 4264
 929 | 4039
 930 | 4459
 931 | 4409
 932 | 4038
 933 | 4255
 934 | 4481
 935 | 4217
 936 | 4029
 937 | 4012
 938 | 4113
 939 | 4487
 940 | 4200
 941 | 4278
 942 | 4371
 943 | 4399
 944 | 4258
 945 | 4246
 946 | 4010
 947 | 4226
 948 | 4470
 949 | 4114
 950 | 4454
 951 | 4409
 952 | 4309
 953 | 4137
 954 | 4007
 955 | 4100
 956 | 4402
 957 | 4033
 958 | 4056
 959 | 4039
 960 | 4103
 961 | 4032
 962 | 4119
 963 | 4242
 964 | 4018
 965 | 4408
 966 | 4139
 967 | 4088
 968 | 4300
 969 | 4116
 970 | 4134
 971 | 4067
 972 | 4206
 973 | 4149
 974 | 4021
 975 | 4281
 976 | 4476
 977 | 4033
 978 | 4106
 979 | 4012
 980 | 4024
 981 | 4348
 982 | 4118
 983 | 4181
 984 | 4251
 985 | 4244
 986 | 4263
 987 | 4203
 988 | 4158
 989 | 4019
 990 | 4211
 991 | 4478
 992 | 4140
 993 | 4455
 994 | 4500
 995 | 4214
 996 | 4478
 997 | 4146
 998 | 4457
 999 | 4044
1000 | 4004
1001 | 


--------------------------------------------------------------------------------
/scripts/turbopacking32.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | def howmany(bit):
  4 |     """Return how many values one generated block packs: always 32, whatever `bit` is."""
  5 |     return 32
  6 |     # Disabled alternative, kept for reference: pack the fewest values that
  7 |     # fill at least one 64-bit word and end on a byte boundary, i.e. start at
  8 |     # ceil(64 / bit) values and add one until (number * bit) % 8 == 0.
  9 |     # The fixed block size returned above is used instead.
 10 | 
 11 | def howmanywords(bit):
 12 |     return (howmany(bit) * bit + 63)//64
 13 | 
 14 | def howmanybytes(bit):
 15 |     return (howmany(bit) * bit + 7)//8
 16 | print("""
 17 | #ifndef INCLUDE_TURBOPACKING32_H
 18 | #define INCLUDE_TURBOPACKING32_H
 19 | #include <iso646.h> // mostly for Microsoft compilers
 20 | #include <stdint.h> // part of Visual Studio 2010 and better
 21 | """)
 22 | print("""
 23 | #ifndef UINT64_C
 24 | #define UINT64_C(c) (c ## ULL)
 25 | #endif
 26 | """)
 27 | print("""
 28 | /**
 29 | * (c) Daniel Lemire
 30 | * Apache License 2.0
 31 | */
 32 | /** turbopacking32 starts here **/
 33 | /**
 34 | * this code mimics the way TurboPFor packs short arrays of integers.
 35 | * We pack and unpack always at least a full 64-bit word, plus whatever
 36 | * is necessary to get to an even number of bytes.
 37 | */""")
 38 | 
 39 | print("""typedef void (*packforblockfnc)(const uint32_t base, const uint32_t ** pin, uint8_t ** pw);""")
 40 | print("""typedef void (*unpackforblockfnc)(const uint32_t base, const uint8_t ** pw, uint32_t ** pout);""")
 41 | 
 42 | 
 43 | 
 44 | def plurial(number):
 45 |     if(number > 1):
 46 |         return "s"
 47 |     else :
 48 |         return ""
 49 | 
 50 | print("")
 51 | print("static void packforblock0(const uint32_t , const uint32_t ** pin, uint8_t ** pw) {");  # 0-bit packer: writes no bytes
 52 | print("  (void)pw;");
 53 | print("  *pin += {0}; /* we consumed {0} 32-bit integer{1} */ ".format(howmany(0),plurial(howmany(0))));
 54 | print("}");
 55 | print("")
 56 | # emit one fully unrolled packer per bit width 1..32
 57 | for bit in range(1,33):
 58 |     print("")
 59 |     print("/* we are going to pack {0} {1}-bit values, touching {2} 64-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
 60 |     print("static void packforblock{0}(const uint32_t base, const uint32_t ** pin, uint8_t ** pw) {{".format(bit));
 61 |     print("  uint64_t * pw64 = *(uint64_t **) pw;");
 62 |     print("  const uint32_t * in = *pin;");
 63 |     print("  /* we are going to touch  {0} 64-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
 64 |     for k in range(howmanywords(bit)) :  # declare one accumulator per output word
 65 |       print("  uint64_t w{0};".format(k))
 66 |     for j in range(howmany(bit)):
 67 |       firstword = j * bit // 64  # word holding the value's lowest bit
 68 |       secondword = (j * bit + bit - 1)//64  # word holding the value's highest bit
 69 |       firstshift = (j*bit) % 64  # bit offset of the value inside firstword
 70 |       if( firstword == secondword):
 71 |           if(firstshift == 0):  # first value of a word: plain assignment initializes the accumulator
 72 |             print("  w{0} = (uint64_t) (in[{1}] - base);".format(firstword,j))
 73 |           else:
 74 |             print("  w{0} |= (uint64_t)  (in[{1}] - base) << {2};".format(firstword,j,firstshift))
 75 |       else:  # value straddles two words: low part is ORed in, high part initializes the next word
 76 |           print("  w{0} |= (uint64_t) (in[{1}] - base) << {2};".format(firstword,j,firstshift))
 77 |           secondshift = 64-firstshift
 78 |           print("  w{0} = (uint64_t) (in[{1}] - base) >> {2};".format(secondword,j,secondshift))
 79 |     for k in range(howmanywords(bit)) :  # store every accumulated word
 80 |       print("  pw64[{0}] = w{0};".format(k))
 81 |     print("  *pin += {0}; /* we consumed {0} 32-bit integer{1} */ ".format(howmany(bit),plurial(howmany(bit))));
 82 |     print("  *pw += {0}; /* we used up {0} output bytes */ ".format(howmanybytes(bit)));
 83 |     print("}");
 84 |     print("")
 85 | # 0-bit unpacker: reads no input and fills the whole block with base
 86 | print("static void unpackforblock0(const uint32_t base, const uint8_t ** pw, uint32_t ** pout) {");
 87 | print("  (void) pw;");
 88 | print("  for(int k = 0; k < {0} ; k+= 1) {{".format(howmany(0)))
 89 | print("    (*pout) [k] = base;")
 90 | print("  }")
 91 | print("  *pout += {0}; /* we wrote {0} 32-bit integer{1} */ ".format(howmany(0),plurial(howmany(0))));
 92 | print("}");
 93 | print("")
 94 | # emit one fully unrolled unpacker per bit width 1..32
 95 | for bit in range(1,33):
 96 |     print("")
 97 |     print("/* we packed {0} {1}-bit values, touching {2} 64-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
 98 |     print("static void unpackforblock{0}(const uint32_t base, const uint8_t ** pw, uint32_t ** pout) {{".format(bit));
 99 |     print("  const uint64_t * pw64 = *(const uint64_t **) pw;");
100 |     print("  uint32_t * out = *pout;");
101 |     if(bit < 32): print("  const uint64_t mask = UINT64_C({0});".format((1<<bit)-1));
102 |     maskstr = " & mask "
103 |     if (bit == 32) : maskstr = "" # no mask emitted for 32-bit values: the (uint32_t) cast truncates
104 |     print("  /* we are going to access  {0} 64-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
105 |     for k in range(howmanywords(bit)) :  # load every input word up front
106 |       print("  uint64_t w{0} = pw64[{0}];".format(k))
107 |     print("  *pw += {0}; /* we used up {0} input bytes */ ".format(howmanybytes(bit)));
108 |     for j in range(howmany(bit)):
109 |       firstword = j * bit // 64  # word holding the value's lowest bit
110 |       secondword = (j * bit + bit - 1)//64  # word holding the value's highest bit
111 |       firstshift = (j*bit) % 64  # bit offset of the value inside firstword
112 |       firstshiftstr = ">> {0} ".format(firstshift)
113 |       if(firstshift == 0):
114 |           firstshiftstr ="" # a shift by zero is omitted from the generated code
115 |       if( firstword == secondword):
116 |           if(firstshift + bit == 64):  # value ends at the word's top bit: the shift alone isolates it
117 |             print("  out[{0}] = base + (uint32_t) ( w{1}  {2} );".format(j,firstword,firstshiftstr,firstshift))
118 |           else:
119 |             print("  out[{0}] = base + (uint32_t)  ( ( w{1} {2}) {3} );".format(j,firstword,firstshiftstr,maskstr))
120 |       else:  # value straddles two words: combine the tail of one with the head of the next
121 |           secondshift = (64-firstshift)
122 |           print("  out[{0}] = base + (uint32_t)  ( ( ( w{1} {2} ) | ( w{3} << {4} ) ) {5} );".format(j,firstword,firstshiftstr, firstword+1,secondshift,maskstr))
123 |     print("  *pout += {0}; /* we wrote {0} 32-bit integer{1} */ ".format(howmany(bit),plurial(howmany(bit))));
124 |     print("}");
125 |     print("")
126 | # dispatch tables indexed by bit width 0..32
127 | print("static packforblockfnc funcForPackArr[] = {")
128 | for bit in range(0,32):
129 |   print("&packforblock{0},".format(bit))
130 | print("&packforblock32")
131 | print("};")
132 | 
133 | print("static unpackforblockfnc funcForUnpackArr[] = {")
134 | for bit in range(0,32):
135 |   print("&unpackforblock{0},".format(bit))
136 | print("&unpackforblock32")
137 | print("};")
138 | print("/**  turbopacking32.py ends here **/")
139 | print("""
140 | #endif //INCLUDE_TURBOPACKING32_H
141 | """)
142 | 


--------------------------------------------------------------------------------
/scripts/turbopacking64.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | def howmany(bit):
  4 |     """Return how many values one generated block packs: always 32, whatever `bit` is."""
  5 |     return 32
  6 | 
  7 | def howmanywords(bit):
  8 |     return (howmany(bit) * bit + 63)//64
  9 | 
 10 | def howmanybytes(bit):
 11 |     return (howmany(bit) * bit + 7)//8
 12 | print("""
 13 | #ifndef INCLUDE_TURBOPACKING64_H
 14 | #define INCLUDE_TURBOPACKING64_H
 15 | #include <iso646.h> // mostly for Microsoft compilers
 16 | #include <stdint.h> // part of Visual Studio 2010 and better
 17 | """)
 18 | print("""
 19 | #ifndef UINT64_C
 20 | #define UINT64_C(c) (c ## ULL)
 21 | #endif
 22 | """)
 23 | print("""
 24 | /**
 25 | * (c) Daniel Lemire
 26 | * Apache License 2.0
 27 | */
 28 | /** turbopacking64 starts here **/
 29 | /**
 30 | * this code mimics the way TurboPFor packs short arrays of integers.
 31 | * We pack and unpack always at least a full 64-bit word, plus whatever
 32 | * is necessary to get to an even number of bytes.
 33 | */""")
 34 | 
 35 | print("""typedef void (*packforblockfnc_64)(const uint64_t base, const uint64_t ** pin, uint8_t ** pw);""")
 36 | print("""typedef void (*unpackforblockfnc_64)(const uint64_t base, const uint8_t ** pw, uint64_t ** pout);""")
 37 | 
 38 | 
 39 | 
 40 | def plurial(number):
 41 |     if(number > 1):
 42 |         return "s"
 43 |     else :
 44 |         return ""
 45 | 
 46 | print("")
 47 | print("static void packforblock0_64(const uint64_t , const uint64_t ** pin, uint8_t ** pw) {");  # 0-bit packer: writes no bytes
 48 | print("  (void)pw;");
 49 | print("  *pin += {0}; /* we consumed {0} 64-bit integer{1} */ ".format(howmany(0),plurial(howmany(0))));
 50 | print("}");
 51 | print("")
 52 | # emit one fully unrolled packer per bit width 1..64
 53 | for bit in range(1,65):
 54 |     print("")
 55 |     print("/* we are going to pack {0} {1}-bit values, touching {2} 64-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
 56 |     print("static void packforblock{0}_64(const uint64_t base, const uint64_t ** pin, uint8_t ** pw) {{".format(bit));
 57 |     print("  uint64_t * pw64 = *(uint64_t **) pw;");
 58 |     print("  const uint64_t * in = *pin;");
 59 |     print("  /* we are going to touch  {0} 64-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
 60 |     for k in range(howmanywords(bit)) :  # declare one accumulator per output word
 61 |       print("  uint64_t w{0};".format(k))
 62 |     for j in range(howmany(bit)):
 63 |       firstword = j * bit // 64  # word holding the value's lowest bit
 64 |       secondword = (j * bit + bit - 1)//64  # word holding the value's highest bit
 65 |       firstshift = (j*bit) % 64  # bit offset of the value inside firstword
 66 |       if( firstword == secondword):
 67 |           if(firstshift == 0):  # first value of a word: plain assignment initializes the accumulator
 68 |             print("  w{0} = (in[{1}] - base);".format(firstword,j))
 69 |           else:
 70 |             print("  w{0} |=  (in[{1}] - base) << {2};".format(firstword,j,firstshift))
 71 |       else:  # value straddles two words: low part is ORed in, high part initializes the next word
 72 |           print("  w{0} |= (in[{1}] - base) << {2};".format(firstword,j,firstshift))
 73 |           secondshift = 64-firstshift
 74 |           print("  w{0} = (in[{1}] - base) >> {2};".format(secondword,j,secondshift))
 75 |     for k in range(howmanywords(bit)) :  # store every accumulated word
 76 |       print("  pw64[{0}] = w{0};".format(k))
 77 |     print("  *pin += {0}; /* we consumed {0} 64-bit integer{1} */ ".format(howmany(bit),plurial(howmany(bit))));
 78 |     print("  *pw += {0}; /* we used up {0} output bytes */ ".format(howmanybytes(bit)));
 79 |     print("}");
 80 |     print("")
 81 | # 0-bit unpacker: reads no input and fills the whole block with base
 82 | print("static void unpackforblock0_64(const uint64_t base, const uint8_t ** pw, uint64_t ** pout) {");
 83 | print("  (void) pw;");
 84 | print("  for(int k = 0; k < {0} ; k+= 1) {{".format(howmany(0)))
 85 | print("    (*pout) [k] = base;")
 86 | print("  }")
 87 | print("  *pout += {0}; /* we wrote {0} 64-bit integer{1} */ ".format(howmany(0),plurial(howmany(0))));
 88 | print("}");
 89 | print("")
 90 | # emit one fully unrolled unpacker per bit width 1..64
 91 | for bit in range(1,65):
 92 |     print("")
 93 |     print("/* we packed {0} {1}-bit values, touching {2} 64-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit)))
 94 |     print("static void unpackforblock{0}_64(const uint64_t base, const uint8_t ** pw, uint64_t ** pout) {{".format(bit));
 95 |     print("  const uint64_t * pw64 = *(const uint64_t **) pw;");
 96 |     print("  uint64_t * out = *pout;");
 97 |     if(bit < 64): print("  const uint64_t mask = UINT64_C({0});".format((1<<bit)-1));
 98 |     maskstr = " & mask "
 99 |     if (bit == 64) : maskstr = "" # no mask emitted for full-width 64-bit values
100 |     print("  /* we are going to access  {0} 64-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit))));
101 |     for k in range(howmanywords(bit)) :  # load every input word up front
102 |       print("  uint64_t w{0} = pw64[{0}];".format(k))
103 |     print("  *pw += {0}; /* we used up {0} input bytes */ ".format(howmanybytes(bit)));
104 |     for j in range(howmany(bit)):
105 |       firstword = j * bit // 64  # word holding the value's lowest bit
106 |       secondword = (j * bit + bit - 1)//64  # word holding the value's highest bit
107 |       firstshift = (j*bit) % 64  # bit offset of the value inside firstword
108 |       firstshiftstr = ">> {0} ".format(firstshift)
109 |       if(firstshift == 0):
110 |           firstshiftstr ="" # a shift by zero is omitted from the generated code
111 |       if( firstword == secondword):
112 |           if(firstshift + bit == 64):  # value ends at the word's top bit: the shift alone isolates it
113 |             print("  out[{0}] = base + ( w{1}  {2} );".format(j,firstword,firstshiftstr,firstshift))
114 |           else:
115 |             print("  out[{0}] = base + ( ( w{1} {2}) {3} );".format(j,firstword,firstshiftstr,maskstr))
116 |       else:  # value straddles two words: combine the tail of one with the head of the next
117 |           secondshift = (64-firstshift)
118 |           print("  out[{0}] = base + ( ( ( w{1} {2} ) | ( w{3} << {4} ) ) {5} );".format(j,firstword,firstshiftstr, firstword+1,secondshift,maskstr))
119 |     print("  *pout += {0}; /* we wrote {0} 64-bit integer{1} */ ".format(howmany(bit),plurial(howmany(bit))));
120 |     print("}");
121 |     print("")
122 | # dispatch tables indexed by bit width 0..64
123 | print("static packforblockfnc_64 funcForPackArr64[] = {")
124 | for bit in range(0,64):
125 |   print("&packforblock{0}_64,".format(bit))
126 | print("&packforblock64_64")
127 | print("};")
128 | 
129 | print("static unpackforblockfnc_64 funcForUnpackArr64[] = {")
130 | for bit in range(0,64):
131 |   print("&unpackforblock{0}_64,".format(bit))
132 | print("&unpackforblock64_64")
133 | print("};")
134 | print("/**  turbopacking64.py ends here **/")
135 | print("""
136 | #endif //INCLUDE_TURBOPACKING64_H
137 | """)
138 | 


--------------------------------------------------------------------------------
/src/test.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 | *
  3 | * This code is released under the
  4 | * Apache License Version 2.0 http://www.apache.org/licenses/.
  5 | * (c) Daniel Lemire 2013
  6 | */
  7 | #include <chrono>
  8 | #include <sys/stat.h>
  9 | #include <sys/types.h>
 10 | #ifdef _OPENMP
 11 | #include <omp.h>
 12 | #endif
 13 | #include <cmath>
 14 | #include <iomanip>
 15 | #include <fstream>
 16 | #include "bpacking.h"
 17 | #include "compression.h"
 18 | #include "turbocompression.h"
 19 | 
 20 | using namespace std;
 21 | 
 22 | 
 23 | //
 24 | // VS2012 bug: high_precision_clock is defined as system_clock and precision is
 25 | // about 15 MS!!
 26 | // See: https://connect.microsoft.com/VisualStudio/feedback/details/719443
 27 | //
 28 | // Implementation has been taken from a post on stackoverflow and adapted here
 29 | // http://stackoverflow.com/questions/13263277/difference-between-stdsystem-clock-and-stdsteady-clock
 30 | //
 31 | #ifdef _WIN32
 32 | #define NOMINMAX
 33 | #define WIN32_LEAN_AND_MEAN // the macro <windows.h> actually checks to leave out rarely used APIs
 34 | #include <windows.h>
 35 | // Clock backed by QueryPerformanceCounter; time points are expressed in nanoseconds.
 36 | struct qpc_clock {
 37 | 	typedef std::chrono::nanoseconds duration;
 38 | 	typedef duration::rep rep;
 39 | 	typedef duration::period period;
 40 | 	typedef std::chrono::time_point<qpc_clock, duration> time_point;
 41 | 	static time_point now() {
 42 | 		static bool isInited = false; // counter frequency is queried once and cached
 43 | 		static LARGE_INTEGER frequency = { 0, 0 };
 44 | 		if (!isInited) {
 45 | 			if (QueryPerformanceFrequency(&frequency) == 0) {
 46 | 				throw std::logic_error("QueryPerformanceCounter not supported: " +
 47 | 					std::to_string(GetLastError()));
 48 | 			}
 49 | 			isInited = true;
 50 | 		}
 51 | 		LARGE_INTEGER counter;
 52 | 		QueryPerformanceCounter(&counter);
 53 | 		return time_point(duration(static_cast<rep>((double)counter.QuadPart / // ticks -> seconds
 54 | 			frequency.QuadPart *
 55 | 			period::den / period::num))); // seconds -> units of `period`
 56 | 	}
 57 | };
 58 | 
 59 | #endif
 60 | 
 61 | /**
 62 | *  author: Preston Bannister
 63 | */
 64 | class WallClockTimer {
 65 | 	// Wall-clock stopwatch whose readings are reported in microseconds.
 66 | public:
 67 | #ifdef _WIN32
 68 | 	typedef qpc_clock clock; // the VS2012 high-resolution clock is too coarse
 69 | #else
 70 | 	typedef std::chrono::high_resolution_clock clock;
 71 | #endif
 72 | 	// t1: start of the measured interval; t2: end captured by the last split()
 73 | 	std::chrono::time_point<clock> t1, t2;
 74 | 	// starts timing immediately
 75 | 	WallClockTimer() : t1(clock::now()), t2(t1) {}
 76 | 	// restarts the interval at the current instant
 77 | 	void reset() {
 78 | 		t2 = t1 = clock::now();
 79 | 	}
 80 | 	// microseconds between the start and the most recent split()
 81 | 	uint64_t elapsed() {
 82 | 		using std::chrono::microseconds;
 83 | 		return std::chrono::duration_cast<microseconds>(t2 - t1).count();
 84 | 	}
 85 | 	// records "now" as the interval end and returns the elapsed microseconds
 86 | 	uint64_t split() {
 87 | 		t2 = clock::now();
 88 | 		return elapsed();
 89 | 	}
 90 | };
 91 | 
 92 | #ifndef _WIN32
 93 | #include <sys/time.h>
 94 | #include <sys/resource.h>
 95 | class CPUTimer {
 96 | public:
 97 | 	// getrusage snapshots: t1 marks the interval start, t2 the interval end
 98 | 	struct rusage t1, t2;
 99 | 	// starts the interval at construction time
100 | 	CPUTimer() : t1(), t2() {
101 | 		getrusage(RUSAGE_SELF, &t1);
102 | 		// start and end coincide until split() is called
103 | 		t2 = t1;
104 | 	}
105 | 	void reset() {
106 | 		getrusage(RUSAGE_SELF, &t1);
107 | 		t2 = t1;
108 | 	}
109 | 	// total CPU time (user + system) in microseconds; same value as totalelapsed()
110 | 	uint64_t elapsed() { return totalelapsed(); }
111 | 	// sum of the user and system CPU time, in microseconds
112 | 	uint64_t totalelapsed() { return userelapsed() + systemelapsed(); }
113 | 	// returns the *user* CPU time in microseconds
114 | 	uint64_t userelapsed() {
115 | 		return ((t2.ru_utime.tv_sec - t1.ru_utime.tv_sec) * 1000ULL * 1000ULL) +
116 | 			((t2.ru_utime.tv_usec - t1.ru_utime.tv_usec));
117 | 	}
118 | 
119 | 	// returns the *system* CPU time in microseconds
120 | 	uint64_t systemelapsed() {
121 | 		return ((t2.ru_stime.tv_sec - t1.ru_stime.tv_sec) * 1000ULL * 1000ULL) +
122 | 			((t2.ru_stime.tv_usec - t1.ru_stime.tv_usec));
123 | 	}
124 | 	// takes a fresh end snapshot and returns the CPU time used since t1
125 | 	uint64_t split() {
126 | 		getrusage(RUSAGE_SELF, &t2);
127 | 		return elapsed();
128 | 	}
129 | };
130 | 
131 | #endif
132 | 
133 | void displayUsage() { cout << "run as test nameoffile" << endl; } // prints the one-line usage hint to stdout
134 | 
135 | vector<uint32_t> loadVector(string filename) {
136 |   // Loads one unsigned 32-bit integer per line of `filename`. Returns an empty
137 |   // vector (after printing the usage hint) when the file cannot be opened.
138 |   vector<uint32_t> answer;
139 |   answer.reserve(1024 * 32); // expect sizeable arrays
140 |   ifstream logFile(filename.c_str());
141 |   if (!logFile.is_open()) {
142 |     cerr << " Couldn't open query vector file " << filename << endl;
143 |     displayUsage();
144 |     return answer;
145 |   }
146 |   cout << "# Parsing vector file " << filename << endl;
147 |   logFile.exceptions(ios::badbit); // throw on unrecoverable I/O errors
148 |   string line;
149 |   while (getline(logFile, line)) {
150 |     // strtoul, unlike atoi, is defined for the whole uint32_t range; a line
151 |     // that does not start with a number still yields 0, as before
152 |     answer.push_back(static_cast<uint32_t>(strtoul(line.c_str(), nullptr, 10)));
153 |   }
154 |   return answer;
155 | }
156 | 
157 | void unit() {
158 |   // round-trip 0..99 through compress/uncompress and require an exact match
159 |   const size_t count = 100;
160 |   vector<uint32_t> test(count);
161 |   for (size_t pos = 0; pos < count; ++pos)
162 |     test[pos] = static_cast<uint32_t>(pos);
163 |   const size_t capacity = count + 1024; // input length plus 1024 spare elements
164 |   vector<uint32_t> comp(capacity), recover(capacity);
165 |   compress(test.data(), test.size(), comp.data());
166 |   uint32_t nvalue = 0;
167 |   uncompress(comp.data(), recover.data(), nvalue);
168 |   recover.resize(nvalue);
169 |   if (!(recover == test))
170 |     throw runtime_error("bug");
171 | }
172 | 
173 | void deepunit() {
174 |   // for every shift 0..31, alternate 0 with a (32 - shift)-bit all-ones value
175 |   // and check that compress/uncompress reproduces the input exactly
176 |   for (int shift = 0; shift < 32; ++shift) {
177 |     const uint32_t peak = 0xFFFFFFFF >> shift;
178 |     vector<uint32_t> test(200);
179 |     for (size_t odd = 1; odd < test.size(); odd += 2)
180 |       test[odd] = peak;
181 |     // both buffers get 1024 spare elements beyond the input length
182 |     vector<uint32_t> comp(test.size() + 1024);
183 |     vector<uint32_t> recover(test.size() + 1024);
184 |     compress(test.data(), test.size(), comp.data());
185 |     uint32_t nvalue = 0;
186 |     uncompress(comp.data(), recover.data(), nvalue);
187 |     recover.resize(nvalue);
188 |     if (!(recover == test))
189 |       throw runtime_error("bug");
190 |   }
191 | }
192 | 
193 | void turbounit() {
194 |   // round-trip 0..99 through turbocompress/turbouncompress; throw on mismatch
195 |   const size_t count = 100;
196 |   vector<uint32_t> test(count);
197 |   for (size_t pos = 0; pos < count; ++pos)
198 |     test[pos] = static_cast<uint32_t>(pos);
199 |   // byte buffer: 4 bytes per input value plus 1024 spare bytes
200 |   vector<uint8_t> comp(4 * count + 1024);
201 |   vector<uint32_t> recover(count + 1024);
202 |   turbocompress(test.data(), test.size(), comp.data());
203 |   uint32_t nvalue = 0;
204 |   turbouncompress(comp.data(), recover.data(), nvalue);
205 |   recover.resize(nvalue);
206 |   if (!(recover == test))
207 |     throw runtime_error("bug");
208 | }
209 | 
210 | void turbodeepunit() {
211 |   for (int k = 31; k >= 0; --k) {
212 |     vector<uint32_t> test;
213 |     for (uint32_t i = 0; i < 100; ++i) {
214 |       test.push_back(0);
215 |       test.push_back(0xFFFFFFFF >> k);
216 |     }
217 |     vector<uint8_t> comp(test.size() * 4 + 1024);
218 |     vector<uint32_t> recover(test.size() + 1024);
219 | 
220 |     turbocompress(test.data(), test.size(), comp.data());
221 |     uint32_t nvalue = 0;
222 |     turbouncompress(comp.data(), recover.data(), nvalue);
223 |     recover.resize(nvalue);
224 |     if (recover != test) {
225 |       throw runtime_error("bug");
226 |     }
227 |   }
228 | }
229 | 
230 | 
231 | void turbounit64() {
232 |   vector<uint64_t> test;
233 |   for (uint64_t i = 0; i < 100; ++i)
234 |     test.push_back(i);
235 |   vector<uint8_t> comp(4 * test.size() + 1024);
236 |   vector<uint64_t> recover(test.size() + 1024);
237 | 
238 |   turbocompress64(test.data(), test.size(), comp.data());
239 |   uint32_t nvalue = 0;
240 |   turbouncompress64(comp.data(), recover.data(), nvalue);
241 | 
242 |   recover.resize(nvalue);
243 |   if (recover != test)
244 |     throw runtime_error("bug64");
245 | }
246 | 
247 | void turbodeepunit64() {
248 |   for (int k = 31; k >= 0; --k) {
249 |     vector<uint64_t> test;
250 |     for (uint64_t i = 0; i < 100; ++i) {
251 |       test.push_back(0);
252 |       test.push_back(0xFFFFFFFF >> k);
253 |     }
254 |     vector<uint8_t> comp(test.size() * 4 + 1024);
255 |     vector<uint64_t> recover(test.size() + 1024);
256 | 
257 |     turbocompress64(test.data(), test.size(), comp.data());
258 |     uint32_t nvalue = 0;
259 |     turbouncompress64(comp.data(), recover.data(), nvalue);
260 |     recover.resize(nvalue);
261 |     if (recover != test) {
262 |       throw runtime_error("bug64d");
263 |     }
264 |   }
265 | }
266 | 
267 | 
268 | void benchmark(vector<uint32_t> &data) {
269 |   std::cout << "[standard benchmark]" << std::endl;
270 |   vector<uint32_t> buffer(data.size());
271 | 
272 |   if (data.size() == 0) {
273 |     cout << "Empty vector" << endl;
274 |     return;
275 |   }
276 |   cout << "vector size = " << data.size() << endl;
277 |   cout << "vector size = " << data.size() * sizeof(uint32_t) / 1024.0 << "KB"
278 |        << endl;
279 | 
280 |   vector<uint32_t> compdata(data.size() + 2048);
281 |   uint32_t *out = compress(data.data(), data.size(), compdata.data());
282 |   cout << "compression rate:" << setprecision(2)
283 |        << data.size() * 1.0 / (out - compdata.data()) << endl;
284 |   cout << "bits/int:" << setprecision(4)
285 |        << (out - compdata.data()) * 32.0 / data.size() << endl;
286 |   cout << "volume: " << setprecision(2) << (out - compdata.data()) * 4.0 / 1024
287 |        << "KB" << endl;
288 | 
289 |   uint32_t nvalue = 0;
290 |   uncompress(compdata.data(), buffer.data(), nvalue);
291 |   buffer.resize(nvalue);
292 |   if (buffer != data)
293 |     throw runtime_error("bug");
294 | 
295 |   double numberofintegers = 0;
296 |   int N = (1 << 28) / data.size();
297 |   uint32_t bogus = 0;
298 |   WallClockTimer timer;
299 |   for (int k = 0; k < N; ++k) {
300 |     uncompress(compdata.data(), buffer.data(), nvalue);
301 |     numberofintegers += nvalue;
302 |     bogus += buffer.back() + buffer.front();
303 |   }
304 |   uint64_t timems = timer.split();
305 |   cout << "decoding time per int: " << setprecision(2)
306 |        << timems / numberofintegers * 1000 * 1000 << "ns" << endl;
307 |   cout << "decoding time per array: " << setprecision(2)
308 |        << static_cast<double>(timems) / N * 1000 << "ms" << endl;
309 |   cout << "# ignore me " << bogus << endl;
310 |   cout << endl;
311 | }
312 | 
313 | void turbobenchmark(vector<uint32_t> &data) {
314 |   std::cout << "[turbo benchmark]" << std::endl;
315 |   vector<uint32_t> buffer(data.size());
316 | 
317 |   if (data.size() == 0) {
318 |     cout << "Empty vector" << endl;
319 |     return;
320 |   }
321 |   cout << "vector size = " << data.size() << endl;
322 |   cout << "vector size = " << data.size() * sizeof(uint32_t) / 1024.0 << "KB"
323 |        << endl;
324 | 
325 |   vector<uint8_t> compdata(data.size() * sizeof(uint32_t) + 2048);
326 |   const uint8_t *out = turbocompress(data.data(), data.size(), compdata.data());
327 |   cout << "compression rate:" << setprecision(2)
328 |        << data.size() * 1.0 * sizeof(uint32_t) / (out - compdata.data())
329 |        << endl;
330 |   cout << "bits/int:" << setprecision(4)
331 |        << (out - compdata.data()) * 8.0 / data.size() << endl;
332 |   cout << "volume: " << setprecision(2) << (out - compdata.data()) * 1.0 / 1024
333 |        << "KB" << endl;
334 | 
335 |   uint32_t nvalue = 0;
336 |   turbouncompress(compdata.data(), buffer.data(), nvalue);
337 |   buffer.resize(nvalue);
338 |   if (buffer != data)
339 |     throw runtime_error("bug");
340 | 
341 |   double numberofintegers = 0;
342 |   int N = (1 << 28) / data.size();
343 |   uint32_t bogus = 0;
344 |   WallClockTimer timer;
345 |   for (int k = 0; k < N; ++k) {
346 |     turbouncompress(compdata.data(), buffer.data(), nvalue);
347 |     numberofintegers += nvalue;
348 |     bogus += buffer.back() + buffer.front();
349 |   }
350 |   uint64_t timems = timer.split();
351 |   cout << "decoding time per int: " << setprecision(2)
352 |        << timems / numberofintegers * 1000 * 1000 << "ns" << endl;
353 |   cout << "decoding time per array: " << setprecision(2)
354 |        << static_cast<double>(timems) / N * 1000 << "ms" << endl;
355 |   cout << "# ignore me " << bogus << endl;
356 |   cout << endl;
357 | }
358 | 
359 | void turbobenchmark64(vector<uint32_t> &data32) {
360 |   std::cout << "[turbo benchmark64]" << std::endl;
361 |   vector<uint64_t> data;
362 | 
363 |   for (vector<uint32_t>::const_iterator i = data32.begin(); i != data32.end(); ++i)
364 |     data.push_back(*i);
365 | 
366 |   vector<uint64_t> buffer(data);
367 | 
368 |   if (data.size() == 0) {
369 |     cout << "Empty vector" << endl;
370 |     return;
371 |   }
372 |   cout << "vector size = " << data.size() << endl;
373 |   cout << "vector size = " << data.size() * sizeof(uint64_t) / 1024.0 << "KB"
374 |        << endl;
375 | 
376 |   vector<uint8_t> compdata(data.size() * sizeof(uint32_t) + 2048);
377 |   const uint8_t *out = turbocompress64(data.data(), data.size(), compdata.data());
378 |   cout << "compression rate:" << setprecision(2)
379 |        << data.size() * 1.0 * sizeof(uint64_t) / (out - compdata.data())
380 |        << endl;
381 |   cout << "bits/int:" << setprecision(4)
382 |        << (out - compdata.data()) * 8.0 / data.size() << endl;
383 |   cout << "volume: " << setprecision(2) << (out - compdata.data()) * 1.0 / 1024
384 |        << "KB" << endl;
385 | 
386 |   uint32_t nvalue = 0;
387 |   turbouncompress64(compdata.data(), buffer.data(), nvalue);
388 |   buffer.resize(nvalue);
389 |   if (buffer != data)
390 |     throw runtime_error("bug");
391 | 
392 |   double numberofintegers = 0;
393 |   int N = (1 << 28) / data.size();
394 |   uint32_t bogus = 0;
395 |   WallClockTimer timer;
396 |   for (int k = 0; k < N; ++k) {
397 |     turbouncompress64(compdata.data(), buffer.data(), nvalue);
398 |     numberofintegers += nvalue;
399 |     bogus += buffer.back() + buffer.front();
400 |   }
401 |   uint64_t timems = timer.split();
402 |   cout << "decoding time per int: " << setprecision(2)
403 |        << timems / numberofintegers * 1000 * 1000 << "ns" << endl;
404 |   cout << "decoding time per array: " << setprecision(2)
405 |        << static_cast<double>(timems) / N * 1000 << "ms" << endl;
406 |   cout << "# ignore me " << bogus << endl;
407 |   cout << endl;
408 | }
409 | 
410 | int main(int argc, char **argv) {
411 |   unit();
412 |   deepunit();
413 | 
414 |   turbounit();
415 |   turbodeepunit();
416 | 
417 |   turbounit64();
418 |   turbodeepunit64();
419 |   if (argc <= 1) {
420 |     displayUsage();
421 |     return -1;
422 |   }
423 |   string filename = argv[1];
424 | #ifdef _OPENMP
425 |   cout << "OpenMP support is available" << endl;
426 |   int k;
427 | #pragma omp parallel
428 |   {
429 | #pragma omp master
430 |     {
431 |       k = omp_get_num_threads();
432 |       cout << "Number of Threads requested = " << k << endl;
433 |     }
434 |   }
435 | #else
436 |   cout << "No OpenMP support" << endl;
437 | #endif
438 | 
439 | #ifdef _OPENMP
440 |   k = 0;
441 | #pragma omp parallel
442 | #pragma omp atomic
443 |   k++;
444 |   cout << "Number of Threads counted = " << k << endl;
445 | #endif
446 | 
447 |   cout << "####### processing " << filename << endl;
448 |   vector<uint32_t> data = loadVector(filename);
449 |   cout << endl;
450 | 
451 |   benchmark(data);
452 |   turbobenchmark(data);
453 |   turbobenchmark64(data);
454 | 
455 |   return 0;
456 | }
457 | 


--------------------------------------------------------------------------------