├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── README.md ├── include ├── bpacking.h ├── common.h ├── compression.h ├── turbocompression.h ├── turbopacking32.h ├── turbopacking64.h └── util.h ├── sampledata.txt ├── scripts ├── turbopacking32.py └── turbopacking64.py └── src ├── bpacking.cpp └── test.cpp /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | sudo: false 3 | compiler: 4 | - clang 5 | 6 | branches: 7 | only: 8 | - master 9 | 10 | script: mkdir -p build && cd build && cmake .. && make VERBOSE=1 && make test 11 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.5...3.29)
2 |
3 | project(FrameOfReference LANGUAGES CXX)
4 | set(PROJECT_URL "https://github.com/lemire/FrameOfReference")
5 | set(PROJECT_DESCRIPTION "C++ library to pack and unpack vectors of integers having a small range of values using a technique called Frame of Reference")
6 |
7 | # Default to an optimized build when the user did not pick a build type.
8 | # Only single-configuration generators (Makefiles, Ninja) read
9 | # CMAKE_BUILD_TYPE; multi-configuration generators (Visual Studio, Xcode)
10 | # define CMAKE_CONFIGURATION_TYPES and choose the configuration at build time.
11 | # FORCE replaces the empty cache entry created while enabling the language.
12 | if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
13 |   message(STATUS "No build type selected, default to Release")
14 |   set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
15 | endif()
16 | message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
17 |
18 | # Library target. The include directory is PUBLIC so that targets linking
19 | # against FrameOfReference can include its headers.
20 | add_library(FrameOfReference STATIC src/bpacking.cpp)
21 | target_include_directories(FrameOfReference PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
22 |
23 | # Test driver; it is run with the path of a data file as its argument.
24 | add_executable(unit src/test.cpp)
25 | target_link_libraries(unit PRIVATE FrameOfReference)
26 |
27 | # Request C++11 through target properties instead of overwriting
28 | # CMAKE_CXX_FLAGS, so user-supplied flags survive and every compiler gets its
29 | # own switch. CXX_EXTENSIONS OFF selects -std=c++11 rather than -std=gnu++11.
30 | set_target_properties(FrameOfReference unit PROPERTIES
31 |   CXX_STANDARD 11
32 |   CXX_STANDARD_REQUIRED ON
33 |   CXX_EXTENSIONS OFF
34 | )
35 |
36 | # -Wall is the GCC/Clang spelling; MSVC keeps its default warning level.
37 | if(NOT MSVC)
38 |   target_compile_options(FrameOfReference PRIVATE -Wall)
39 |   target_compile_options(unit PRIVATE -Wall)
40 | endif()
41 |
42 | enable_testing()
43 | # The NAME/COMMAND signature resolves the `unit` target to the binary built
44 | # for the active configuration, which the legacy signature does not.
45 | add_test(NAME unit COMMAND unit "${PROJECT_SOURCE_DIR}/sampledata.txt")
46 |
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /Makefile: --------------------------------------------------------------------------------
1 | .SUFFIXES:
2 | #
3 | .SUFFIXES: .cpp .o .c .h
4 | # Set YOURCXX to the path of a C++11 compatible compiler, e.g.
5 | # "make YOURCXX=clang++". An explicitly provided CXX takes precedence.
6 | ifeq ($(INTEL), 1)
7 | # if you wish to use the Intel compiler, please do "make INTEL=1".
8 | YOURCXX ?= /opt/intel/bin/icpc
9 | # -ansi is deliberately absent: GCC documents it as -std=c++98 for C++, and
10 | # placed after -std=c++11 it would win. (Assumes icpc mirrors GCC here.)
11 | ifeq ($(DEBUG),1)
12 | CXXFLAGS = -std=c++11 -O3 -Wall -DDEBUG=1 -D_GLIBCXX_DEBUG -ggdb
13 | else
14 | CXXFLAGS = -std=c++11 -O3 -Wall -DNDEBUG=1 -ggdb
15 | endif # debug
16 | else #intel
17 | YOURCXX ?= g++
18 | ifeq ($(DEBUG),1)
19 | CXXFLAGS = -std=c++11 -Weffc++ -pedantic -ggdb -DDEBUG=1 -D_GLIBCXX_DEBUG -Wall -Wextra -Wcast-align
20 | else
21 | CXXFLAGS = -std=c++11 -Weffc++ -pedantic -O3 -Wall -Wextra -Wcast-align
22 | endif #debug
23 | endif #intel
24 |
25 | # The rules below invoke $(CXX). Feed YOURCXX into it unless CXX was set
26 | # explicitly (environment or command line), so that INTEL=1 and YOURCXX
27 | # actually select the compiler.
28 | ifeq ($(origin CXX),default)
29 | CXX = $(YOURCXX)
30 | endif
31 |
32 | # Every header of the library; a change to any of them relinks the binaries.
33 | HEADERS = $(wildcard include/*.h)
34 |
35 | # all and clean never produce a file of that name.
36 | .PHONY: all clean
37 |
38 | all: test
39 | 	@echo "type ./test sampledata.txt"
40 | 	@echo "if you have OpenMP support, type make testmp"
41 |
42 | # bpacking.h includes common.h, so both are prerequisites of the object file.
43 | bpacking.o: src/bpacking.cpp include/bpacking.h include/common.h
44 | 	$(CXX) $(CXXFLAGS) -c src/bpacking.cpp -Iinclude
45 |
46 | # OpenMP build of the test driver.
47 | testmp: $(HEADERS) src/test.cpp bpacking.o
48 | ifeq ($(INTEL), 1)
49 | 	$(CXX) -openmp -lpthread -D_OPENMP $(CXXFLAGS) -o testmp src/test.cpp bpacking.o -Iinclude
50 | else
51 | 	$(CXX) -fopenmp -lpthread $(CXXFLAGS) -o testmp src/test.cpp bpacking.o -Iinclude
52 | endif
53 |
54 | # Single-threaded test driver; run it as "./test sampledata.txt".
55 | test: $(HEADERS) src/test.cpp bpacking.o
56 | 	$(CXX) $(CXXFLAGS) -o test src/test.cpp bpacking.o -Iinclude
57 |
58 | clean:
59 | 	rm -f *.o test testmp
60 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Frame of Reference (FOR) C++ library 2 | 3 | ## What is this? 4 | 5 | C++ library to pack and unpack vectors of integers having a small 6 | range of values using a technique called Frame of Reference (Goldstein et al. 1998). 7 | It should run fast even though it is written in simple C++. 8 | 9 | Code from this library is part of [Apache Arrow](https://github.com/apache/arrow) 10 | and [Apache Impala](https://github.com/cloudera/Impala).
11 | 12 | ## Code usage : 13 | 14 | Given an array of 32-bit integers, you can compress it as follows: 15 | 16 | ```C 17 | #include "compression.h" 18 | 19 | ... 20 | 21 | uint32_t * inputdata = ... // length values 22 | uint32_t * compresseddata = ... // enough data 23 | uint32_t *out = compress(inputdata, length, compresseddata); 24 | // compressed data lies between compresseddata and out 25 | uint32_t nvalue = 0; 26 | uint32_t * recoverydata = ... // available buffer with at least length elements 27 | uncompress(compresseddata, recoverydata, nvalue); 28 | // nvalue will be equal to length 29 | ``` 30 | 31 | There is a similar API with ``turbocompress`` and ``turbouncompress`` with the difference 32 | that ``compresseddata`` uses an ``uint8_t`` pointer type. 33 | 34 | ```C 35 | #include "turbocompression.h" 36 | 37 | ... 38 | 39 | uint32_t * inputdata = ... // length values 40 | uint8_t * compresseddata = ... // enough data 41 | uint8_t *out = turbocompress(inputdata, length, compresseddata); 42 | // compressed data lies between compresseddata and out 43 | uint32_t nvalue = 0; 44 | uint32_t * recoverydata = ... // available buffer with at least length elements 45 | turbouncompress(compresseddata, recoverydata, nvalue); 46 | // nvalue will be equal to length 47 | ``` 48 | 49 | We can also compress 64-bit arrays: 50 | 51 | ```C 52 | #include "turbocompression.h" 53 | 54 | ... 55 | 56 | uint64_t * inputdata = ... // length values 57 | uint8_t * compresseddata = ... // enough data 58 | uint8_t *out = turbocompress64(inputdata, length, compresseddata); 59 | // compressed data lies between compresseddata and out 60 | uint32_t nvalue = 0; 61 | uint64_t * recoverydata = ... 
// available buffer with at least length elements 62 | turbouncompress64(compresseddata, recoverydata, nvalue); 63 | // nvalue will be equal to length 64 | ``` 65 | 66 | ## Usage (with Makefile) 67 | 68 | To run a simple benchmark, do 69 | 70 | make 71 | ./test sampledata.txt 72 | 73 | where sampledata.txt is a text data file with one integer per line. 74 | 75 | For a parallelized version, type 76 | 77 | make testmp 78 | ./testmp sampledata.txt 79 | 80 | This requires OpenMP support, however. 81 | 82 | 83 | ## Building (with CMake under macOS and Linux) 84 | 85 | You need to have ``cmake`` installed and available as a command. 86 | 87 | mkdir release 88 | cd release 89 | cmake .. 90 | make 91 | make test 92 | 93 | ## Building (Visual Studio under Windows) 94 | 95 | We are assuming that you have a common Windows PC with at least Visual Studio 2015, and an x64 processor. 96 | 97 | To build with at least Visual Studio 2015 from the command line: 98 | - Grab the FrameOfReference code from GitHub, e.g., by cloning it using [GitHub Desktop](https://desktop.github.com/). 99 | - Install [CMake](https://cmake.org/download/). When you install it, make sure to ask that ``cmake`` be made available from the command line. 100 | - Create a subdirectory within FrameOfReference, such as ``VisualStudio``. 101 | - Using a shell, go to this newly created directory. For example, within GitHub Desktop, you can right-click on ``FrameOfReference`` in your GitHub repository list, and select ``Open in Git Shell``, then type ``cd VisualStudio`` in the newly created shell. 102 | - Type ``cmake -DCMAKE_GENERATOR_PLATFORM=x64 ..`` in the shell while in the ``VisualStudio`` directory. 103 | - This last command created a Visual Studio solution file in the newly created directory (e.g., ``FrameOfReference.sln``). Open this file in Visual Studio. You should now be able to build the project and run the tests.
For example, in the ``Solution Explorer`` window (available from the ``View`` menu), right-click ``ALL_BUILD`` and select ``Build``. To test the code, still in the ``Solution Explorer`` window, right-click ``RUN_TESTS`` and select ``Build``. 104 | 105 | To build with at least Visual Studio 2017 directly in the IDE: 106 | - Grab the FrameOfReference code from GitHub, e.g., by cloning it using [GitHub Desktop](https://desktop.github.com/). 107 | - Select the ``Visual C++ tools for CMake`` optional component when installing the C++ Development Workload within Visual Studio. 108 | - Within Visual Studio use ``File > Open > Folder...`` to open the FrameOfReference folder. 109 | - Right click on CMakeLists.txt in the parent directory within ``Solution Explorer`` and select ``Build`` to build the project. 110 | - For testing, in the Standard toolbar, drop the ``Select Startup Item...`` menu and choose one of the tests. Run the test by pressing the button to the left of the dropdown. 111 | 112 | 113 | ## Requirements: 114 | 115 | This was tested with GNU G++ and clang++. After suitable adjustments, it should 116 | build under most C++ compilers. 117 | 118 | ## Other relevant libraries 119 | 120 | * Fast decoder for VByte-compressed integers https://github.com/lemire/MaskedVByte 121 | * Fast integer compression in C using StreamVByte https://github.com/lemire/streamvbyte 122 | * FastPFOR is a C++ research library well suited to compress unsorted arrays: https://github.com/lemire/FastPFor 123 | * SIMDCompressionAndIntersection is a C++ research library well suited for sorted arrays (differential coding) 124 | and computing intersections: https://github.com/lemire/SIMDCompressionAndIntersection 125 | 126 | ## References 127 | 128 | * Daniel Lemire, Nathan Kurz, Christoph Rupp, Stream VByte: Faster Byte-Oriented Integer Compression, Information Processing Letters (to appear) https://arxiv.org/abs/1709.08990 129 | * Goldstein J, Ramakrishnan R, Shaft U.
Compressing relations and indexes. Proceedings of the Fourteenth International Conference on Data Engineering, ICDE ’98, IEEE Computer Society: Washington, DC, USA, 1998; 370–379. 130 | * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second through vectorization, Software Practice & Experience 45 (1), 2015. http://arxiv.org/abs/1209.2137 http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract 131 | * Daniel Lemire, Leonid Boytsov, Nathan Kurz, SIMD Compression and the Intersection of Sorted Integers, Software Practice & Experience 46 (6), 2016. http://arxiv.org/abs/1401.6399 132 | * Jeff Plaisance, Nathan Kurz, Daniel Lemire, Vectorized VByte Decoding, International Symposium on Web Algorithms 2015, 2015. http://arxiv.org/abs/1503.07387 133 | * Wayne Xin Zhao, Xudong Zhang, Daniel Lemire, Dongdong Shan, Jian-Yun Nie, Hongfei Yan, Ji-Rong Wen, A General SIMD-based Approach to Accelerating Compression Algorithms, ACM Transactions on Information Systems 33 (3), 2015. http://arxiv.org/abs/1502.01916 134 | * Jianguo Wang, Chunbin Lin, Yannis Papakonstantinou, Steven Swanson, An Experimental Study of Bitmap Compression vs. Inverted List Compression, SIGMOD 2017 http://db.ucsd.edu/wp-content/uploads/2017/03/sidm338-wangA.pdf 135 | -------------------------------------------------------------------------------- /include/bpacking.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * This code is released under the 4 | * Apache License Version 2.0 http://www.apache.org/licenses/. 
5 | * (c) Daniel Lemire 2013 6 | */ 7 | #ifndef BPACKING 8 | #define BPACKING 9 | 10 | #include "common.h" 11 | uint32_t * pack1_8( uint32_t base, uint32_t * in, uint32_t * out); 12 | 13 | uint32_t * pack2_8( uint32_t base, uint32_t * in, uint32_t * out); 14 | 15 | uint32_t * pack3_8( uint32_t base, uint32_t * in, uint32_t * out); 16 | 17 | uint32_t * pack4_8( uint32_t base, uint32_t * in, uint32_t * out); 18 | 19 | uint32_t * pack5_8( uint32_t base, uint32_t * in, uint32_t * out); 20 | 21 | uint32_t * pack6_8( uint32_t base, uint32_t * in, uint32_t * out); 22 | 23 | uint32_t * pack7_8( uint32_t base, uint32_t * in, uint32_t * out); 24 | 25 | uint32_t * pack8_8( uint32_t base, uint32_t * in, uint32_t * out); 26 | 27 | uint32_t * pack9_8( uint32_t base, uint32_t * in, uint32_t * out); 28 | 29 | uint32_t * pack10_8( uint32_t base, uint32_t * in, uint32_t * out); 30 | 31 | uint32_t * pack11_8( uint32_t base, uint32_t * in, uint32_t * out); 32 | 33 | uint32_t * pack12_8( uint32_t base, uint32_t * in, uint32_t * out); 34 | 35 | uint32_t * pack13_8( uint32_t base, uint32_t * in, uint32_t * out); 36 | 37 | uint32_t * pack14_8( uint32_t base, uint32_t * in, uint32_t * out); 38 | 39 | uint32_t * pack15_8( uint32_t base, uint32_t * in, uint32_t * out); 40 | 41 | uint32_t * pack16_8( uint32_t base, uint32_t * in, uint32_t * out); 42 | 43 | uint32_t * pack17_8( uint32_t base, uint32_t * in, uint32_t * out); 44 | 45 | uint32_t * pack18_8( uint32_t base, uint32_t * in, uint32_t * out); 46 | 47 | uint32_t * pack19_8( uint32_t base, uint32_t * in, uint32_t * out); 48 | 49 | uint32_t * pack20_8( uint32_t base, uint32_t * in, uint32_t * out); 50 | 51 | uint32_t * pack21_8( uint32_t base, uint32_t * in, uint32_t * out); 52 | 53 | uint32_t * pack22_8( uint32_t base, uint32_t * in, uint32_t * out); 54 | 55 | uint32_t * pack23_8( uint32_t base, uint32_t * in, uint32_t * out); 56 | 57 | uint32_t * pack24_8( uint32_t base, uint32_t * in, uint32_t * out); 58 | 59 | uint32_t * pack25_8( 
uint32_t base, uint32_t * in, uint32_t * out); 60 | 61 | uint32_t * pack26_8( uint32_t base, uint32_t * in, uint32_t * out); 62 | 63 | uint32_t * pack27_8( uint32_t base, uint32_t * in, uint32_t * out); 64 | 65 | uint32_t * pack28_8( uint32_t base, uint32_t * in, uint32_t * out); 66 | 67 | uint32_t * pack29_8( uint32_t base, uint32_t * in, uint32_t * out); 68 | 69 | uint32_t * pack30_8( uint32_t base, uint32_t * in, uint32_t * out); 70 | 71 | uint32_t * pack31_8( uint32_t base, uint32_t * in, uint32_t * out); 72 | 73 | uint32_t * pack32_8( uint32_t base, uint32_t * in, uint32_t * out); 74 | 75 | 76 | uint32_t * unpack1_8( uint32_t base, uint32_t * in, uint32_t * out); 77 | 78 | 79 | uint32_t * unpack2_8( uint32_t base, uint32_t * in, uint32_t * out); 80 | 81 | 82 | uint32_t * unpack3_8( uint32_t base, uint32_t * in, uint32_t * out); 83 | 84 | 85 | uint32_t * unpack4_8( uint32_t base, uint32_t * in, uint32_t * out); 86 | 87 | 88 | uint32_t * unpack5_8( uint32_t base, uint32_t * in, uint32_t * out); 89 | 90 | 91 | uint32_t * unpack6_8( uint32_t base, uint32_t * in, uint32_t * out); 92 | 93 | 94 | uint32_t * unpack7_8( uint32_t base, uint32_t * in, uint32_t * out); 95 | 96 | 97 | uint32_t * unpack8_8( uint32_t base, uint32_t * in, uint32_t * out); 98 | 99 | 100 | uint32_t * unpack9_8( uint32_t base, uint32_t * in, uint32_t * out); 101 | 102 | 103 | uint32_t * unpack10_8( uint32_t base, uint32_t * in, uint32_t * out); 104 | 105 | 106 | uint32_t * unpack11_8( uint32_t base, uint32_t * in, uint32_t * out); 107 | 108 | 109 | uint32_t * unpack12_8( uint32_t base, uint32_t * in, uint32_t * out); 110 | 111 | 112 | uint32_t * unpack13_8( uint32_t base, uint32_t * in, uint32_t * out); 113 | 114 | 115 | uint32_t * unpack14_8( uint32_t base, uint32_t * in, uint32_t * out); 116 | 117 | 118 | uint32_t * unpack15_8( uint32_t base, uint32_t * in, uint32_t * out); 119 | 120 | 121 | uint32_t * unpack16_8( uint32_t base, uint32_t * in, uint32_t * out); 122 | 123 | 124 | uint32_t * 
unpack17_8( uint32_t base, uint32_t * in, uint32_t * out); 125 | 126 | 127 | uint32_t * unpack18_8( uint32_t base, uint32_t * in, uint32_t * out); 128 | 129 | 130 | uint32_t * unpack19_8( uint32_t base, uint32_t * in, uint32_t * out); 131 | 132 | 133 | uint32_t * unpack20_8( uint32_t base, uint32_t * in, uint32_t * out); 134 | 135 | 136 | uint32_t * unpack21_8( uint32_t base, uint32_t * in, uint32_t * out); 137 | 138 | 139 | uint32_t * unpack22_8( uint32_t base, uint32_t * in, uint32_t * out); 140 | 141 | 142 | uint32_t * unpack23_8( uint32_t base, uint32_t * in, uint32_t * out); 143 | 144 | 145 | uint32_t * unpack24_8( uint32_t base, uint32_t * in, uint32_t * out); 146 | 147 | 148 | uint32_t * unpack25_8( uint32_t base, uint32_t * in, uint32_t * out); 149 | 150 | 151 | uint32_t * unpack26_8( uint32_t base, uint32_t * in, uint32_t * out); 152 | 153 | 154 | uint32_t * unpack27_8( uint32_t base, uint32_t * in, uint32_t * out); 155 | 156 | 157 | uint32_t * unpack28_8( uint32_t base, uint32_t * in, uint32_t * out); 158 | 159 | 160 | uint32_t * unpack29_8( uint32_t base, uint32_t * in, uint32_t * out); 161 | 162 | 163 | uint32_t * unpack30_8( uint32_t base, uint32_t * in, uint32_t * out); 164 | 165 | 166 | uint32_t * unpack31_8( uint32_t base, uint32_t * in, uint32_t * out); 167 | 168 | 169 | uint32_t * unpack32_8( uint32_t base, uint32_t * in, uint32_t * out); 170 | 171 | uint32_t * pack1_16( uint32_t base, uint32_t * in, uint32_t * out); 172 | 173 | uint32_t * pack2_16( uint32_t base, uint32_t * in, uint32_t * out); 174 | 175 | uint32_t * pack3_16( uint32_t base, uint32_t * in, uint32_t * out); 176 | 177 | uint32_t * pack4_16( uint32_t base, uint32_t * in, uint32_t * out); 178 | 179 | uint32_t * pack5_16( uint32_t base, uint32_t * in, uint32_t * out); 180 | 181 | uint32_t * pack6_16( uint32_t base, uint32_t * in, uint32_t * out); 182 | 183 | uint32_t * pack7_16( uint32_t base, uint32_t * in, uint32_t * out); 184 | 185 | uint32_t * pack8_16( uint32_t base, uint32_t * 
in, uint32_t * out); 186 | 187 | uint32_t * pack9_16( uint32_t base, uint32_t * in, uint32_t * out); 188 | 189 | uint32_t * pack10_16( uint32_t base, uint32_t * in, uint32_t * out); 190 | 191 | uint32_t * pack11_16( uint32_t base, uint32_t * in, uint32_t * out); 192 | 193 | uint32_t * pack12_16( uint32_t base, uint32_t * in, uint32_t * out); 194 | 195 | uint32_t * pack13_16( uint32_t base, uint32_t * in, uint32_t * out); 196 | 197 | uint32_t * pack14_16( uint32_t base, uint32_t * in, uint32_t * out); 198 | 199 | uint32_t * pack15_16( uint32_t base, uint32_t * in, uint32_t * out); 200 | 201 | uint32_t * pack16_16( uint32_t base, uint32_t * in, uint32_t * out); 202 | 203 | uint32_t * pack17_16( uint32_t base, uint32_t * in, uint32_t * out); 204 | 205 | uint32_t * pack18_16( uint32_t base, uint32_t * in, uint32_t * out); 206 | 207 | uint32_t * pack19_16( uint32_t base, uint32_t * in, uint32_t * out); 208 | 209 | uint32_t * pack20_16( uint32_t base, uint32_t * in, uint32_t * out); 210 | 211 | uint32_t * pack21_16( uint32_t base, uint32_t * in, uint32_t * out); 212 | 213 | uint32_t * pack22_16( uint32_t base, uint32_t * in, uint32_t * out); 214 | 215 | uint32_t * pack23_16( uint32_t base, uint32_t * in, uint32_t * out); 216 | 217 | uint32_t * pack24_16( uint32_t base, uint32_t * in, uint32_t * out); 218 | 219 | uint32_t * pack25_16( uint32_t base, uint32_t * in, uint32_t * out); 220 | 221 | uint32_t * pack26_16( uint32_t base, uint32_t * in, uint32_t * out); 222 | 223 | uint32_t * pack27_16( uint32_t base, uint32_t * in, uint32_t * out); 224 | 225 | uint32_t * pack28_16( uint32_t base, uint32_t * in, uint32_t * out); 226 | 227 | uint32_t * pack29_16( uint32_t base, uint32_t * in, uint32_t * out); 228 | 229 | uint32_t * pack30_16( uint32_t base, uint32_t * in, uint32_t * out); 230 | 231 | uint32_t * pack31_16( uint32_t base, uint32_t * in, uint32_t * out); 232 | 233 | uint32_t * pack32_16( uint32_t base, uint32_t * in, uint32_t * out); 234 | 235 | 236 | uint32_t * 
unpack1_16( uint32_t base, uint32_t * in, uint32_t * out); 237 | 238 | 239 | uint32_t * unpack2_16( uint32_t base, uint32_t * in, uint32_t * out); 240 | 241 | 242 | uint32_t * unpack3_16( uint32_t base, uint32_t * in, uint32_t * out); 243 | 244 | 245 | uint32_t * unpack4_16( uint32_t base, uint32_t * in, uint32_t * out); 246 | 247 | 248 | uint32_t * unpack5_16( uint32_t base, uint32_t * in, uint32_t * out); 249 | 250 | 251 | uint32_t * unpack6_16( uint32_t base, uint32_t * in, uint32_t * out); 252 | 253 | 254 | uint32_t * unpack7_16( uint32_t base, uint32_t * in, uint32_t * out); 255 | 256 | 257 | uint32_t * unpack8_16( uint32_t base, uint32_t * in, uint32_t * out); 258 | 259 | 260 | uint32_t * unpack9_16( uint32_t base, uint32_t * in, uint32_t * out); 261 | 262 | 263 | uint32_t * unpack10_16( uint32_t base, uint32_t * in, uint32_t * out); 264 | 265 | 266 | uint32_t * unpack11_16( uint32_t base, uint32_t * in, uint32_t * out); 267 | 268 | 269 | uint32_t * unpack12_16( uint32_t base, uint32_t * in, uint32_t * out); 270 | 271 | 272 | uint32_t * unpack13_16( uint32_t base, uint32_t * in, uint32_t * out); 273 | 274 | 275 | uint32_t * unpack14_16( uint32_t base, uint32_t * in, uint32_t * out); 276 | 277 | 278 | uint32_t * unpack15_16( uint32_t base, uint32_t * in, uint32_t * out); 279 | 280 | 281 | uint32_t * unpack16_16( uint32_t base, uint32_t * in, uint32_t * out); 282 | 283 | 284 | uint32_t * unpack17_16( uint32_t base, uint32_t * in, uint32_t * out); 285 | 286 | 287 | uint32_t * unpack18_16( uint32_t base, uint32_t * in, uint32_t * out); 288 | 289 | 290 | uint32_t * unpack19_16( uint32_t base, uint32_t * in, uint32_t * out); 291 | 292 | 293 | uint32_t * unpack20_16( uint32_t base, uint32_t * in, uint32_t * out); 294 | 295 | 296 | uint32_t * unpack21_16( uint32_t base, uint32_t * in, uint32_t * out); 297 | 298 | 299 | uint32_t * unpack22_16( uint32_t base, uint32_t * in, uint32_t * out); 300 | 301 | 302 | uint32_t * unpack23_16( uint32_t base, uint32_t * in, 
uint32_t * out); 303 | 304 | 305 | uint32_t * unpack24_16( uint32_t base, uint32_t * in, uint32_t * out); 306 | 307 | 308 | uint32_t * unpack25_16( uint32_t base, uint32_t * in, uint32_t * out); 309 | 310 | 311 | uint32_t * unpack26_16( uint32_t base, uint32_t * in, uint32_t * out); 312 | 313 | 314 | uint32_t * unpack27_16( uint32_t base, uint32_t * in, uint32_t * out); 315 | 316 | 317 | uint32_t * unpack28_16( uint32_t base, uint32_t * in, uint32_t * out); 318 | 319 | 320 | uint32_t * unpack29_16( uint32_t base, uint32_t * in, uint32_t * out); 321 | 322 | 323 | uint32_t * unpack30_16( uint32_t base, uint32_t * in, uint32_t * out); 324 | 325 | 326 | uint32_t * unpack31_16( uint32_t base, uint32_t * in, uint32_t * out); 327 | 328 | 329 | uint32_t * unpack32_16( uint32_t base, uint32_t * in, uint32_t * out); 330 | 331 | uint32_t * pack1_32( uint32_t base, uint32_t * in, uint32_t * out); 332 | 333 | uint32_t * pack2_32( uint32_t base, uint32_t * in, uint32_t * out); 334 | 335 | uint32_t * pack3_32( uint32_t base, uint32_t * in, uint32_t * out); 336 | 337 | uint32_t * pack4_32( uint32_t base, uint32_t * in, uint32_t * out); 338 | 339 | uint32_t * pack5_32( uint32_t base, uint32_t * in, uint32_t * out); 340 | 341 | uint32_t * pack6_32( uint32_t base, uint32_t * in, uint32_t * out); 342 | 343 | uint32_t * pack7_32( uint32_t base, uint32_t * in, uint32_t * out); 344 | 345 | uint32_t * pack8_32( uint32_t base, uint32_t * in, uint32_t * out); 346 | 347 | uint32_t * pack9_32( uint32_t base, uint32_t * in, uint32_t * out); 348 | 349 | uint32_t * pack10_32( uint32_t base, uint32_t * in, uint32_t * out); 350 | 351 | uint32_t * pack11_32( uint32_t base, uint32_t * in, uint32_t * out); 352 | 353 | uint32_t * pack12_32( uint32_t base, uint32_t * in, uint32_t * out); 354 | 355 | uint32_t * pack13_32( uint32_t base, uint32_t * in, uint32_t * out); 356 | 357 | uint32_t * pack14_32( uint32_t base, uint32_t * in, uint32_t * out); 358 | 359 | uint32_t * pack15_32( uint32_t base, 
uint32_t * in, uint32_t * out); 360 | 361 | uint32_t * pack16_32( uint32_t base, uint32_t * in, uint32_t * out); 362 | 363 | uint32_t * pack17_32( uint32_t base, uint32_t * in, uint32_t * out); 364 | 365 | uint32_t * pack18_32( uint32_t base, uint32_t * in, uint32_t * out); 366 | 367 | uint32_t * pack19_32( uint32_t base, uint32_t * in, uint32_t * out); 368 | 369 | uint32_t * pack20_32( uint32_t base, uint32_t * in, uint32_t * out); 370 | 371 | uint32_t * pack21_32( uint32_t base, uint32_t * in, uint32_t * out); 372 | 373 | uint32_t * pack22_32( uint32_t base, uint32_t * in, uint32_t * out); 374 | 375 | uint32_t * pack23_32( uint32_t base, uint32_t * in, uint32_t * out); 376 | 377 | uint32_t * pack24_32( uint32_t base, uint32_t * in, uint32_t * out); 378 | 379 | uint32_t * pack25_32( uint32_t base, uint32_t * in, uint32_t * out); 380 | 381 | uint32_t * pack26_32( uint32_t base, uint32_t * in, uint32_t * out); 382 | 383 | uint32_t * pack27_32( uint32_t base, uint32_t * in, uint32_t * out); 384 | 385 | uint32_t * pack28_32( uint32_t base, uint32_t * in, uint32_t * out); 386 | 387 | uint32_t * pack29_32( uint32_t base, uint32_t * in, uint32_t * out); 388 | 389 | uint32_t * pack30_32( uint32_t base, uint32_t * in, uint32_t * out); 390 | 391 | uint32_t * pack31_32( uint32_t base, uint32_t * in, uint32_t * out); 392 | 393 | uint32_t * pack32_32( uint32_t base, uint32_t * in, uint32_t * out); 394 | 395 | 396 | uint32_t * unpack1_32( uint32_t base, uint32_t * in, uint32_t * out); 397 | 398 | 399 | uint32_t * unpack2_32( uint32_t base, uint32_t * in, uint32_t * out); 400 | 401 | 402 | uint32_t * unpack3_32( uint32_t base, uint32_t * in, uint32_t * out); 403 | 404 | 405 | uint32_t * unpack4_32( uint32_t base, uint32_t * in, uint32_t * out); 406 | 407 | 408 | uint32_t * unpack5_32( uint32_t base, uint32_t * in, uint32_t * out); 409 | 410 | 411 | uint32_t * unpack6_32( uint32_t base, uint32_t * in, uint32_t * out); 412 | 413 | 414 | uint32_t * unpack7_32( uint32_t base, 
uint32_t * in, uint32_t * out); 415 | 416 | 417 | uint32_t * unpack8_32( uint32_t base, uint32_t * in, uint32_t * out); 418 | 419 | 420 | uint32_t * unpack9_32( uint32_t base, uint32_t * in, uint32_t * out); 421 | 422 | 423 | uint32_t * unpack10_32( uint32_t base, uint32_t * in, uint32_t * out); 424 | 425 | 426 | uint32_t * unpack11_32( uint32_t base, uint32_t * in, uint32_t * out); 427 | 428 | 429 | uint32_t * unpack12_32( uint32_t base, uint32_t * in, uint32_t * out); 430 | 431 | 432 | uint32_t * unpack13_32( uint32_t base, uint32_t * in, uint32_t * out); 433 | 434 | 435 | uint32_t * unpack14_32( uint32_t base, uint32_t * in, uint32_t * out); 436 | 437 | 438 | uint32_t * unpack15_32( uint32_t base, uint32_t * in, uint32_t * out); 439 | 440 | 441 | uint32_t * unpack16_32( uint32_t base, uint32_t * in, uint32_t * out); 442 | 443 | 444 | uint32_t * unpack17_32( uint32_t base, uint32_t * in, uint32_t * out); 445 | 446 | 447 | uint32_t * unpack18_32( uint32_t base, uint32_t * in, uint32_t * out); 448 | 449 | 450 | uint32_t * unpack19_32( uint32_t base, uint32_t * in, uint32_t * out); 451 | 452 | 453 | uint32_t * unpack20_32( uint32_t base, uint32_t * in, uint32_t * out); 454 | 455 | 456 | uint32_t * unpack21_32( uint32_t base, uint32_t * in, uint32_t * out); 457 | 458 | 459 | uint32_t * unpack22_32( uint32_t base, uint32_t * in, uint32_t * out); 460 | 461 | 462 | uint32_t * unpack23_32( uint32_t base, uint32_t * in, uint32_t * out); 463 | 464 | 465 | uint32_t * unpack24_32( uint32_t base, uint32_t * in, uint32_t * out); 466 | 467 | 468 | uint32_t * unpack25_32( uint32_t base, uint32_t * in, uint32_t * out); 469 | 470 | 471 | uint32_t * unpack26_32( uint32_t base, uint32_t * in, uint32_t * out); 472 | 473 | 474 | uint32_t * unpack27_32( uint32_t base, uint32_t * in, uint32_t * out); 475 | 476 | 477 | uint32_t * unpack28_32( uint32_t base, uint32_t * in, uint32_t * out); 478 | 479 | 480 | uint32_t * unpack29_32( uint32_t base, uint32_t * in, uint32_t * out); 481 | 482 | 
483 | uint32_t * unpack30_32( uint32_t base, uint32_t * in, uint32_t * out); 484 | 485 | 486 | uint32_t * unpack31_32( uint32_t base, uint32_t * in, uint32_t * out); 487 | 488 | 489 | uint32_t * unpack32_32( uint32_t base, uint32_t * in, uint32_t * out); 490 | 491 | /* Signature shared by every packer and unpacker below: (base, in, out), returning a stream pointer. */ 492 | typedef uint32_t * (*packfnc)( uint32_t, uint32_t * , uint32_t * ); 493 | /* Packer for bit width 0: nothing has to be stored, so the output pointer is returned unchanged. */ 494 | static uint32_t * nullpacker( uint32_t, uint32_t * , uint32_t * out) { 495 | return out; 496 | } 497 | /* Unpacker for bit width 0, blocks of 8 values: every value equals base; no input is consumed, so in is returned as is. */ 498 | static uint32_t * nullunpacker8( uint32_t base, uint32_t * in , uint32_t * out) { 499 | for(int k = 0; k < 8; ++k) { 500 | out[k] = base; 501 | } 502 | return in; 503 | } 504 | /* Unpacker for bit width 0, blocks of 16 values: all 16 outputs are set to base (slot 0 of unpack16). */ 505 | static uint32_t * nullunpacker16( uint32_t base, uint32_t * in , uint32_t * out) { 506 | for(int k = 0; k < 16; ++k) { /* was 8, which left out[8..15] unwritten */ 507 | out[k] = base; 508 | } 509 | return in; 510 | } 511 | /* Unpacker for bit width 0, blocks of 32 values: all 32 outputs are set to base (slot 0 of unpack32). */ 512 | static uint32_t * nullunpacker32( uint32_t base, uint32_t * in , uint32_t * out) { 513 | for(int k = 0; k < 32; ++k) { /* was 8, which left out[8..31] unwritten */ 514 | out[k] = base; 515 | } 516 | return in; 517 | } 518 | 519 | 520 | /* Dispatch tables with 33 slots, one per bit width 0..32; slot 0 holds the null routine. */ 521 | const static packfnc unpack8[33]= {nullunpacker8,unpack1_8, 522 | unpack2_8, 523 | unpack3_8, 524 | unpack4_8, 525 | unpack5_8, 526 | unpack6_8, 527 | unpack7_8, 528 | unpack8_8, 529 | unpack9_8, 530 | unpack10_8, 531 | unpack11_8, 532 | unpack12_8, 533 | unpack13_8, 534 | unpack14_8, 535 | unpack15_8, 536 | unpack16_8, 537 | unpack17_8, 538 | unpack18_8, 539 | unpack19_8, 540 | unpack20_8, 541 | unpack21_8, 542 | unpack22_8, 543 | unpack23_8, 544 | unpack24_8, 545 | unpack25_8, 546 | unpack26_8, 547 | unpack27_8, 548 | unpack28_8, 549 | unpack29_8, 550 | unpack30_8, 551 | unpack31_8, 552 | unpack32_8 553 | }; 554 | 555 | const static packfnc pack8[33]= {nullpacker,pack1_8, 556 | pack2_8, 557 | pack3_8, 558 | pack4_8, 559 | pack5_8, 560 | pack6_8, 561 | pack7_8, 562 | pack8_8, 563 | pack9_8, 564 | pack10_8, 565 | pack11_8, 566 | pack12_8, 567 | pack13_8, 568 | pack14_8, 569 | pack15_8, 570 | pack16_8, 571 | pack17_8, 572 | pack18_8, 573 | pack19_8, 574 | 
pack20_8, 575 | pack21_8, 576 | pack22_8, 577 | pack23_8, 578 | pack24_8, 579 | pack25_8, 580 | pack26_8, 581 | pack27_8, 582 | pack28_8, 583 | pack29_8, 584 | pack30_8, 585 | pack31_8, 586 | pack32_8 587 | }; 588 | 589 | /* Unpackers for the 16-value routines: 33 slots, one per bit width 0..32; slot 0 is nullunpacker16. */ 590 | const static packfnc unpack16[33]= {nullunpacker16,unpack1_16, 591 | unpack2_16, 592 | unpack3_16, 593 | unpack4_16, 594 | unpack5_16, 595 | unpack6_16, 596 | unpack7_16, 597 | unpack8_16, 598 | unpack9_16, 599 | unpack10_16, 600 | unpack11_16, 601 | unpack12_16, 602 | unpack13_16, 603 | unpack14_16, 604 | unpack15_16, 605 | unpack16_16, 606 | unpack17_16, 607 | unpack18_16, 608 | unpack19_16, 609 | unpack20_16, 610 | unpack21_16, 611 | unpack22_16, 612 | unpack23_16, 613 | unpack24_16, 614 | unpack25_16, 615 | unpack26_16, 616 | unpack27_16, 617 | unpack28_16, 618 | unpack29_16, 619 | unpack30_16, 620 | unpack31_16, 621 | unpack32_16 622 | }; 623 | 624 | /* Packers for the 16-value routines, indexed by bit width 0..32; compress() in compression.h calls pack16[b] and advances its input by 16 per call. */ 625 | const static packfnc pack16[33]= {nullpacker,pack1_16, 626 | pack2_16, 627 | pack3_16, 628 | pack4_16, 629 | pack5_16, 630 | pack6_16, 631 | pack7_16, 632 | pack8_16, 633 | pack9_16, 634 | pack10_16, 635 | pack11_16, 636 | pack12_16, 637 | pack13_16, 638 | pack14_16, 639 | pack15_16, 640 | pack16_16, 641 | pack17_16, 642 | pack18_16, 643 | pack19_16, 644 | pack20_16, 645 | pack21_16, 646 | pack22_16, 647 | pack23_16, 648 | pack24_16, 649 | pack25_16, 650 | pack26_16, 651 | pack27_16, 652 | pack28_16, 653 | pack29_16, 654 | pack30_16, 655 | pack31_16, 656 | pack32_16 657 | }; 658 | /* Unpackers for the 32-value routines: 33 slots, one per bit width 0..32; slot 0 is nullunpacker32. */ 659 | const static packfnc unpack32[33]= {nullunpacker32,unpack1_32, 660 | unpack2_32, 661 | unpack3_32, 662 | unpack4_32, 663 | unpack5_32, 664 | unpack6_32, 665 | unpack7_32, 666 | unpack8_32, 667 | unpack9_32, 668 | unpack10_32, 669 | unpack11_32, 670 | unpack12_32, 671 | unpack13_32, 672 | unpack14_32, 673 | unpack15_32, 674 | unpack16_32, 675 | unpack17_32, 676 | unpack18_32, 677 | unpack19_32, 678 | unpack20_32, 679 | unpack21_32, 680 | unpack22_32, 681 | unpack23_32, 682 | unpack24_32, 683 | 
unpack25_32, 684 | unpack26_32, 685 | unpack27_32, 686 | unpack28_32, 687 | unpack29_32, 688 | unpack30_32, 689 | unpack31_32, 690 | unpack32_32 691 | }; 692 | /* Packers for the 32-value routines, indexed by bit width 0..32; compress() in compression.h calls pack32[b] and advances its input by 32 per call. */ 693 | const static packfnc pack32[33]= {nullpacker,pack1_32, 694 | pack2_32, 695 | pack3_32, 696 | pack4_32, 697 | pack5_32, 698 | pack6_32, 699 | pack7_32, 700 | pack8_32, 701 | pack9_32, 702 | pack10_32, 703 | pack11_32, 704 | pack12_32, 705 | pack13_32, 706 | pack14_32, 707 | pack15_32, 708 | pack16_32, 709 | pack17_32, 710 | pack18_32, 711 | pack19_32, 712 | pack20_32, 713 | pack21_32, 714 | pack22_32, 715 | pack23_32, 716 | pack24_32, 717 | pack25_32, 718 | pack26_32, 719 | pack27_32, 720 | pack28_32, 721 | pack29_32, 722 | pack30_32, 723 | pack31_32, 724 | pack32_32 725 | }; 726 | #endif 727 | -------------------------------------------------------------------------------- /include/common.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * This code is released under the 4 | * Apache License Version 2.0 http://www.apache.org/licenses/. 5 | * (c) Daniel Lemire 2013 6 | */ 7 | 8 | #ifndef COMMON_H_ 9 | #define COMMON_H_ 10 | 11 | /* NOTE(review): the header names after each #include below are missing in this copy and must be restored before this header compiles. */ 12 | #include 13 | #include 14 | #include // mostly for Microsoft compilers 15 | #include 16 | #include // part of Visual Studio 2010 and better 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #endif /* COMMON_H_ */ 27 | -------------------------------------------------------------------------------- /include/compression.h: -------------------------------------------------------------------------------- 1 | /* 2 | * compression.h 3 | * 4 | * Created on: Oct 28, 2013 5 | * Author: lemire 6 | */ 7 | 8 | #ifndef COMPRESSION_H_ 9 | #define COMPRESSION_H_ 10 | 11 | #include "bpacking.h" 12 | #include "util.h" 13 | 14 | /** 15 | * Compresses "length" values from "in" to "out" and returns a pointer to the end of the compressed stream. 
16 | * The format is "number of values, minimal value, maximal value, followed by packed data". 17 | */ 18 | inline uint32_t * compress(uint32_t * in, uint32_t length, uint32_t * out) { 19 | out[0] = length; 20 | ++out; 21 | if(length == 0) return out; 22 | uint32_t m = in[0]; 23 | uint32_t M = in[0]; 24 | for(uint32_t i = 1; i < length; ++i) { 25 | if(in[i]>M) M=in[i]; 26 | if(in[i](M-m)); 29 | out[0] = m; 30 | ++out; 31 | out[0] = M; 32 | ++out; 33 | uint32_t k = 0; 34 | for(; k+32<=length; k+=32,in+=32) { 35 | out = pack32[b](m,in,out); 36 | } 37 | for(; k+16<=length; k+=16,in+=16) { 38 | out = pack16[b](m,in,out); 39 | } 40 | for(; k+8<=length; k+=8,in+=8) { 41 | out = pack8[b](m,in,out); 42 | } 43 | // we could pack the rest, but we don't bother 44 | for(; k(M-m)); 66 | #ifdef _OPENMP 67 | #pragma omp parallel for 68 | #endif 69 | for(uint32_t k = 0; k M) 38 | M = in[i]; 39 | if (in[i] < m) 40 | m = in[i]; 41 | } 42 | int b = bits(static_cast(M - m)); 43 | memcpy(out, &m, sizeof(m)); 44 | out += sizeof(m); 45 | memcpy(out, &M, sizeof(M)); 46 | out += sizeof(M); 47 | uint32_t k = 0; 48 | for (; k + 32 <= length; k += 32) { 49 | funcForPackArr[b](m, &in, &out); 50 | } 51 | // we could pack the rest, but we don't bother 52 | memcpy(out, in, (length - k) * sizeof(uint32_t)); 53 | out += (length - k) * sizeof(uint32_t); 54 | return out; 55 | } 56 | 57 | /* 58 | * uncompress FOR data found in "in". 59 | * The format is "number of values, minimal value, maximal value, followed by 60 | * packed data". 61 | * The 'nvalue' variable receives the number of decoded values (initial value is 62 | * ignored) 63 | * The values are stored in "out". 64 | * We return a pointer to the end of the compressed input stream. 
65 | */ 66 | inline const uint8_t *turbouncompress(const uint8_t *in, uint32_t *out, 67 | uint32_t &nvalue) { 68 | memcpy(&nvalue, in, sizeof(nvalue)); 69 | in += sizeof(nvalue); 70 | if (nvalue == 0) 71 | return in; 72 | uint32_t m, M; 73 | memcpy(&m, in, sizeof(m)); 74 | in += sizeof(m); 75 | memcpy(&M, in, sizeof(M)); 76 | in += sizeof(M); 77 | int b = bits(static_cast(M - m)); 78 | #ifdef _OPENMP 79 | #pragma omp parallel for 80 | #endif 81 | for (uint32_t k = 0; k < nvalue / 32; ++k) { 82 | // could code as funcForUnpackArr[b](m,&in,&out); but it hurts 83 | // parallelization 84 | const uint8_t *input = in + 32 * b * k / 8; 85 | uint32_t *output = out + k * 32; 86 | funcForUnpackArr[b](m, &input, &output); 87 | } 88 | in = in + (32 * b / 8) * (nvalue / 32); 89 | out = out + 32 * (nvalue / 32); 90 | 91 | // we could pack the rest, but we don't bother 92 | uint32_t leftover = nvalue - nvalue / 32 * 32; 93 | memcpy(out, in, leftover * sizeof(uint32_t)); 94 | in += leftover * sizeof(uint32_t); 95 | return in; 96 | } 97 | 98 | 99 | 100 | /** 101 | * Compresses "length" values from "in" to "out" and return a pointer to the end 102 | * of the compressed stream. 103 | * The format is "number of values, minimal value, maximal value, followed by 104 | * packed data". 105 | * 106 | * Currently the implementation assumes that the integer arrays to be compressed 107 | * are in multiples of 32, remaining integers are not compressed. Thus using this 108 | * code on arrays smaller than 32 is wasteful. (This limitation will be removed 109 | * in the future.) 
110 | */ 111 | inline uint8_t *turbocompress64(const uint64_t *in, uint32_t length, 112 | uint8_t *out) { 113 | memcpy(out, &length, sizeof(length)); 114 | out += sizeof(length); 115 | if (length == 0) 116 | return out; 117 | uint64_t m = in[0]; 118 | uint64_t M = in[0]; 119 | for (uint32_t i = 1; i < length; ++i) { 120 | if (in[i] > M) 121 | M = in[i]; 122 | if (in[i] < m) 123 | m = in[i]; 124 | } 125 | int b = bits64(static_cast(M - m)); 126 | memcpy(out, &m, sizeof(m)); 127 | out += sizeof(m); 128 | memcpy(out, &M, sizeof(M)); 129 | out += sizeof(M); 130 | 131 | uint32_t k = 0; 132 | for (; k + 32 <= length; k += 32) { 133 | funcForPackArr64[b](m, &in, &out); 134 | } 135 | // we could pack the rest, but we don't bother 136 | memcpy(out, in, (length - k) * sizeof(uint64_t)); 137 | out += (length - k) * sizeof(uint64_t); 138 | return out; 139 | } 140 | 141 | /* 142 | * uncompress FOR data found in "in". 143 | * The format is "number of values, minimal value, maximal value, followed by 144 | * packed data". 145 | * The 'nvalue' variable receives the number of decoded values (initial value is 146 | * ignored) 147 | * The values are stored in "out". 148 | * We return a pointer to the end of the compressed input stream. 
149 | */ 150 | inline const uint8_t *turbouncompress64(const uint8_t *in, uint64_t *out, 151 | uint32_t &nvalue) { 152 | memcpy(&nvalue, in, sizeof(nvalue)); 153 | in += sizeof(nvalue); 154 | if (nvalue == 0) 155 | return in; 156 | uint64_t m, M; 157 | memcpy(&m, in, sizeof(m)); 158 | in += sizeof(m); 159 | memcpy(&M, in, sizeof(M)); 160 | in += sizeof(M); 161 | int b = bits64(static_cast(M - m)); 162 | #ifdef _OPENMP 163 | #pragma omp parallel for 164 | #endif 165 | for (uint32_t k = 0; k < nvalue / 32; ++k) { 166 | // could code as funcForUnpackArr[b](m,&in,&out); but it hurts 167 | // parallelization 168 | const uint8_t *input = in + 32 * b * k / 8; 169 | uint64_t *output = out + k * 32; 170 | funcForUnpackArr64[b](m, &input, &output); 171 | } 172 | in = in + (32 * b / 8) * (nvalue / 32); 173 | out = out + 32 * (nvalue / 32); 174 | 175 | // we could pack the rest, but we don't bother 176 | uint32_t leftover = nvalue - nvalue / 32 * 32; 177 | memcpy(out, in, leftover * sizeof(uint64_t)); 178 | in += leftover * sizeof(uint64_t); 179 | return in; 180 | } 181 | 182 | 183 | #endif /* COMPRESSION_H_ */ 184 | -------------------------------------------------------------------------------- /include/util.h: -------------------------------------------------------------------------------- 1 | #ifndef FRAMEOFREFERENCE_INCLUDE_UTIL_H 2 | #define FRAMEOFREFERENCE_INCLUDE_UTIL_H 3 | #include // mostly for Microsoft 4 | #ifdef _MSC_VER 5 | /* Microsoft C/C++-compatible compiler */ 6 | #include 7 | 8 | #ifndef __clang__ // if one compiles with MSVC *with* clang, then these intrinsics are defined!!! 9 | // sadly there is no way to check whether we are missing these intrinsics specifically. 10 | 11 | /* wrappers for Visual Studio built-ins that look like gcc built-ins */ 12 | /* result might be undefined when input_num is zero */ 13 | static inline int __builtin_ctzll(unsigned long long input_num) { 14 | unsigned long index; 15 | #ifdef _WIN64 // highly recommended!!! 
16 | _BitScanForward64(&index, input_num); 17 | #else // if we must support 32-bit Windows 18 | if ((uint32_t)input_num != 0) { 19 | _BitScanForward(&index, (uint32_t)input_num); 20 | } 21 | else { 22 | _BitScanForward(&index, (uint32_t)(input_num >> 32)); 23 | index += 32; 24 | } 25 | #endif 26 | return index; 27 | } 28 | 29 | /* result might be undefined when input_num is zero */ 30 | static inline int __builtin_clzll(unsigned long long input_num) { 31 | unsigned long index; 32 | #ifdef _WIN64 // highly recommended!!! 33 | _BitScanReverse64(&index, input_num); 34 | #else // if we must support 32-bit Windows 35 | if (input_num > 0xFFFFFFF) { 36 | _BitScanReverse(&index, (uint32_t)(input_num >> 32)); 37 | } 38 | else { 39 | _BitScanReverse(&index, (uint32_t)(input_num)); 40 | index += 32; 41 | } 42 | #endif 43 | return 63 - index; 44 | } 45 | 46 | /* result might be undefined when input_num is zero */ 47 | static inline int __builtin_clz(int input_num) { 48 | unsigned long index; 49 | _BitScanReverse(&index, input_num); 50 | return 31 - index; 51 | } 52 | 53 | /* result might be undefined when input_num is zero */ 54 | static inline int __builtin_popcountll(unsigned long long input_num) { 55 | #ifdef _WIN64 // highly recommended!!! 56 | return (int)__popcnt64(input_num); 57 | #else // if we must support 32-bit Windows 58 | return (int)(__popcnt((uint32_t)input_num) + __popcnt((uint32_t)(input_num >> 32))); 59 | #endif 60 | } 61 | 62 | static inline void __builtin_unreachable() { 63 | __assume(0); 64 | } 65 | #endif 66 | #endif 67 | 68 | 69 | #include // part of Visual Studio 2010 and better 70 | 71 | // integer logarithm function 72 | static inline uint32_t bits(const uint32_t v) { 73 | return v == 0 ? 0 : 32 - __builtin_clz(v); // todo: make portable (Visual studio) 74 | } 75 | 76 | 77 | 78 | // integer logarithm function 79 | static inline uint32_t bits64(const uint64_t v) { 80 | return v == 0 ? 
0 : 64 - __builtin_clzll(v); // todo: make portable (Visual studio) 81 | } 82 | 83 | #endif // FRAMEOFREFERENCE_INCLUDE_UTIL_H 84 | -------------------------------------------------------------------------------- /sampledata.txt: -------------------------------------------------------------------------------- 1 | 4106 2 | 4481 3 | 4192 4 | 4420 5 | 4478 6 | 4088 7 | 4338 8 | 4320 9 | 4353 10 | 4253 11 | 4129 12 | 4284 13 | 4430 14 | 4093 15 | 4329 16 | 4293 17 | 4366 18 | 4064 19 | 4124 20 | 4142 21 | 4435 22 | 4472 23 | 4497 24 | 4423 25 | 4357 26 | 4498 27 | 4034 28 | 4386 29 | 4419 30 | 4184 31 | 4228 32 | 4302 33 | 4168 34 | 4080 35 | 4317 36 | 4056 37 | 4104 38 | 4037 39 | 4397 40 | 4001 41 | 4300 42 | 4346 43 | 4176 44 | 4203 45 | 4095 46 | 4128 47 | 4145 48 | 4348 49 | 4174 50 | 4403 51 | 4004 52 | 4157 53 | 4015 54 | 4119 55 | 4321 56 | 4350 57 | 4216 58 | 4375 59 | 4394 60 | 4238 61 | 4265 62 | 4484 63 | 4245 64 | 4025 65 | 4248 66 | 4136 67 | 4049 68 | 4221 69 | 4478 70 | 4058 71 | 4031 72 | 4133 73 | 4165 74 | 4448 75 | 4008 76 | 4451 77 | 4434 78 | 4404 79 | 4164 80 | 4065 81 | 4268 82 | 4255 83 | 4386 84 | 4245 85 | 4414 86 | 4219 87 | 4123 88 | 4339 89 | 4071 90 | 4114 91 | 4027 92 | 4074 93 | 4001 94 | 4123 95 | 4313 96 | 4339 97 | 4394 98 | 4383 99 | 4045 100 | 4069 101 | 4398 102 | 4083 103 | 4207 104 | 4340 105 | 4499 106 | 4264 107 | 4233 108 | 4410 109 | 4487 110 | 4202 111 | 4486 112 | 4325 113 | 4102 114 | 4094 115 | 4044 116 | 4025 117 | 4097 118 | 4287 119 | 4348 120 | 4230 121 | 4046 122 | 4440 123 | 4034 124 | 4487 125 | 4259 126 | 4016 127 | 4106 128 | 4320 129 | 4344 130 | 4175 131 | 4087 132 | 4070 133 | 4080 134 | 4095 135 | 4381 136 | 4262 137 | 4053 138 | 4033 139 | 4103 140 | 4040 141 | 4148 142 | 4312 143 | 4424 144 | 4149 145 | 4111 146 | 4419 147 | 4012 148 | 4113 149 | 4118 150 | 4458 151 | 4018 152 | 4088 153 | 4356 154 | 4389 155 | 4408 156 | 4135 157 | 4457 158 | 4214 159 | 4143 160 | 4155 161 | 4250 162 | 4388 163 | 4424 164 
| 4128 165 | 4446 166 | 4225 167 | 4109 168 | 4438 169 | 4498 170 | 4351 171 | 4060 172 | 4038 173 | 4366 174 | 4240 175 | 4020 176 | 4080 177 | 4243 178 | 4420 179 | 4299 180 | 4428 181 | 4326 182 | 4072 183 | 4263 184 | 4192 185 | 4315 186 | 4492 187 | 4129 188 | 4026 189 | 4112 190 | 4100 191 | 4024 192 | 4254 193 | 4418 194 | 4413 195 | 4279 196 | 4249 197 | 4073 198 | 4398 199 | 4086 200 | 4282 201 | 4405 202 | 4061 203 | 4279 204 | 4373 205 | 4142 206 | 4236 207 | 4278 208 | 4376 209 | 4103 210 | 4266 211 | 4157 212 | 4117 213 | 4243 214 | 4276 215 | 4409 216 | 4245 217 | 4379 218 | 4039 219 | 4419 220 | 4383 221 | 4443 222 | 4284 223 | 4490 224 | 4332 225 | 4405 226 | 4330 227 | 4208 228 | 4121 229 | 4419 230 | 4259 231 | 4195 232 | 4246 233 | 4365 234 | 4469 235 | 4115 236 | 4467 237 | 4121 238 | 4262 239 | 4191 240 | 4082 241 | 4233 242 | 4495 243 | 4497 244 | 4188 245 | 4398 246 | 4280 247 | 4467 248 | 4006 249 | 4198 250 | 4216 251 | 4216 252 | 4342 253 | 4320 254 | 4229 255 | 4068 256 | 4222 257 | 4497 258 | 4052 259 | 4069 260 | 4355 261 | 4441 262 | 4457 263 | 4327 264 | 4271 265 | 4164 266 | 4271 267 | 4083 268 | 4269 269 | 4394 270 | 4424 271 | 4035 272 | 4013 273 | 4336 274 | 4206 275 | 4463 276 | 4147 277 | 4268 278 | 4270 279 | 4389 280 | 4489 281 | 4498 282 | 4032 283 | 4053 284 | 4165 285 | 4321 286 | 4410 287 | 4180 288 | 4033 289 | 4077 290 | 4074 291 | 4224 292 | 4103 293 | 4021 294 | 4179 295 | 4223 296 | 4432 297 | 4335 298 | 4387 299 | 4090 300 | 4246 301 | 4133 302 | 4499 303 | 4023 304 | 4148 305 | 4280 306 | 4429 307 | 4007 308 | 4037 309 | 4453 310 | 4075 311 | 4023 312 | 4305 313 | 4298 314 | 4303 315 | 4422 316 | 4209 317 | 4409 318 | 4009 319 | 4301 320 | 4493 321 | 4478 322 | 4482 323 | 4240 324 | 4404 325 | 4057 326 | 4449 327 | 4103 328 | 4388 329 | 4131 330 | 4215 331 | 4169 332 | 4262 333 | 4347 334 | 4366 335 | 4274 336 | 4403 337 | 4335 338 | 4223 339 | 4442 340 | 4376 341 | 4469 342 | 4246 343 | 4476 344 | 4022 345 | 4366 
346 | 4355 347 | 4283 348 | 4160 349 | 4349 350 | 4275 351 | 4013 352 | 4167 353 | 4382 354 | 4453 355 | 4100 356 | 4346 357 | 4124 358 | 4475 359 | 4134 360 | 4396 361 | 4479 362 | 4223 363 | 4117 364 | 4141 365 | 4184 366 | 4271 367 | 4361 368 | 4140 369 | 4368 370 | 4360 371 | 4415 372 | 4448 373 | 4170 374 | 4382 375 | 4045 376 | 4163 377 | 4210 378 | 4243 379 | 4363 380 | 4255 381 | 4092 382 | 4304 383 | 4048 384 | 4288 385 | 4305 386 | 4494 387 | 4457 388 | 4349 389 | 4266 390 | 4274 391 | 4333 392 | 4037 393 | 4009 394 | 4345 395 | 4282 396 | 4142 397 | 4341 398 | 4334 399 | 4202 400 | 4436 401 | 4392 402 | 4054 403 | 4363 404 | 4027 405 | 4490 406 | 4134 407 | 4495 408 | 4391 409 | 4057 410 | 4312 411 | 4136 412 | 4223 413 | 4022 414 | 4104 415 | 4169 416 | 4005 417 | 4115 418 | 4163 419 | 4275 420 | 4018 421 | 4382 422 | 4387 423 | 4011 424 | 4374 425 | 4289 426 | 4239 427 | 4356 428 | 4457 429 | 4318 430 | 4187 431 | 4177 432 | 4154 433 | 4002 434 | 4291 435 | 4244 436 | 4245 437 | 4289 438 | 4197 439 | 4229 440 | 4262 441 | 4148 442 | 4118 443 | 4406 444 | 4163 445 | 4381 446 | 4406 447 | 4131 448 | 4314 449 | 4301 450 | 4232 451 | 4270 452 | 4496 453 | 4484 454 | 4392 455 | 4047 456 | 4238 457 | 4341 458 | 4203 459 | 4203 460 | 4149 461 | 4046 462 | 4374 463 | 4494 464 | 4241 465 | 4264 466 | 4483 467 | 4131 468 | 4244 469 | 4266 470 | 4080 471 | 4192 472 | 4140 473 | 4129 474 | 4242 475 | 4393 476 | 4381 477 | 4139 478 | 4107 479 | 4267 480 | 4129 481 | 4111 482 | 4316 483 | 4380 484 | 4214 485 | 4311 486 | 4148 487 | 4230 488 | 4420 489 | 4430 490 | 4440 491 | 4012 492 | 4117 493 | 4289 494 | 4233 495 | 4242 496 | 4341 497 | 4215 498 | 4351 499 | 4216 500 | 4400 501 | 4224 502 | 4253 503 | 4141 504 | 4474 505 | 4212 506 | 4088 507 | 4263 508 | 4229 509 | 4142 510 | 4301 511 | 4042 512 | 4274 513 | 4361 514 | 4312 515 | 4492 516 | 4052 517 | 4214 518 | 4375 519 | 4177 520 | 4390 521 | 4251 522 | 4116 523 | 4235 524 | 4355 525 | 4008 526 | 4208 527 | 
4141 528 | 4366 529 | 4434 530 | 4277 531 | 4383 532 | 4411 533 | 4435 534 | 4110 535 | 4303 536 | 4071 537 | 4187 538 | 4061 539 | 4132 540 | 4225 541 | 4374 542 | 4210 543 | 4397 544 | 4482 545 | 4298 546 | 4431 547 | 4371 548 | 4221 549 | 4361 550 | 4396 551 | 4044 552 | 4027 553 | 4428 554 | 4001 555 | 4148 556 | 4211 557 | 4149 558 | 4372 559 | 4023 560 | 4104 561 | 4186 562 | 4423 563 | 4303 564 | 4184 565 | 4373 566 | 4213 567 | 4194 568 | 4083 569 | 4125 570 | 4268 571 | 4017 572 | 4098 573 | 4291 574 | 4248 575 | 4114 576 | 4309 577 | 4294 578 | 4089 579 | 4358 580 | 4202 581 | 4187 582 | 4235 583 | 4381 584 | 4286 585 | 4108 586 | 4208 587 | 4481 588 | 4174 589 | 4399 590 | 4098 591 | 4222 592 | 4094 593 | 4005 594 | 4318 595 | 4210 596 | 4277 597 | 4153 598 | 4151 599 | 4058 600 | 4332 601 | 4451 602 | 4326 603 | 4126 604 | 4420 605 | 4407 606 | 4181 607 | 4289 608 | 4405 609 | 4229 610 | 4279 611 | 4391 612 | 4478 613 | 4339 614 | 4273 615 | 4073 616 | 4228 617 | 4430 618 | 4363 619 | 4363 620 | 4409 621 | 4184 622 | 4090 623 | 4284 624 | 4424 625 | 4213 626 | 4487 627 | 4193 628 | 4090 629 | 4305 630 | 4419 631 | 4411 632 | 4427 633 | 4180 634 | 4168 635 | 4092 636 | 4217 637 | 4496 638 | 4102 639 | 4151 640 | 4262 641 | 4196 642 | 4180 643 | 4158 644 | 4162 645 | 4493 646 | 4059 647 | 4394 648 | 4272 649 | 4421 650 | 4274 651 | 4278 652 | 4091 653 | 4248 654 | 4003 655 | 4216 656 | 4116 657 | 4256 658 | 4308 659 | 4095 660 | 4146 661 | 4192 662 | 4082 663 | 4022 664 | 4478 665 | 4314 666 | 4334 667 | 4318 668 | 4005 669 | 4337 670 | 4365 671 | 4161 672 | 4323 673 | 4368 674 | 4390 675 | 4054 676 | 4267 677 | 4377 678 | 4268 679 | 4423 680 | 4324 681 | 4032 682 | 4457 683 | 4408 684 | 4373 685 | 4130 686 | 4220 687 | 4420 688 | 4496 689 | 4409 690 | 4001 691 | 4388 692 | 4063 693 | 4176 694 | 4275 695 | 4404 696 | 4490 697 | 4010 698 | 4470 699 | 4268 700 | 4109 701 | 4171 702 | 4410 703 | 4144 704 | 4375 705 | 4347 706 | 4133 707 | 4197 708 | 4441 709 
| 4467 710 | 4171 711 | 4060 712 | 4260 713 | 4036 714 | 4194 715 | 4250 716 | 4285 717 | 4032 718 | 4433 719 | 4186 720 | 4405 721 | 4454 722 | 4002 723 | 4120 724 | 4264 725 | 4472 726 | 4134 727 | 4279 728 | 4045 729 | 4413 730 | 4084 731 | 4441 732 | 4241 733 | 4010 734 | 4151 735 | 4389 736 | 4398 737 | 4333 738 | 4165 739 | 4295 740 | 4140 741 | 4450 742 | 4045 743 | 4278 744 | 4072 745 | 4389 746 | 4295 747 | 4342 748 | 4305 749 | 4385 750 | 4183 751 | 4476 752 | 4123 753 | 4267 754 | 4120 755 | 4279 756 | 4124 757 | 4424 758 | 4073 759 | 4324 760 | 4014 761 | 4460 762 | 4402 763 | 4109 764 | 4183 765 | 4468 766 | 4251 767 | 4333 768 | 4158 769 | 4279 770 | 4148 771 | 4317 772 | 4336 773 | 4351 774 | 4059 775 | 4073 776 | 4226 777 | 4362 778 | 4454 779 | 4090 780 | 4302 781 | 4053 782 | 4429 783 | 4323 784 | 4436 785 | 4495 786 | 4118 787 | 4031 788 | 4129 789 | 4015 790 | 4483 791 | 4054 792 | 4226 793 | 4285 794 | 4027 795 | 4474 796 | 4038 797 | 4121 798 | 4440 799 | 4279 800 | 4180 801 | 4090 802 | 4268 803 | 4395 804 | 4149 805 | 4483 806 | 4314 807 | 4254 808 | 4379 809 | 4210 810 | 4141 811 | 4394 812 | 4114 813 | 4434 814 | 4043 815 | 4001 816 | 4258 817 | 4429 818 | 4050 819 | 4071 820 | 4329 821 | 4221 822 | 4146 823 | 4176 824 | 4206 825 | 4310 826 | 4048 827 | 4151 828 | 4496 829 | 4254 830 | 4094 831 | 4140 832 | 4076 833 | 4230 834 | 4200 835 | 4270 836 | 4396 837 | 4486 838 | 4354 839 | 4191 840 | 4356 841 | 4052 842 | 4006 843 | 4490 844 | 4300 845 | 4268 846 | 4005 847 | 4298 848 | 4243 849 | 4440 850 | 4058 851 | 4289 852 | 4030 853 | 4464 854 | 4367 855 | 4056 856 | 4018 857 | 4232 858 | 4199 859 | 4475 860 | 4074 861 | 4387 862 | 4102 863 | 4236 864 | 4042 865 | 4348 866 | 4418 867 | 4171 868 | 4060 869 | 4035 870 | 4138 871 | 4155 872 | 4289 873 | 4364 874 | 4190 875 | 4293 876 | 4346 877 | 4399 878 | 4106 879 | 4486 880 | 4071 881 | 4018 882 | 4385 883 | 4223 884 | 4282 885 | 4281 886 | 4442 887 | 4468 888 | 4168 889 | 4481 890 | 4068 
891 | 4453 892 | 4054 893 | 4169 894 | 4240 895 | 4297 896 | 4119 897 | 4136 898 | 4023 899 | 4106 900 | 4048 901 | 4362 902 | 4415 903 | 4373 904 | 4198 905 | 4336 906 | 4262 907 | 4449 908 | 4164 909 | 4075 910 | 4393 911 | 4480 912 | 4094 913 | 4171 914 | 4243 915 | 4141 916 | 4216 917 | 4247 918 | 4317 919 | 4272 920 | 4376 921 | 4028 922 | 4259 923 | 4078 924 | 4076 925 | 4189 926 | 4436 927 | 4225 928 | 4264 929 | 4039 930 | 4459 931 | 4409 932 | 4038 933 | 4255 934 | 4481 935 | 4217 936 | 4029 937 | 4012 938 | 4113 939 | 4487 940 | 4200 941 | 4278 942 | 4371 943 | 4399 944 | 4258 945 | 4246 946 | 4010 947 | 4226 948 | 4470 949 | 4114 950 | 4454 951 | 4409 952 | 4309 953 | 4137 954 | 4007 955 | 4100 956 | 4402 957 | 4033 958 | 4056 959 | 4039 960 | 4103 961 | 4032 962 | 4119 963 | 4242 964 | 4018 965 | 4408 966 | 4139 967 | 4088 968 | 4300 969 | 4116 970 | 4134 971 | 4067 972 | 4206 973 | 4149 974 | 4021 975 | 4281 976 | 4476 977 | 4033 978 | 4106 979 | 4012 980 | 4024 981 | 4348 982 | 4118 983 | 4181 984 | 4251 985 | 4244 986 | 4263 987 | 4203 988 | 4158 989 | 4019 990 | 4211 991 | 4478 992 | 4140 993 | 4455 994 | 4500 995 | 4214 996 | 4478 997 | 4146 998 | 4457 999 | 4044 1000 | 4004 1001 | -------------------------------------------------------------------------------- /scripts/turbopacking32.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | def howmany(bit): 4 | """ how many values are we going to pack? 
""" 5 | return 32 6 | #number = (64+bit-1)/bit 7 | #while((number * bit) % 8 != 0): 8 | # number += 1 9 | #return number 10 | 11 | def howmanywords(bit): 12 | return (howmany(bit) * bit + 63)//64 13 | 14 | def howmanybytes(bit): 15 | return (howmany(bit) * bit + 7)//8 16 | print(""" 17 | #ifndef INCLUDE_TURBOPACKING32_H 18 | #define INCLUDE_TURBOPACKING32_H 19 | #include // mostly for Microsoft compilers 20 | #include // part of Visual Studio 2010 and better 21 | """) 22 | print(""" 23 | #ifndef UINT64_C 24 | #define UINT64_C(c) (c ## ULL) 25 | #endif 26 | """) 27 | print(""" 28 | /** 29 | * (c) Daniel Lemire 30 | * Apache License 2.0 31 | */ 32 | /** turbopacking32 starts here **/ 33 | /** 34 | * this code mimics the way TurboPFor packs short arrays of integers. 35 | * We pack and unpack always at least a full 64-bit word, plus whatever 36 | * is necessary to get to an even number of bytes. 37 | */""") 38 | 39 | print("""typedef void (*packforblockfnc)(const uint32_t base, const uint32_t ** pin, uint8_t ** pw);""") 40 | print("""typedef void (*unpackforblockfnc)(const uint32_t base, const uint8_t ** pw, uint32_t ** pout);""") 41 | 42 | 43 | 44 | def plurial(number): 45 | if(number > 1): 46 | return "s" 47 | else : 48 | return "" 49 | 50 | print("") 51 | print("static void packforblock0(const uint32_t , const uint32_t ** pin, uint8_t ** pw) {"); 52 | print(" (void)pw;"); 53 | print(" *pin += {0}; /* we consumed {0} 32-bit integer{1} */ ".format(howmany(0),plurial(howmany(0)))); 54 | print("}"); 55 | print("") 56 | 57 | for bit in range(1,33): 58 | print("") 59 | print("/* we are going to pack {0} {1}-bit values, touching {2} 64-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit))) 60 | print("static void packforblock{0}(const uint32_t base, const uint32_t ** pin, uint8_t ** pw) {{".format(bit)); 61 | print(" uint64_t * pw64 = *(uint64_t **) pw;"); 62 | print(" const uint32_t * in = *pin;"); 63 | print(" /* we are going to 
touch {0} 64-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit)))); 64 | for k in range(howmanywords(bit)) : 65 | print(" uint64_t w{0};".format(k)) 66 | for j in range(howmany(bit)): 67 | firstword = j * bit // 64 68 | secondword = (j * bit + bit - 1)//64 69 | firstshift = (j*bit) % 64 70 | if( firstword == secondword): 71 | if(firstshift == 0): 72 | print(" w{0} = (uint64_t) (in[{1}] - base);".format(firstword,j)) 73 | else: 74 | print(" w{0} |= (uint64_t) (in[{1}] - base) << {2};".format(firstword,j,firstshift)) 75 | else: 76 | print(" w{0} |= (uint64_t) (in[{1}] - base) << {2};".format(firstword,j,firstshift)) 77 | secondshift = 64-firstshift 78 | print(" w{0} = (uint64_t) (in[{1}] - base) >> {2};".format(secondword,j,secondshift)) 79 | for k in range(howmanywords(bit)) : 80 | print(" pw64[{0}] = w{0};".format(k)) 81 | print(" *pin += {0}; /* we consumed {0} 32-bit integer{1} */ ".format(howmany(bit),plurial(howmany(bit)))); 82 | print(" *pw += {0}; /* we used up {0} output bytes */ ".format(howmanybytes(bit))); 83 | print("}"); 84 | print("") 85 | 86 | print("static void unpackforblock0(const uint32_t base, const uint8_t ** pw, uint32_t ** pout) {"); 87 | print(" (void) pw;"); 88 | print(" for(int k = 0; k < {0} ; k+= 1) {{".format(howmany(0))) 89 | print(" (*pout) [k] = base;") 90 | print(" }") 91 | print(" *pout += {0}; /* we wrote {0} 32-bit integer{1} */ ".format(howmany(0),plurial(howmany(0)))); 92 | print("}"); 93 | print("") 94 | 95 | for bit in range(1,33): 96 | print("") 97 | print("/* we packed {0} {1}-bit values, touching {2} 64-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit))) 98 | print("static void unpackforblock{0}(const uint32_t base, const uint8_t ** pw, uint32_t ** pout) {{".format(bit)); 99 | print(" const uint64_t * pw64 = *(const uint64_t **) pw;"); 100 | print(" uint32_t * out = *pout;"); 101 | if(bit < 32): print(" const uint64_t mask = UINT64_C({0});".format((1< // mostly for 
Microsoft compilers 16 | #include // part of Visual Studio 2010 and better 17 | """) 18 | print(""" 19 | #ifndef UINT64_C 20 | #define UINT64_C(c) (c ## ULL) 21 | #endif 22 | """) 23 | print(""" 24 | /** 25 | * (c) Daniel Lemire 26 | * Apache License 2.0 27 | */ 28 | /** turbopacking64 starts here **/ 29 | /** 30 | * this code mimics the way TurboPFor packs short arrays of integers. 31 | * We pack and unpack always at least a full 64-bit word, plus whatever 32 | * is necessary to get to an even number of bytes. 33 | */""") 34 | 35 | print("""typedef void (*packforblockfnc_64)(const uint64_t base, const uint64_t ** pin, uint8_t ** pw);""") 36 | print("""typedef void (*unpackforblockfnc_64)(const uint64_t base, const uint8_t ** pw, uint64_t ** pout);""") 37 | 38 | 39 | 40 | def plurial(number): 41 | if(number > 1): 42 | return "s" 43 | else : 44 | return "" 45 | 46 | print("") 47 | print("static void packforblock0_64(const uint64_t , const uint64_t ** pin, uint8_t ** pw) {"); 48 | print(" (void)pw;"); 49 | print(" *pin += {0}; /* we consumed {0} 64-bit integer{1} */ ".format(howmany(0),plurial(howmany(0)))); 50 | print("}"); 51 | print("") 52 | 53 | for bit in range(1,65): 54 | print("") 55 | print("/* we are going to pack {0} {1}-bit values, touching {2} 64-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit))) 56 | print("static void packforblock{0}_64(const uint64_t base, const uint64_t ** pin, uint8_t ** pw) {{".format(bit)); 57 | print(" uint64_t * pw64 = *(uint64_t **) pw;"); 58 | print(" const uint64_t * in = *pin;"); 59 | print(" /* we are going to touch {0} 64-bit word{1} */ ".format(howmanywords(bit),plurial(howmanywords(bit)))); 60 | for k in range(howmanywords(bit)) : 61 | print(" uint64_t w{0};".format(k)) 62 | for j in range(howmany(bit)): 63 | firstword = j * bit // 64 64 | secondword = (j * bit + bit - 1)//64 65 | firstshift = (j*bit) % 64 66 | if( firstword == secondword): 67 | if(firstshift == 0): 68 | print(" 
w{0} = (in[{1}] - base);".format(firstword,j)) 69 | else: 70 | print(" w{0} |= (in[{1}] - base) << {2};".format(firstword,j,firstshift)) 71 | else: 72 | print(" w{0} |= (in[{1}] - base) << {2};".format(firstword,j,firstshift)) 73 | secondshift = 64-firstshift 74 | print(" w{0} = (in[{1}] - base) >> {2};".format(secondword,j,secondshift)) 75 | for k in range(howmanywords(bit)) : 76 | print(" pw64[{0}] = w{0};".format(k)) 77 | print(" *pin += {0}; /* we consumed {0} 64-bit integer{1} */ ".format(howmany(bit),plurial(howmany(bit)))); 78 | print(" *pw += {0}; /* we used up {0} output bytes */ ".format(howmanybytes(bit))); 79 | print("}"); 80 | print("") 81 | 82 | print("static void unpackforblock0_64(const uint64_t base, const uint8_t ** pw, uint64_t ** pout) {"); 83 | print(" (void) pw;"); 84 | print(" for(int k = 0; k < {0} ; k+= 1) {{".format(howmany(0))) 85 | print(" (*pout) [k] = base;") 86 | print(" }") 87 | print(" *pout += {0}; /* we wrote {0} 64-bit integer{1} */ ".format(howmany(0),plurial(howmany(0)))); 88 | print("}"); 89 | print("") 90 | 91 | for bit in range(1,65): 92 | print("") 93 | print("/* we packed {0} {1}-bit values, touching {2} 64-bit words, using {3} bytes */ ".format(howmany(bit),bit,howmanywords(bit),howmanybytes(bit))) 94 | print("static void unpackforblock{0}_64(const uint64_t base, const uint8_t ** pw, uint64_t ** pout) {{".format(bit)); 95 | print(" const uint64_t * pw64 = *(const uint64_t **) pw;"); 96 | print(" uint64_t * out = *pout;"); 97 | if(bit < 64): print(" const uint64_t mask = UINT64_C({0});".format((1< 8 | #include 9 | #include 10 | #ifdef _OPENMP 11 | #include 12 | #endif 13 | #include 14 | #include 15 | #include 16 | #include "bpacking.h" 17 | #include "compression.h" 18 | #include "turbocompression.h" 19 | 20 | using namespace std; 21 | 22 | 23 | // 24 | // VS2012 bug: high_precision_clock is defined as system_clock and precision is 25 | // about 15 MS!! 
26 | // See: https://connect.microsoft.com/VisualStudio/feedback/details/719443 27 | // 28 | // Implementation has been taken from a post on stackoverflow and adapted here 29 | // http://stackoverflow.com/questions/13263277/difference-between-stdsystem-clock-and-stdsteady-clock 30 | // 31 | #ifdef _WIN32 32 | #define NOMINMAX 33 | #define WINDOWS_LEAN_AND_MEAN 34 | #include 35 | 36 | struct qpc_clock { 37 | typedef std::chrono::nanoseconds duration; 38 | typedef duration::rep rep; 39 | typedef duration::period period; 40 | typedef std::chrono::time_point time_point; 41 | static time_point now() { 42 | static bool isInited = false; 43 | static LARGE_INTEGER frequency = { 0, 0 }; 44 | if (!isInited) { 45 | if (QueryPerformanceFrequency(&frequency) == 0) { 46 | throw std::logic_error("QueryPerformanceCounter not supported: " + 47 | std::to_string(GetLastError())); 48 | } 49 | isInited = true; 50 | } 51 | LARGE_INTEGER counter; 52 | QueryPerformanceCounter(&counter); 53 | return time_point(duration(static_cast((double)counter.QuadPart / 54 | frequency.QuadPart * 55 | period::den / period::num))); 56 | } 57 | }; 58 | 59 | #endif 60 | 61 | /** 62 | * author: Preston Bannister 63 | */ 64 | class WallClockTimer { 65 | public: 66 | #ifdef _WIN32 67 | typedef qpc_clock clock; 68 | #else 69 | typedef std::chrono::high_resolution_clock clock; 70 | #endif 71 | 72 | std::chrono::time_point t1, t2; 73 | WallClockTimer() : t1(), t2() { 74 | t1 = clock::now(); 75 | t2 = t1; 76 | } 77 | void reset() { 78 | t1 = clock::now(); 79 | t2 = t1; 80 | } 81 | uint64_t elapsed() { 82 | std::chrono::microseconds delta = 83 | std::chrono::duration_cast(t2 - t1); 84 | return delta.count(); 85 | } 86 | uint64_t split() { 87 | t2 = clock::now(); 88 | return elapsed(); 89 | } 90 | }; 91 | 92 | #ifndef _WIN32 93 | #include 94 | #include 95 | class CPUTimer { 96 | public: 97 | // clock_t t1, t2; 98 | struct rusage t1, t2; 99 | 100 | CPUTimer() : t1(), t2() { 101 | getrusage(RUSAGE_SELF, &t1); 102 | 
// t1 = clock(); 103 | t2 = t1; 104 | } 105 | void reset() { 106 | getrusage(RUSAGE_SELF, &t1); 107 | t2 = t1; 108 | } 109 | // proxy for userelapsed 110 | uint64_t elapsed() { return totalelapsed(); } 111 | 112 | uint64_t totalelapsed() { return userelapsed() + systemelapsed(); } 113 | // returns the *user* CPU time in micro seconds (mu s) 114 | uint64_t userelapsed() { 115 | return ((t2.ru_utime.tv_sec - t1.ru_utime.tv_sec) * 1000ULL * 1000ULL) + 116 | ((t2.ru_utime.tv_usec - t1.ru_utime.tv_usec)); 117 | } 118 | 119 | // returns the *system* CPU time in micro seconds (mu s) 120 | uint64_t systemelapsed() { 121 | return ((t2.ru_stime.tv_sec - t1.ru_stime.tv_sec) * 1000ULL * 1000ULL) + 122 | ((t2.ru_stime.tv_usec - t1.ru_stime.tv_usec)); 123 | } 124 | 125 | uint64_t split() { 126 | getrusage(RUSAGE_SELF, &t2); 127 | return elapsed(); 128 | } 129 | }; 130 | 131 | #endif 132 | 133 | void displayUsage() { cout << "run as test nameoffile" << endl; } 134 | 135 | vector loadVector(string filename) { 136 | vector answer; 137 | answer.reserve(1024 * 32); // expect sizeable arrays 138 | ifstream logFile(filename.c_str()); 139 | if (!logFile.is_open()) { 140 | cerr << " Couldn't open query vector file " << filename << endl; 141 | displayUsage(); 142 | return answer; 143 | } 144 | cout << "# Parsing vector file " << filename << endl; 145 | 146 | logFile.exceptions(ios::badbit); // will throw an exception if something goes 147 | // wrong, saves us the trouble of checking 148 | // the IO status 149 | string line; 150 | for (; logFile && getline(logFile, line);) { 151 | uint32_t id = atoi(line.c_str()); 152 | answer.push_back(id); 153 | } 154 | return answer; 155 | } 156 | 157 | void unit() { 158 | vector test; 159 | for (uint32_t i = 0; i < 100; ++i) 160 | test.push_back(i); 161 | vector comp(test.size() + 1024); 162 | vector recover(test.size() + 1024); 163 | 164 | compress(test.data(), test.size(), comp.data()); 165 | uint32_t nvalue = 0; 166 | uncompress(comp.data(), 
recover.data(), nvalue); 167 | recover.resize(nvalue); 168 | 169 | if (recover != test) 170 | throw runtime_error("bug"); 171 | } 172 | 173 | void deepunit() { 174 | for (int k = 0; k < 32; ++k) { 175 | vector test; 176 | for (uint32_t i = 0; i < 100; ++i) { 177 | test.push_back(0); 178 | test.push_back(0xFFFFFFFF >> k); 179 | } 180 | vector comp(test.size() + 1024); 181 | vector recover(test.size() + 1024); 182 | 183 | compress(test.data(), test.size(), comp.data()); 184 | uint32_t nvalue = 0; 185 | uncompress(comp.data(), recover.data(), nvalue); 186 | recover.resize(nvalue); 187 | 188 | if (recover != test) 189 | throw runtime_error("bug"); 190 | } 191 | } 192 | 193 | void turbounit() { 194 | vector test; 195 | for (uint32_t i = 0; i < 100; ++i) 196 | test.push_back(i); 197 | vector comp(4 * test.size() + 1024); 198 | vector recover(test.size() + 1024); 199 | 200 | turbocompress(test.data(), test.size(), comp.data()); 201 | uint32_t nvalue = 0; 202 | turbouncompress(comp.data(), recover.data(), nvalue); 203 | 204 | recover.resize(nvalue); 205 | 206 | if (recover != test) 207 | throw runtime_error("bug"); 208 | } 209 | 210 | void turbodeepunit() { 211 | for (int k = 31; k >= 0; --k) { 212 | vector test; 213 | for (uint32_t i = 0; i < 100; ++i) { 214 | test.push_back(0); 215 | test.push_back(0xFFFFFFFF >> k); 216 | } 217 | vector comp(test.size() * 4 + 1024); 218 | vector recover(test.size() + 1024); 219 | 220 | turbocompress(test.data(), test.size(), comp.data()); 221 | uint32_t nvalue = 0; 222 | turbouncompress(comp.data(), recover.data(), nvalue); 223 | recover.resize(nvalue); 224 | if (recover != test) { 225 | throw runtime_error("bug"); 226 | } 227 | } 228 | } 229 | 230 | 231 | void turbounit64() { 232 | vector test; 233 | for (uint64_t i = 0; i < 100; ++i) 234 | test.push_back(i); 235 | vector comp(4 * test.size() + 1024); 236 | vector recover(test.size() + 1024); 237 | 238 | turbocompress64(test.data(), test.size(), comp.data()); 239 | uint32_t nvalue = 0; 
240 | turbouncompress64(comp.data(), recover.data(), nvalue); 241 | 242 | recover.resize(nvalue); 243 | if (recover != test) 244 | throw runtime_error("bug64"); 245 | } 246 | 247 | void turbodeepunit64() { 248 | for (int k = 31; k >= 0; --k) { 249 | vector test; 250 | for (uint64_t i = 0; i < 100; ++i) { 251 | test.push_back(0); 252 | test.push_back(0xFFFFFFFF >> k); 253 | } 254 | vector comp(test.size() * 4 + 1024); 255 | vector recover(test.size() + 1024); 256 | 257 | turbocompress64(test.data(), test.size(), comp.data()); 258 | uint32_t nvalue = 0; 259 | turbouncompress64(comp.data(), recover.data(), nvalue); 260 | recover.resize(nvalue); 261 | if (recover != test) { 262 | throw runtime_error("bug64d"); 263 | } 264 | } 265 | } 266 | 267 | 268 | void benchmark(vector &data) { 269 | std::cout << "[standard benchmark]" << std::endl; 270 | vector buffer(data.size()); 271 | 272 | if (data.size() == 0) { 273 | cout << "Empty vector" << endl; 274 | return; 275 | } 276 | cout << "vector size = " << data.size() << endl; 277 | cout << "vector size = " << data.size() * sizeof(uint32_t) / 1024.0 << "KB" 278 | << endl; 279 | 280 | vector compdata(data.size() + 2048); 281 | uint32_t *out = compress(data.data(), data.size(), compdata.data()); 282 | cout << "compression rate:" << setprecision(2) 283 | << data.size() * 1.0 / (out - compdata.data()) << endl; 284 | cout << "bits/int:" << setprecision(4) 285 | << (out - compdata.data()) * 32.0 / data.size() << endl; 286 | cout << "volume: " << setprecision(2) << (out - compdata.data()) * 4.0 / 1024 287 | << "KB" << endl; 288 | 289 | uint32_t nvalue = 0; 290 | uncompress(compdata.data(), buffer.data(), nvalue); 291 | buffer.resize(nvalue); 292 | if (buffer != data) 293 | throw runtime_error("bug"); 294 | 295 | double numberofintegers = 0; 296 | int N = (1 << 28) / data.size(); 297 | uint32_t bogus = 0; 298 | WallClockTimer timer; 299 | for (int k = 0; k < N; ++k) { 300 | uncompress(compdata.data(), buffer.data(), nvalue); 301 | 
numberofintegers += nvalue; 302 | bogus += buffer.back() + buffer.front(); 303 | } 304 | uint64_t timems = timer.split(); 305 | cout << "decoding time per int: " << setprecision(2) 306 | << timems / numberofintegers * 1000 * 1000 << "ns" << endl; 307 | cout << "decoding time per array: " << setprecision(2) 308 | << static_cast(timems) / N * 1000 << "ms" << endl; 309 | cout << "# ignore me " << bogus << endl; 310 | cout << endl; 311 | } 312 | 313 | void turbobenchmark(vector &data) { 314 | std::cout << "[turbo benchmark]" << std::endl; 315 | vector buffer(data.size()); 316 | 317 | if (data.size() == 0) { 318 | cout << "Empty vector" << endl; 319 | return; 320 | } 321 | cout << "vector size = " << data.size() << endl; 322 | cout << "vector size = " << data.size() * sizeof(uint32_t) / 1024.0 << "KB" 323 | << endl; 324 | 325 | vector compdata(data.size() * sizeof(uint32_t) + 2048); 326 | const uint8_t *out = turbocompress(data.data(), data.size(), compdata.data()); 327 | cout << "compression rate:" << setprecision(2) 328 | << data.size() * 1.0 * sizeof(uint32_t) / (out - compdata.data()) 329 | << endl; 330 | cout << "bits/int:" << setprecision(4) 331 | << (out - compdata.data()) * 8.0 / data.size() << endl; 332 | cout << "volume: " << setprecision(2) << (out - compdata.data()) * 1.0 / 1024 333 | << "KB" << endl; 334 | 335 | uint32_t nvalue = 0; 336 | turbouncompress(compdata.data(), buffer.data(), nvalue); 337 | buffer.resize(nvalue); 338 | if (buffer != data) 339 | throw runtime_error("bug"); 340 | 341 | double numberofintegers = 0; 342 | int N = (1 << 28) / data.size(); 343 | uint32_t bogus = 0; 344 | WallClockTimer timer; 345 | for (int k = 0; k < N; ++k) { 346 | turbouncompress(compdata.data(), buffer.data(), nvalue); 347 | numberofintegers += nvalue; 348 | bogus += buffer.back() + buffer.front(); 349 | } 350 | uint64_t timems = timer.split(); 351 | cout << "decoding time per int: " << setprecision(2) 352 | << timems / numberofintegers * 1000 * 1000 << "ns" << 
endl; 353 | cout << "decoding time per array: " << setprecision(2) 354 | << static_cast(timems) / N * 1000 << "ms" << endl; 355 | cout << "# ignore me " << bogus << endl; 356 | cout << endl; 357 | } 358 | 359 | void turbobenchmark64(vector &data32) { 360 | std::cout << "[turbo benchmark64]" << std::endl; 361 | vector data; 362 | 363 | for (vector::const_iterator i = data32.begin(); i != data32.end(); ++i) 364 | data.push_back(*i); 365 | 366 | vector buffer(data); 367 | 368 | if (data.size() == 0) { 369 | cout << "Empty vector" << endl; 370 | return; 371 | } 372 | cout << "vector size = " << data.size() << endl; 373 | cout << "vector size = " << data.size() * sizeof(uint64_t) / 1024.0 << "KB" 374 | << endl; 375 | 376 | vector compdata(data.size() * sizeof(uint32_t) + 2048); 377 | const uint8_t *out = turbocompress64(data.data(), data.size(), compdata.data()); 378 | cout << "compression rate:" << setprecision(2) 379 | << data.size() * 1.0 * sizeof(uint64_t) / (out - compdata.data()) 380 | << endl; 381 | cout << "bits/int:" << setprecision(4) 382 | << (out - compdata.data()) * 8.0 / data.size() << endl; 383 | cout << "volume: " << setprecision(2) << (out - compdata.data()) * 1.0 / 1024 384 | << "KB" << endl; 385 | 386 | uint32_t nvalue = 0; 387 | turbouncompress64(compdata.data(), buffer.data(), nvalue); 388 | buffer.resize(nvalue); 389 | if (buffer != data) 390 | throw runtime_error("bug"); 391 | 392 | double numberofintegers = 0; 393 | int N = (1 << 28) / data.size(); 394 | uint32_t bogus = 0; 395 | WallClockTimer timer; 396 | for (int k = 0; k < N; ++k) { 397 | turbouncompress64(compdata.data(), buffer.data(), nvalue); 398 | numberofintegers += nvalue; 399 | bogus += buffer.back() + buffer.front(); 400 | } 401 | uint64_t timems = timer.split(); 402 | cout << "decoding time per int: " << setprecision(2) 403 | << timems / numberofintegers * 1000 * 1000 << "ns" << endl; 404 | cout << "decoding time per array: " << setprecision(2) 405 | << static_cast(timems) / N * 
1000 << "ms" << endl; 406 | cout << "# ignore me " << bogus << endl; 407 | cout << endl; 408 | } 409 | 410 | int main(int argc, char **argv) { 411 | unit(); 412 | deepunit(); 413 | 414 | turbounit(); 415 | turbodeepunit(); 416 | 417 | turbounit64(); 418 | turbodeepunit64(); 419 | if (argc <= 1) { 420 | displayUsage(); 421 | return -1; 422 | } 423 | string filename = argv[1]; 424 | #ifdef _OPENMP 425 | cout << "OpenMP support is available" << endl; 426 | int k; 427 | #pragma omp parallel 428 | { 429 | #pragma omp master 430 | { 431 | k = omp_get_num_threads(); 432 | cout << "Number of Threads requested = " << k << endl; 433 | } 434 | } 435 | #else 436 | cout << "No OpenMP support" << endl; 437 | #endif 438 | 439 | #ifdef _OPENMP 440 | k = 0; 441 | #pragma omp parallel 442 | #pragma omp atomic 443 | k++; 444 | cout << "Number of Threads counted = " << k << endl; 445 | #endif 446 | 447 | cout << "####### processing " << filename << endl; 448 | vector data = loadVector(filename); 449 | cout << endl; 450 | 451 | benchmark(data); 452 | turbobenchmark(data); 453 | turbobenchmark64(data); 454 | 455 | return 0; 456 | } 457 | --------------------------------------------------------------------------------