├── .gitattributes ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── benchmark.c ├── for-gen.c ├── for.c ├── for.h ├── gen.pl ├── msvc2013 ├── libfor │ └── libfor.vcxproj ├── msvc2013.sln └── test │ └── test.vcxproj └── test.c /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. 
To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | test 2 | benchmark 3 | perf.data 4 | 5 | # Object files 6 | *.o 7 | *.ko 8 | *.obj 9 | *.elf 10 | 11 | # Precompiled Headers 12 | *.gch 13 | *.pch 14 | 15 | # Libraries 16 | *.lib 17 | *.a 18 | *.la 19 | *.lo 20 | 21 | # Shared objects (inc. 
Windows DLLs) 22 | *.dll 23 | *.so 24 | *.so.* 25 | *.dylib 26 | 27 | # Executables 28 | *.exe 29 | *.out 30 | *.app 31 | *.i*86 32 | *.x86_64 33 | *.hex 34 | 35 | # Debug files 36 | *.dSYM/ 37 | Debug 38 | Release 39 | *.filters 40 | *.suo 41 | *sdf 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .SUFFIXES: 2 | # 3 | .SUFFIXES: .cpp .o .c .h 4 | # replace the CXX variable with a path to a C++11 compatible compiler. 5 | ifeq ($(INTEL), 1) 6 | # if you wish to use the Intel compiler, please do "make INTEL=1". 
7 | CXX ?= /opt/intel/bin/icpc 8 | CC ?= /opt/intel/bin/icpc 9 | ifeq ($(DEBUG),1) 10 | CFLAGS = -std=c99 -O3 -Wall -ansi -DDEBUG=1 -D_GLIBCXX_DEBUG -ggdb 11 | else 12 | CFLAGS = -std=c99 -O2 -Wall -ansi -DNDEBUG=1 -ggdb 13 | endif # debug 14 | else #intel 15 | ifeq ($(DEBUG),1) 16 | CFLAGS = -pedantic -ggdb -DDEBUG=1 -D_GLIBCXX_DEBUG -Wall -Wextra 17 | else 18 | CFLAGS = -pedantic -Wall -Wextra -O3 19 | endif #debug 20 | endif #intel 21 | 22 | 23 | HEADERS= $(shell ls *h) 24 | 25 | all: benchmark test libfor.a 26 | echo "please run unit tests by running ./test" 27 | 28 | for.o: for.h for.c for-gen.c 29 | $(CC) $(CFLAGS) -c for.c 30 | 31 | for: $(HEADERS) for.o 32 | ar rvs libfor.a for.o 33 | 34 | gen: gen.pl 35 | perl gen.pl > for-gen.c 36 | 37 | test: for $(HEADERS) test.c $(OBJECTS) 38 | $(CC) $(CFLAGS) -o test test.c $(OBJECTS) libfor.a 39 | 40 | benchmark: for $(HEADERS) benchmark.c 41 | $(CC) $(CFLAGS) -o benchmark benchmark.c libfor.a 42 | 43 | clean: 44 | rm -f *.o test benchmark 45 | 46 | .PHONY: all clean test 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | libfor - Fast C Library for Frame of Reference Integer Compression 2 | ====================== 3 | 4 | An ANSI C library with a fast scalar (non-SIMD) implementation for 5 | Frame of Reference (FOR) integer compression. It works on Linux, 6 | Microsoft Windows and most likely all other sane systems. 7 | 8 | FOR can compress sorted and unsorted integer sequences. 
9 | 10 | In addition, the library can perform operations directly on compressed data: 11 | 12 | * select: returns a value at a specified index 13 | * linear search: for unsorted sequences, or short sorted sequences 14 | * lower bound search: based on binary search, for sorted sequences 15 | * append: appends an integer to a compressed sequence 16 | 17 | Simple demo 18 | ------------------------ 19 | 20 | #define LEN 100 21 | uint32_t in[LEN] = {0}; 22 | uint8_t out[512]; 23 | 24 | // Fill |in| with numbers of your choice 25 | for (int i = 0; i < LEN; i++) 26 | in[i] = i; 27 | 28 | // Now compress; can also use for_compress_sorted() if the numbers 29 | // are sorted. This is slightly faster. 30 | uint32_t size = for_compress_unsorted(&in[0], &out[0], LEN); 31 | printf("compressing %u integers (%u bytes) into %u bytes\n", 32 | LEN, LEN * 4, size); 33 | 34 | // Decompress again 35 | uint32_t decompressed[LEN]; 36 | for_uncompress(&out[0], &decompressed[0], LEN); 37 | 38 | Usage 39 | ------------------------ 40 | 41 | It can't be more simple: 42 | 43 | make 44 | 45 | To run the tests: 46 | 47 | ./test 48 | 49 | Where is this used? 50 | ---------------------- 51 | 52 | I use this library to compress 32bit integers for upscaledb, a very 53 | fast embedded key/value store (see http://upscaledb.com). 54 | 55 | If you would like me to add your application to this list then please send 56 | me a mail at chris@crupp.de. 57 | 58 | Licensing 59 | ------------------------ 60 | 61 | Apache License, Version 2.0 62 | 63 | Requirements 64 | ------------------------ 65 | 66 | This library only works with little-endian CPUs. 67 | 68 | Tested on Linux and Windows (Visual Studio 2013). Porting it should not 69 | be difficult. 70 | 71 | Acknowledgement 72 | ------------------------ 73 | 74 | This work is based on Daniel Lemire (http://lemire.me)'s ideas and 75 | implementation at https://github.com/lemire/FrameOfReference. 
76 | 77 | For further information, see 78 | * Goldstein J, Ramakrishnan R, Shaft U. Compressing relations and indexes. Proceedings of the Fourteenth International Conference on Data Engineering, ICDE ’98, IEEE Computer Society: Washington, DC, USA, 1998; 370–379. 79 | * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second through vectorization, Software Practice & Experience 45 (1), 2015. http://arxiv.org/abs/1209.2137 http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract 80 | * Daniel Lemire, Leonid Boytsov, Nathan Kurz, SIMD Compression and the Intersection of Sorted Integers, Software Practice & Experience (to appear) http://arxiv.org/abs/1401.6399 81 | * Jeff Plaisance, Nathan Kurz, Daniel Lemire, Vectorized VByte Decoding, International Symposium on Web Algorithms 2015, 2015. http://arxiv.org/abs/1503.07387 82 | * Wayne Xin Zhao, Xudong Zhang, Daniel Lemire, Dongdong Shan, Jian-Yun Nie, Hongfei Yan, Ji-Rong Wen, A General SIMD-based Approach to Accelerating Compression Algorithms, ACM Transactions on Information Systems 33 (3), 2015. http://arxiv.org/abs/1502.01916 83 | 84 | 85 | -------------------------------------------------------------------------------- /benchmark.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
/*
 * Terminates the program with exit code -1 when the condition |c| is false,
 * after printing the file and line of the failed check.
 *
 * The argument is parenthesized before it is negated. Without the
 * parentheses a call such as VERIFY(s1 == s2) expands to (!s1 == s2), which
 * negates only the left operand and therefore does not test the expression
 * that was written. The do/while(0) wrapper turns the expansion into a
 * single statement, the same shape VERIFY_ARRAY uses.
 */
#define VERIFY(c)                                                   \
  do {                                                              \
    if (!(c)) {                                                     \
      printf("%s:%d: expression failed\n",                          \
              __FILE__, __LINE__);                                  \
      exit(-1);                                                     \
    }                                                               \
  } while (0)
/*
 * Counts the leading zero bits of |value| with the MSVC bit-scan intrinsic
 * (this definition is only compiled when _MSC_VER is defined).
 *
 * _BitScanReverse() stores the position of the highest set bit through an
 * |unsigned long *|, so a local of exactly that type receives the index
 * instead of a uint32_t. When |value| is 0 the intrinsic returns 0 and does
 * not produce a usable index; that case is answered with 32 (no bit set)
 * rather than with a number derived from the index.
 */
uint32_t __inline CLZ(uint32_t value) {
  unsigned long highest_bit = 0;

  if (!_BitScanReverse(&highest_bit, value))
    return 32;

  return 31 - (uint32_t)highest_bit;
}
/*
 * Returns the number of bytes needed to store |length| integers that are
 * packed with |bits| bits each (the metadata header is not included).
 *
 * The packed data is organised in byte-aligned blocks of 32, 16 and 8
 * values followed by a remainder of up to 7 values. A block of 8 * k values
 * occupies exactly k * |bits| bytes, so the block sizes add up to
 * (length / 8) * bits bytes; only the remainder has to be rounded up to a
 * whole byte.
 *
 * Computing the size this way avoids multiplying the total number of bits
 * in 32-bit arithmetic, which wraps around for large inputs (for example
 * 2^27 values with 32 bits each) although the size in bytes is still
 * representable. The result itself is a uint32_t and wraps once the size
 * reaches 4 GiB.
 */
uint32_t
for_compressed_size_bits(uint32_t length, uint32_t bits)
{
  uint32_t groups_of_8, tail;

  assert(bits <= 32);

  groups_of_8 = length / 8;
  tail = length % 8;

  /* tail * bits is at most 7 * 32, rounding up to bytes cannot overflow */
  return groups_of_8 * bits + (tail * bits + 7) / 8;
}
i += 8, in += 8) 155 | written += for_pack8[bits](base, in, out + written); 156 | 157 | return written + for_packx[bits](base, in, out + written, length - i); 158 | } 159 | 160 | uint32_t 161 | for_compress_unsorted(const uint32_t *in, uint8_t *out, uint32_t length) 162 | { 163 | uint32_t i, b, m, M; 164 | 165 | if (length == 0) 166 | return 0; 167 | 168 | /* calculate min/max */ 169 | m = in[0]; 170 | M = m; 171 | 172 | for (i = 1; i < length; i++) { 173 | if (in[i] < m) 174 | m = in[i]; 175 | if (in[i] > M) 176 | M = in[i]; 177 | } 178 | 179 | /* calculate the bits */ 180 | b = required_bits(M - m); 181 | 182 | /* store m and the bits */ 183 | *(uint32_t *)(out + 0) = m; 184 | *(uint8_t *) (out + 4) = b; 185 | return METADATA + for_compress_bits(in, out + METADATA, length, m, b); 186 | } 187 | 188 | uint32_t 189 | for_compress_sorted(const uint32_t *in, uint8_t *out, uint32_t length) 190 | { 191 | uint32_t m, M, b; 192 | 193 | if (length == 0) 194 | return 0; 195 | 196 | /* fetch min/max */ 197 | m = in[0]; 198 | M = in[length - 1]; 199 | 200 | /* calculate the bits */ 201 | b = required_bits(M - m); 202 | 203 | /* store m and the bits */ 204 | *(uint32_t *)(out + 0) = m; 205 | *(uint8_t *) (out + 4) = b; 206 | 207 | return METADATA + for_compress_bits(in, out + METADATA, length, m, b); 208 | } 209 | 210 | uint32_t 211 | for_uncompress_bits(const uint8_t *in, uint32_t *out, uint32_t length, 212 | uint32_t base, uint32_t bits) 213 | { 214 | uint32_t i = 0; 215 | const uint8_t *bin = in; 216 | 217 | assert(bits <= 32); 218 | 219 | for (; i + 32 <= length; i += 32, out += 32) 220 | in += for_unpack32[bits](base, in, out); 221 | 222 | for (; i + 16 <= length; i += 16, out += 16) 223 | in += for_unpack16[bits](base, in, out); 224 | 225 | for (; i + 8 <= length; i += 8, out += 8) 226 | in += for_unpack8[bits](base, in, out); 227 | 228 | return (in - bin) + for_unpackx[bits](base, in, out, length - i); 229 | } 230 | 231 | uint32_t 232 | for_uncompress(const uint8_t *in, 
/*
 * Appends |value| to the bit-packed sequence of |length| integers stored
 * at |in| (|in| points to the packed data, not to the metadata header).
 *
 * Each integer is stored as (value - base) in |bits| bits. Groups of 8
 * integers occupy exactly |bits| bytes, so the slot for the new integer
 * begins (length / 8) * bits bytes into the buffer plus
 * (length % 8) * bits further bits.
 *
 * The slot is updated byte by byte, least significant bits first. This is
 * the layout a 32-bit store produces on the little-endian CPUs this
 * library supports. It touches only the bytes the new integer occupies,
 * works for any alignment of |in|, and avoids the signed shift 1 << 31
 * when |bits| is 31. Bits of those bytes that belong to neighbouring
 * integers are preserved; the slot itself is overwritten.
 *
 * The caller must make sure that |value| is not smaller than |base|, that
 * (value - base) fits into |bits| bits, and that the buffer is large enough
 * for the returned size.
 *
 * Returns the number of bytes the packed data occupies after the append.
 */
uint32_t
for_append_bits(uint8_t *in, uint32_t length, uint32_t base,
                uint32_t bits, uint32_t value)
{
  uint8_t *slot;
  uint32_t delta, shift, remaining, used, k;

  assert(bits <= 32);
  assert(value >= base);
  /* (value - base) must be representable in |bits| bits */
  assert(bits == 32 || ((value - base) >> bits) == 0);

  delta = value - base;

  /* 32 bits per integer: the data is a plain array of 32-bit words */
  if (bits == 32) {
    slot = in + (size_t)length * 4;
    for (k = 0; k < 4; k++)
      slot[k] = (uint8_t)(delta >> (8 * k));
    return (uint32_t)((length + 1) * sizeof(uint32_t));
  }

  /* skip all complete groups of 8 integers, they are byte-aligned */
  slot = in + (size_t)(length / 8) * bits;

  /* bit offset of the new integer behind the skipped groups */
  shift = (length % 8) * bits;
  slot += shift / 8;
  shift %= 8;

  /* |slot| now points to the byte where the new value will be stored */
  /* |shift| is the bit position in that byte where the value starts */

  used = (uint32_t)(slot - in) + (shift + bits + 7) / 8;

  remaining = bits;
  while (remaining > 0) {
    /* number of bits that go into the current byte */
    uint32_t take = 8 - shift;
    uint8_t mask;

    if (take > remaining)
      take = remaining;

    mask = (uint8_t)(((1u << take) - 1u) << shift);
    *slot = (uint8_t)((*slot & (uint8_t)~mask) | ((delta << shift) & mask));

    delta >>= take;
    remaining -= take;
    shift = 0;
    slot++;
  }

  return used;
}
start / 8; 391 | start %= 8; 392 | 393 | /* |in| now points to the byte where the requested index is stored */ 394 | /* |start| is the bit position where the compressed value starts */ 395 | 396 | in32 = (uint32_t *)in; 397 | 398 | /* easy common case: the compressed value is not split between words */ 399 | if (start + bits < 32) { 400 | uint32_t mask = (1 << bits) - 1; 401 | return base + ((*in32 >> start) & mask); 402 | } 403 | /* not so easy: restore value from two words */ 404 | else { 405 | uint32_t mask1 = (1 << bits) - 1; 406 | uint32_t mask2 = (1 << (bits - (32 - start))) - 1; 407 | uint32_t v1 = (*(in32 + 0) >> start) & mask1; 408 | uint32_t v2 = *(in32 + 1) & mask2; 409 | return base + ((v2 << (32 - start)) | v1); 410 | } 411 | } 412 | 413 | uint32_t 414 | for_select(const uint8_t *in, uint32_t index) 415 | { 416 | /* load min and the bits */ 417 | uint32_t m = *(uint32_t *)(in + 0); 418 | uint32_t b = *(in + 4); 419 | 420 | return for_select_bits(in + METADATA, m, b, index); 421 | } 422 | 423 | uint32_t 424 | for_linear_search(const uint8_t *in, uint32_t length, uint32_t value) 425 | { 426 | /* load min and the bits */ 427 | uint32_t m = *(uint32_t *)(in + 0); 428 | uint32_t b = *(in + 4); 429 | 430 | return for_linear_search_bits(in + METADATA, length, m, b, value); 431 | } 432 | 433 | uint32_t 434 | for_linear_search_bits(const uint8_t *in, uint32_t length, uint32_t base, 435 | uint32_t bits, uint32_t value) 436 | { 437 | uint32_t i = 0; 438 | int found = -1; 439 | 440 | assert(bits <= 32); 441 | if (bits == 0) 442 | return (value == base ? 
0 : length); 443 | 444 | for (; i + 32 <= length; i += 32) { 445 | in += for_linsearch32[bits](base, in, value, &found); 446 | if (found >= 0) 447 | return i + found; 448 | } 449 | 450 | for (; i + 16 <= length; i += 16) { 451 | in += for_linsearch16[bits](base, in, value, &found); 452 | if (found >= 0) 453 | return i + found; 454 | } 455 | 456 | for (; i + 8 <= length; i += 8) { 457 | in += for_linsearch8[bits](base, in, value, &found); 458 | if (found >= 0) 459 | return i + found; 460 | } 461 | 462 | for_linsearchx[bits](base, in, length - i, value, &found); 463 | if (found >= 0) 464 | return i + found; 465 | 466 | /* not found */ 467 | return length; 468 | } 469 | 470 | uint32_t 471 | for_lower_bound_search(const uint8_t *in, uint32_t length, uint32_t value, 472 | uint32_t *actual) 473 | { 474 | /* load min and the bits */ 475 | uint32_t m = *(uint32_t *)(in + 0); 476 | uint32_t b = *(in + 4); 477 | 478 | return for_lower_bound_search_bits(in + METADATA, length, m, b, 479 | value, actual); 480 | } 481 | 482 | /* adapted from wikipedia */ 483 | uint32_t 484 | for_lower_bound_search_bits(const uint8_t *in, uint32_t length, uint32_t base, 485 | uint32_t bits, uint32_t value, uint32_t *actual) 486 | { 487 | uint32_t imid; 488 | uint32_t imin = 0; 489 | uint32_t imax = length - 1; 490 | uint32_t v; 491 | 492 | while (imin + 1 < imax) { 493 | imid = imin + ((imax - imin) / 2); 494 | 495 | v = for_select_bits(in, base, bits, imid); 496 | if (v >= value) { 497 | imax = imid; 498 | } 499 | else if (v < value) { 500 | imin = imid; 501 | } 502 | } 503 | 504 | v = for_select_bits(in, base, bits, imin); 505 | if (v >= value) { 506 | *actual = v; 507 | return imin; 508 | } 509 | 510 | v = for_select_bits(in, base, bits, imax); 511 | *actual = v; 512 | return imax; 513 | } 514 | 515 | -------------------------------------------------------------------------------- /for.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 
(C) 2005-2015 Christoph Rupp (chris@crupp.de). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /** 18 | * A fast implementation for Frame of Reference encoding. 19 | * 20 | * See the README.md file for more information, example code and references. 21 | * 22 | * Feel free to send comments/questions to chris@crupp.de. I am available 23 | * for consulting. 24 | */ 25 | 26 | #ifndef FOR_H_5580af15_4570_41f9_ba2b_8afb1400e81e 27 | #define FOR_H_5580af15_4570_41f9_ba2b_8afb1400e81e 28 | 29 | #include <stdint.h> 30 | 31 | #ifdef __cplusplus 32 | extern "C" { 33 | #endif 34 | 35 | /** 36 | * Returns the size required to compress a sequence of |length| ints, 37 | * each compressed with |bits| bits 38 | * 39 | * This function will NOT include any overhead required by 40 | * for_compress_sorted() and for_compress_unsorted(). 41 | * 42 | * Invariant: bits <= 32 43 | */ 44 | extern uint32_t 45 | for_compressed_size_bits(uint32_t length, uint32_t bits); 46 | 47 | /** 48 | * Returns the size required to compress an unsorted sequence of |length| ints. 49 | * 50 | * This routine scans |in| for the min/max values and then calls 51 | * for_compressed_size_bits(). 52 | * 53 | * The returned size will include the overhead required for 54 | * for_compress_sorted() and for_compress_unsorted().
55 | */ 56 | extern uint32_t 57 | for_compressed_size_unsorted(const uint32_t *in, uint32_t length); 58 | 59 | /** 60 | * Returns the size required to compress a sorted sequence of |length| ints. 61 | * 62 | * This routine extracts min/max values at the beginning and end of 63 | * the sequence, then calls for_compressed_size_bits(). It is therefore 64 | * slightly faster than for_compressed_size_unsorted(). 65 | * 66 | * The returned size will include the overhead required for 67 | * for_compress_sorted() and for_compress_unsorted(). 68 | */ 69 | extern uint32_t 70 | for_compressed_size_sorted(const uint32_t *in, uint32_t length); 71 | 72 | /** 73 | * Compresses a sequence of |length| ints at |in| and stores the result 74 | * in |out|. 75 | * 76 | * |base| is the "offset" (or common delta value) of all ints. It is usually 77 | * set to the minimum value of the uncompressed sequence. 78 | * 79 | * |bits| are the bits required to store a single integer. 80 | * 81 | * Returns the number of bytes used for compression. 82 | * 83 | * This is for advanced users who opt for storing |base| and |bits| on their 84 | * own. This function is called by for_compress_sorted() and 85 | * for_compress_unsorted(). 86 | * 87 | * Invariant: bits <= 32 88 | */ 89 | extern uint32_t 90 | for_compress_bits(const uint32_t *in, uint8_t *out, uint32_t length, 91 | uint32_t base, uint32_t bits); 92 | 93 | /** 94 | * Compresses an unsorted sequence of |length| ints at |in| and stores the 95 | * result in |out|. 96 | * 97 | * This routine scans |in| for the min/max values and then calls 98 | * for_compress_bits(). 99 | * 100 | * The minimum value and the bits are stored as metadata in |out|. 101 | */ 102 | extern uint32_t 103 | for_compress_unsorted(const uint32_t *in, uint8_t *out, uint32_t length); 104 | 105 | /** 106 | * Compresses a sorted sequence of |length| ints at |in| and stores the 107 | * result in |out|.
108 | * 109 | * This routine extracts min/max values at the beginning and end of 110 | * the sequence, then calls for_compress_bits(). 111 | * 112 | * The minimum value and the bits are stored as metadata in |out|. 113 | */ 114 | extern uint32_t 115 | for_compress_sorted(const uint32_t *in, uint8_t *out, uint32_t length); 116 | 117 | /** 118 | * Uncompresses a sequence of |length| ints at |in| and stores the 119 | * result in |out|. 120 | * 121 | * |base| is the "offset" (or common delta value) of all ints. It is usually 122 | * set to the minimum value of the uncompressed sequence. 123 | * 124 | * |bits| are the bits required to store a single integer. 125 | * 126 | * Returns the number of compressed bytes processed. 127 | * 128 | * This function is for advanced users. It is the counterpart of 129 | * for_compress_bits(). 130 | * 131 | * Invariant: bits <= 32 132 | */ 133 | extern uint32_t 134 | for_uncompress_bits(const uint8_t *in, uint32_t *out, uint32_t length, 135 | uint32_t base, uint32_t bits); 136 | 137 | /** 138 | * Uncompresses a sequence of |length| ints at |in| and stores the 139 | * result in |out|. 140 | * 141 | * This function is a convenience wrapper for for_uncompress_bits(). It 142 | * expects metadata at the beginning of |in|. Use in combination with 143 | * for_compress_sorted() and for_compress_unsorted(). 144 | * 145 | * Returns the number of compressed bytes processed. 146 | */ 147 | extern uint32_t 148 | for_uncompress(const uint8_t *in, uint32_t *out, uint32_t length); 149 | 150 | /** 151 | * Appends a |value| to a compressed sequence of unsorted integers. 152 | * 153 | * This function is optimized for appending new values at the end of an 154 | * encoded sequence. This is only possible if the new value (more precisely: 155 | * the delta of the new value) can be stored in the same amount of bits that 156 | * were used to encode the other integers.
157 | * 158 | * If this is not the case then memory is allocated, the whole sequence is 159 | * decoded and re-encoded using more bits. This requires a heap allocation 160 | * with malloc(). 161 | * 162 | * Returns the size (in bytes) of the compressed data, or 0 if malloc() fails. 163 | */ 164 | extern uint32_t 165 | for_append_unsorted(uint8_t *in, uint32_t length, uint32_t value); 166 | 167 | /** 168 | * Appends a |value| to a compressed sequence of sorted integers. 169 | * 170 | * This function is optimized for appending new values at the end of an 171 | * encoded sequence. This is only possible if the new value (more precisely: 172 | * the delta of the new value) can be stored in the same amount of bits that 173 | * were used to encode the other integers. 174 | * 175 | * If this is not the case then memory is allocated, the whole sequence is 176 | * decoded and re-encoded using more bits. This requires a heap allocation 177 | * with malloc(). 178 | * 179 | * Returns the size (in bytes) of the compressed data, or 0 if malloc() fails. 180 | */ 181 | extern uint32_t 182 | for_append_sorted(uint8_t *in, uint32_t length, uint32_t value); 183 | 184 | /** 185 | * Appends a |value| to a compressed integer sequence. 186 | * 187 | * |base| is the "offset" (or common delta value) of all ints. It is usually 188 | * set to the minimum value of the uncompressed sequence. 189 | * 190 | * |bits| are the bits required to store a single integer. 191 | * 192 | * Returns the size (in bytes) of the compressed data. 193 | * 194 | * Invariant: bits <= 32 195 | * Invariant: the new |value| (more precisely: |value - base|) can be stored 196 | * in |bits| bits. Details can be found in the implementation of 197 | * for_append_impl() in for.c. 198 | */ 199 | extern uint32_t 200 | for_append_bits(uint8_t *in, uint32_t length, uint32_t base, 201 | uint32_t bits, uint32_t value); 202 | 203 | /** 204 | * Returns the value at the given |index| from a compressed sequence.
205 | * 206 | * Make sure that |index| does not exceed the length of the sequence. 207 | * 208 | * |base| is the "offset" (or common delta value) of all ints. It is usually 209 | * set to the minimum value of the uncompressed sequence. 210 | * 211 | * Invariant: bits <= 32 212 | */ 213 | extern uint32_t 214 | for_select_bits(const uint8_t *in, uint32_t base, uint32_t bits, 215 | uint32_t index); 216 | 217 | /** 218 | * Returns the value at the given |index| from a compressed sequence. 219 | * 220 | * Make sure that |index| does not exceed the length of the sequence. 221 | * 222 | * This function is a convenience wrapper for for_select_bits(). It 223 | * expects metadata at the beginning of |in|. Use in combination with 224 | * for_compress_sorted() and for_compress_unsorted(). 225 | */ 226 | extern uint32_t 227 | for_select(const uint8_t *in, uint32_t index); 228 | 229 | /** 230 | * Performs a linear search for |value|. 231 | * 232 | * Returns the index of the found element, or |length| if the key was not 233 | * found. 234 | * 235 | * This function is a convenience wrapper for for_linear_search_bits(). It 236 | * expects metadata at the beginning of |in|. Use in combination with 237 | * for_compress_sorted() and for_compress_unsorted(). 238 | */ 239 | extern uint32_t 240 | for_linear_search(const uint8_t *in, uint32_t length, uint32_t value); 241 | 242 | /** 243 | * Performs a linear search for |value|. 244 | * 245 | * Returns the index of the found element, or |length| if the key was not 246 | * found. 247 | * 248 | * |base| is the "offset" (or common delta value) of all ints. It is usually 249 | * set to the minimum value of the uncompressed sequence. 250 | * 251 | * Invariant: bits <= 32 252 | */ 253 | extern uint32_t 254 | for_linear_search_bits(const uint8_t *in, uint32_t length, uint32_t base, 255 | uint32_t bits, uint32_t value); 256 | 257 | /** 258 | * Performs a lower bound binary search for |value|.
259 | * 260 | * A lower bound search returns the first element in the sequence which does 261 | * not compare less than |value|. 262 | * The actual result is stored in |*actual|. 263 | * 264 | * This function is a convenience wrapper for for_lower_bound_search_bits(). It 265 | * expects metadata at the beginning of |in|. Use in combination with 266 | * for_compress_sorted() and for_compress_unsorted(). 267 | */ 268 | extern uint32_t 269 | for_lower_bound_search(const uint8_t *in, uint32_t length, uint32_t value, 270 | uint32_t *actual); 271 | 272 | /** 273 | * Performs a lower bound binary search for |value|. 274 | * 275 | * A lower bound search returns the first element in the sequence which does 276 | * not compare less than |value|. 277 | * The actual result is stored in |*actual|. 278 | * 279 | * |base| is the "offset" (or common delta value) of all ints. It is usually 280 | * set to the minimum value of the uncompressed sequence. 281 | * 282 | * Invariant: bits <= 32 283 | */ 284 | extern uint32_t 285 | for_lower_bound_search_bits(const uint8_t *in, uint32_t length, uint32_t base, 286 | uint32_t bits, uint32_t value, uint32_t *actual); 287 | 288 | 289 | #ifdef __cplusplus 290 | } /* extern "C" */ 291 | #endif 292 | 293 | #endif /* FOR_H_5580af15_4570_41f9_ba2b_8afb1400e81e */ 294 | -------------------------------------------------------------------------------- /gen.pl: -------------------------------------------------------------------------------- 1 | 2 | sub consume 3 | { 4 | my $old = shift; 5 | my $c = shift; 6 | my $t = $old + $c; 7 | print " /* consumed: $c bytes (total: $t) */\n"; 8 | return $t; 9 | } 10 | 11 | sub bitmask 12 | { 13 | my $bits = shift; 14 | my $mask = 0; 15 | for (my $m = 0; $m < $bits; $m++) { 16 | $mask |= 1 << $m; 17 | } 18 | return $mask; 19 | } 20 | 21 | # 22 | # TODO 23 | # if bits are aligned (i.e. $bits_per_word % $bits == 0), then use 4 different 24 | # tmp variables (tmp0..tmp3), iterate between them and OR the result!
#

#
# Prints a C function |$fname| which packs exactly |$block| integers with
# |$bits| bits each into words of type |$type| (|$bits_per_word| bits wide).
# The emitted function returns the number of bytes it has written.
#
sub generate_pack_impl
{
  my $fname = shift;
  my $type = shift;
  my $bits_per_word = shift;
  my $bits = shift;
  my $block = shift;

  my $consumed = 0;
  my $inittmp = 1;

  print "static uint32_t\n";
  print "$fname(uint32_t base, const uint32_t *in, uint8_t *out) {\n";

  # optimized path for 32bit
  if ($bits == 32) {
    print " uint32_t i;\n";
    print " uint32_t *out32 = (uint32_t *)out;\n";
    print " for (i = 0; i < $block; i++)\n";
    print " out32[i] = in[i] - base;\n";
    print " return $block * sizeof(uint32_t);\n";
    print "}\n\n";
    return;
  }

  print " $type tmp, length;\n";

  my $i = 0;
  my $j = 0;

  while (1) {
    # $b is the bit position inside the current output word
    my $b = 0;
    while ($b < $bits_per_word and $i < $block) {
      # new value fits into the current word?
      if ($b + $bits <= $bits_per_word) {
        if ($inittmp != 0) {
          print " tmp = (*(in + $j) - base) << $b;\n";
          $inittmp = 0;
        }
        else {
          print " tmp |= (*(in + $j) - base) << $b;\n";
        }
        $b += $bits;
      }
      # if not then encode at a word boundary
      else {
        print " tmp |= (*(in + $j) - base) << $b;\n";
        print " *($type *)out = tmp;\n";
        print " out += sizeof($type);\n";
        $consumed = consume($consumed, $bits_per_word / 8);
        # $d is the number of bits which spill over into the next word
        my $d = ($b + $bits) - $bits_per_word;
        print " tmp = (*(in + $j) - base) >> ($bits - $d);\n";
        $b = $d;
      }

      $j++;
      $i++;
    }

    # move to next block
    if ($i < $block) {
      print " *($type *)out = tmp;\n";
      print " out += sizeof($type);\n";
      $inittmp = 1;
      $consumed = consume($consumed, $bits_per_word / 8);
    }
    # or reached the end?
    else {
      use integer; # force integer division
      print " /* remaining: " . ($bits_per_word - $b) . " bits */\n";
      print " length = ($bits_per_word / 8) - ($bits_per_word - $b) / 8;\n";
      $consumed = consume($consumed,
                      ($bits_per_word / 8) - ($bits_per_word - $b) / 8);
      print " memcpy(out, &tmp, length);\n";
      print " return $consumed;\n";
      last;
    }
  }
  print "}\n\n";
}

# Prints the packer "pack<bits>_<block>", based on 32bit words
sub generate_pack
{
  my $bits = shift;
  my $block = shift;

  my $fname = "pack$bits" . '_' . $block;

  generate_pack_impl($fname, 'uint32_t', 32, $bits, $block);
}

#
# Prints a C function |$fname| which unpacks exactly |$block| integers with
# |$bits| bits each from words of type |$type| (|$bits_per_word| bits wide).
# The emitted function returns the number of bytes it has consumed.
#
sub generate_unpack_impl
{
  my $fname = shift;
  my $type = shift;
  my $bits_per_word = shift;
  my $bits = shift;
  my $block = shift;

  my $mask = bitmask($bits);

  print "static uint32_t\n";
  print "$fname(uint32_t base, const uint8_t *in, uint32_t *out) {\n";

  # optimized path for 32bit
  if ($bits == 32) {
    print " uint32_t i;\n";
    print " uint32_t *in32 = (uint32_t *)in;\n";
    print " for (i = 0; i < $block; i++)\n";
    print " out[i] = base + in32[i];\n";
    print " return $block * sizeof(uint32_t);\n";
    print "}\n\n";
    return;
  }

  print " $type *in32 = ($type *)in;\n";
  print " $type tmp; (void)tmp;\n";
  my $consumed = 4;

  my $i = 0;
  my $j = 0;

  while (1) {
    # $b is the bit position inside the current input word
    my $b = 0;
    while ($b < $bits_per_word and $i < $block) {
      # new value fits into the current word?
      if ($b + $bits <= $bits_per_word) {
        print " *(out + $j) = base + ((*in32 >> $b) & $mask);\n";
        $b += $bits;
      }
      # if not then decode at a word boundary
      else {
        print " tmp = (*in32 >> $b);\n";
        print " in32++;\n";
        $consumed = consume($consumed, $bits_per_word / 8);
        # $d is the number of bits which are stored in the next word
        my $d = ($b + $bits) - $bits_per_word;
        print " tmp |= (*in32 % (1U << $d)) << ($bits - $d);\n";
        print " *(out + $j) = base + tmp;\n";
        $b = $d;
      }
      $j++;
      $i++;
    }

    # move to next block
    if ($i < $block) {
      print " in32++;\n";
      $consumed = consume($consumed, $bits_per_word / 8);
    }
    # or reached the end?
    else {
      use integer; # force integer division
      my $remaining = $bits_per_word - $b;
      print " /* remaining: $remaining bits */\n";
      $consumed -= $remaining / 8;
      print " return $consumed;\n";
      last;
    }
  }
  print "}\n\n";
}

# Prints the unpacker "unpack<bits>_<block>", based on 32bit words
sub generate_unpack
{
  my $bits = shift;
  my $block = shift;

  my $fname = "unpack$bits" . '_' . $block;

  generate_unpack_impl($fname, 'uint32_t', 32, $bits, $block);
}

#
# Prints a C function |$fname| which packs a run-time |length| of up to 8
# integers with |$bits| bits each; after every value the emitted code jumps
# to "bail" as soon as |length| values were written.
#
sub generate_packx
{
  my $fname = shift;
  my $type = shift;
  my $bits_per_word = shift;
  my $bits = shift;
  my $block = 8;

  my $inittmp = 1;

  print "static uint32_t\n";
  print "$fname(uint32_t base, const uint32_t *in, uint8_t *out, uint32_t length) {\n";
  print " $type tmp, remaining;\n";
  print " if (length == 0)\n";
  print " return 0;\n";

  my $i = 0;
  my $j = 0;

  while (1) {
    my $b = 0;
    while ($b < $bits_per_word and $i < $block) {
      # new value fits into the current word?
      if ($b + $bits <= $bits_per_word) {
        if ($inittmp != 0) {
          print " tmp = (*(in + $j) - base) << $b;\n";
          $inittmp = 0;
        }
        else {
          print " tmp |= (*(in + $j) - base) << $b;\n";
        }
        $b += $bits;
      }
      # if not then encode at a word boundary
      else {
        print " tmp |= (*(in + $j) - base) << $b;\n";
        print " *($type *)out = tmp;\n";
        print " out += sizeof($type);\n";
        # $d is the number of bits which spill over into the next word
        my $d = ($b + $bits) - $bits_per_word;
        print " tmp = (*(in + $j) - base) >> ($bits - $d);\n";
        $b = $d;
      }

      $j++;
      $i++;

      print " if (length == $j)\n goto bail;\n";
    }

    # move to next block
    if ($i < $block) {
      print " *($type *)out = tmp;\n";
      print " out += sizeof($type);\n";
      $inittmp = 1;
    }
    # or reached the end?
    else {
      print "bail:\n";
      print " remaining = (((length * $bits) + 7) / 8) % 4;\n";
      print " if (remaining == 0) remaining = 4;\n";
      print " memcpy(out, &tmp, remaining);\n";
      print " return ((length * $bits) + 7) / 8;\n";
      last;
    }
  }
  print "}\n\n";
}

#
# Prints a C function |$fname| which unpacks a run-time |length| of up to 8
# integers with |$bits| bits each; the counterpart of generate_packx().
#
sub generate_unpackx
{
  my $fname = shift;
  my $type = shift;
  my $bits_per_word = shift;
  my $bits = shift;
  my $block = 8;

  my $mask = bitmask($bits);

  my $inittmp = 1;

  print "static uint32_t\n";
  print "$fname(uint32_t base, const uint8_t *in, uint32_t *out, uint32_t length) {\n";
  print " $type tmp;\n";
  print " if (length == 0)\n";
  print " return 0;\n";

  my $i = 0;
  my $j = 0;

  while (1) {
    my $b = 0;
    while ($b < $bits_per_word and $i < $block) {
      if ($inittmp != 0) {
        print " tmp = *($type *)in;\n";
        $inittmp = 0;
      }

      # new value fits into the current word?
      if ($b + $bits <= $bits_per_word) {
        print " *(out + $j) = base + ((tmp >> $b) & $mask);\n";
        $b += $bits;
      }
      # if not then decode at a word boundary
      else {
        print " *(out + $j) = tmp >> $b;\n";
        print " in += sizeof($type);\n";
        # $d is the number of bits which are stored in the next word
        my $d = ($b + $bits) - $bits_per_word;
        print " tmp = *($type *)in;\n";
        print " *(out + $j) |= (tmp % (1U << $d)) << ($bits - $d);\n";
        print " *(out + $j) += base;\n";
        $b = $d;
      }

      $j++;
      $i++;

      print " if (length == $j)\n goto bail;\n";
    }

    # move to next block
    if ($i < $block) {
      print " in += sizeof($type);\n";
      $inittmp = 1;
    }
    # or reached the end?
    else {
      print "bail:\n";
      print " return ((length * $bits) + 7) / 8;\n";
      last;
    }
  }
  print "}\n\n";
}

#
# Prints the dispatch table "for_<n><b>[33]" of type |$f| with the entries
# <n>0_<b> .. <n>32_<b>. |$b| is either a block size or the string 'x',
# therefore it is only ever used as a string here.
#
sub generate_packer_array
{
  my $n = shift;
  my $b = shift;
  my $f = shift;
  print "$f for_$n$b\[33\] = {\n";
  for (my $i = 0; $i <= 32; $i++) {
    # every entry but the last one is followed by a comma
    print " $n$i" . '_' . $b . ($i < 32 ? ",\n" : "\n");
  }
  print "};\n\n";
}

#
# Prints a C function |$fname| which compares exactly |$block| packed
# integers of |$bits| bits each against |value - base|. On a match the
# emitted code stores the position in |*found| and returns immediately,
# otherwise it returns the number of bytes it has consumed.
#
sub generate_linsearch_impl
{
  my $fname = shift;
  my $type = shift;
  my $bits_per_word = shift;
  my $bits = shift;
  my $block = shift;

  my $consumed = 0;
  my $inittmp = 1;

  my $mask = bitmask($bits);

  print "static uint32_t\n";
  print "$fname(uint32_t base, const uint8_t *in, uint32_t value, int *found) {\n";

  # optimized path for 32bit
  if ($bits == 32) {
    print " uint32_t i;\n";
    print " uint32_t *in32 = (uint32_t *)in;\n";
    print " value -= base;\n";
    print " for (i = 0; i < $block; i++) {\n";
    print " if (in32[i] == value) {\n";
    print " *found = i;\n";
    print " return 0;\n";
    print " }\n";
    print " }\n";
    print " return $block * sizeof(uint32_t);\n";
    print "}\n\n";
    return;
  }

  print " $type tmp, tmp2;\n";
  print " value -= base;\n";
  print " (void)tmp2;\n";

  my $i = 0;
  my $j = 0;

  while (1) {
    my $b = 0;
    while ($b < $bits_per_word and $i < $block) {
      if ($inittmp != 0) {
        print " tmp = *($type *)in;\n";
        $consumed = consume($consumed, $bits_per_word / 8);
        $inittmp = 0;
      }

      # new value fits into the current word?
      if ($b + $bits <= $bits_per_word) {
        print " if (((tmp >> $b) & $mask) == value) {\n";
        print " *found = $j;\n";
        print " return $j;\n";
        print " }\n";
        $b += $bits;
      }
      # if not then decode at a word boundary
      else {
        print " tmp2 = tmp >> $b;\n";
        print " in += sizeof($type);\n";
        $consumed = consume($consumed, $bits_per_word / 8);
        # $d is the number of bits which are stored in the next word
        my $d = ($b + $bits) - $bits_per_word;
        print " tmp = *($type *)in;\n";
        print " if ((tmp2 | (tmp % (1U << $d)) << ($bits - $d)) == value) {\n";
        print " *found = $j;\n";
        print " return $j;\n";
        print " }\n";
        $b = $d;
      }
      $j++;
      $i++;
    }

    # move to next block
    if ($i < $block) {
      print " in += sizeof($type);\n";
      $inittmp = 1;
    }
    # or reached the end?
    else {
      use integer; # force integer division
      my $remaining = $bits_per_word - $b;
      print " /* remaining: $remaining bits */\n";
      $consumed -= $remaining / 8;
      print " return ($consumed);\n";
      last;
    }
  }
  print "}\n\n";
}

# Prints the search function "linsearch<bits>_<block>", based on 32bit words
sub generate_linsearch
{
  my $bits = shift;
  my $block = shift;

  my $fname = "linsearch$bits" . '_' . $block;

  generate_linsearch_impl($fname, 'uint32_t', 32, $bits, $block);
}

#
# Prints a C function |$fname| which compares a run-time |length| of up to
# 8 packed integers of |$bits| bits each against |value - base|.
#
sub generate_linsearchx
{
  my $fname = shift;
  my $type = shift;
  my $bits_per_word = shift;
  my $bits = shift;
  my $block = 8;

  my $mask = bitmask($bits);

  my $inittmp = 1;

  print "static uint32_t\n";
  print "$fname(uint32_t base, const uint8_t *in, uint32_t length, uint32_t value, int *found) {\n";
  print " $type tmp, tmp2;\n";
  print " (void)tmp2;\n";
  print " if (length == 0)\n";
  print " return 0;\n";
  print " value -= base;\n";

  my $i = 0;
  my $j = 0;

  while (1) {
    my $b = 0;
    while ($b < $bits_per_word and $i < $block) {
      if ($inittmp != 0) {
        print " tmp = *($type *)in;\n";
        $inittmp = 0;
      }

      # new value fits into the current word?
      if ($b + $bits <= $bits_per_word) {
        print " if (value == ((tmp >> $b) & $mask)) {\n";
        print " *found = $j;\n";
        print " return $j;\n";
        print " }\n";
        $b += $bits;
      }
      # if not then decode at a word boundary
      else {
        print " tmp2 = tmp >> $b;\n";
        print " in += sizeof($type);\n";
        # $d is the number of bits which are stored in the next word
        my $d = ($b + $bits) - $bits_per_word;
        print " tmp = *($type *)in;\n";
        print " if ((tmp2 | (tmp % (1U << $d)) << ($bits - $d)) == value) {\n";
        print " *found = $j;\n";
        print " return $j;\n";
        print " }\n";
        $b = $d;
      }

      $j++;
      $i++;

      print " if (length == $j)\n goto bail;\n";
    }

    # move to next block
    if ($i < $block) {
      print " in += sizeof($type);\n";
      $inittmp = 1;
    }
    # or reached the end?
516 | else { 517 | print "bail:\n"; 518 | print " return ((length * $bits) + 7) / 8;\n"; 519 | last; 520 | } 521 | } 522 | print "}\n\n"; 523 | } 524 | 525 | # print the prologue 526 | print < 0) 634 | *found = 0; 635 | return 0; 636 | } 637 | 638 | EOL 639 | 640 | for (my $b = 1; $b <= 32; $b++) { 641 | generate_pack($b, 32); 642 | generate_unpack($b, 32); 643 | } 644 | 645 | generate_packer_array('pack', 32, 'for_packfunc_t'); 646 | generate_packer_array('unpack', 32, 'for_unpackfunc_t'); 647 | 648 | for (my $b = 1; $b <= 32; $b++) { 649 | generate_pack($b, 16); 650 | generate_unpack($b, 16); 651 | } 652 | 653 | generate_packer_array('pack', 16, 'for_packfunc_t'); 654 | generate_packer_array('unpack', 16, 'for_unpackfunc_t'); 655 | 656 | for (my $b = 1; $b <= 32; $b++) { 657 | generate_pack($b, 8); 658 | generate_unpack($b, 8); 659 | } 660 | 661 | generate_packer_array('pack', 8, 'for_packfunc_t'); 662 | generate_packer_array('unpack', 8, 'for_unpackfunc_t'); 663 | 664 | for (my $b = 1; $b <= 32; $b++) { 665 | generate_packx("pack$b" . '_x', 'uint32_t', 32, $b); 666 | generate_unpackx("unpack$b" . '_x', 'uint32_t', 32, $b); 667 | } 668 | 669 | generate_packer_array('pack', 'x', 'for_packxfunc_t'); 670 | generate_packer_array('unpack', 'x', 'for_unpackxfunc_t'); 671 | 672 | for (my $b = 1; $b <= 32; $b++) { 673 | generate_linsearch($b, 32); 674 | } 675 | 676 | generate_packer_array('linsearch', 32, 'for_linsearchfunc_t'); 677 | 678 | for (my $b = 1; $b <= 32; $b++) { 679 | generate_linsearch($b, 16); 680 | } 681 | 682 | generate_packer_array('linsearch', 16, 'for_linsearchfunc_t'); 683 | 684 | for (my $b = 1; $b <= 32; $b++) { 685 | generate_linsearch($b, 8); 686 | } 687 | 688 | generate_packer_array('linsearch', 8, 'for_linsearchfunc_t'); 689 | 690 | for (my $b = 1; $b <= 32; $b++) { 691 | generate_linsearchx("linsearch$b" . 
'_x', 'uint32_t', 32, $b); 692 | } 693 | 694 | generate_packer_array('linsearch', 'x', 'for_linsearchxfunc_t'); 695 | 696 | -------------------------------------------------------------------------------- /msvc2013/libfor/libfor.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | 15 | CompileAsC 16 | 17 | 18 | 19 | 20 | 21 | 22 | {12718772-D61F-49AE-8F58-B5D20CA8C3CD} 23 | Win32Proj 24 | libfor 25 | 26 | 27 | 28 | StaticLibrary 29 | true 30 | v120 31 | Unicode 32 | 33 | 34 | StaticLibrary 35 | false 36 | v120 37 | true 38 | Unicode 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | Level3 56 | Disabled 57 | WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) 58 | 59 | 60 | Windows 61 | true 62 | 63 | 64 | 65 | 66 | Level3 67 | 68 | 69 | MaxSpeed 70 | true 71 | true 72 | WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) 73 | 74 | 75 | Windows 76 | true 77 | true 78 | true 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /msvc2013/msvc2013.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.21005.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libfor", "libfor\libfor.vcxproj", "{12718772-D61F-49AE-8F58-B5D20CA8C3CD}" 7 | EndProject 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test", "test\test.vcxproj", "{193A0D68-FEC4-46FE-BA94-9360985922DF}" 9 | ProjectSection(ProjectDependencies) = postProject 10 | {12718772-D61F-49AE-8F58-B5D20CA8C3CD} = {12718772-D61F-49AE-8F58-B5D20CA8C3CD} 11 | EndProjectSection 12 | EndProject 13 | Global 14 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 15 | Debug|Win32 = Debug|Win32 16 | 
Release|Win32 = Release|Win32 17 | EndGlobalSection 18 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 19 | {12718772-D61F-49AE-8F58-B5D20CA8C3CD}.Debug|Win32.ActiveCfg = Debug|Win32 20 | {12718772-D61F-49AE-8F58-B5D20CA8C3CD}.Debug|Win32.Build.0 = Debug|Win32 21 | {12718772-D61F-49AE-8F58-B5D20CA8C3CD}.Release|Win32.ActiveCfg = Release|Win32 22 | {12718772-D61F-49AE-8F58-B5D20CA8C3CD}.Release|Win32.Build.0 = Release|Win32 23 | {193A0D68-FEC4-46FE-BA94-9360985922DF}.Debug|Win32.ActiveCfg = Debug|Win32 24 | {193A0D68-FEC4-46FE-BA94-9360985922DF}.Debug|Win32.Build.0 = Debug|Win32 25 | {193A0D68-FEC4-46FE-BA94-9360985922DF}.Release|Win32.ActiveCfg = Release|Win32 26 | {193A0D68-FEC4-46FE-BA94-9360985922DF}.Release|Win32.Build.0 = Release|Win32 27 | EndGlobalSection 28 | GlobalSection(SolutionProperties) = preSolution 29 | HideSolutionNode = FALSE 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /msvc2013/test/test.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | {193A0D68-FEC4-46FE-BA94-9360985922DF} 15 | Win32Proj 16 | test 17 | 18 | 19 | 20 | Application 21 | true 22 | v120 23 | Unicode 24 | 25 | 26 | Application 27 | false 28 | v120 29 | true 30 | Unicode 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | true 44 | 45 | 46 | false 47 | 48 | 49 | 50 | 51 | 52 | Level3 53 | Disabled 54 | WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 55 | true 56 | 57 | 58 | Console 59 | true 60 | libfor.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 61 | ..\..\Debug;..\Debug 62 | 63 | 64 | 65 | 66 | Level3 67 | 68 | 69 | MaxSpeed 70 | true 71 | true 72 | WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 73 | true 74 | 75 | 
76 | Console 77 | true 78 | true 79 | true 80 | ..\..\Release;..\Release 81 | libfor.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include 17 | #include 18 | 19 | #include "for.h" 20 | 21 | typedef uint32_t (*for_unpackfunc_t) (uint32_t, const uint8_t *, uint32_t *); 22 | typedef uint32_t (*for_packfunc_t) (uint32_t, const uint32_t *, uint8_t *); 23 | typedef uint32_t (*for_unpackxfunc_t)(uint32_t, const uint8_t *, uint32_t *, 24 | uint32_t); 25 | typedef uint32_t (*for_packxfunc_t) (uint32_t, const uint32_t *, uint8_t *, 26 | uint32_t); 27 | 28 | extern for_packfunc_t for_pack32[33]; 29 | extern for_unpackfunc_t for_unpack32[33]; 30 | extern for_packfunc_t for_pack16[33]; 31 | extern for_unpackfunc_t for_unpack16[33]; 32 | extern for_packfunc_t for_pack8[33]; 33 | extern for_unpackfunc_t for_unpack8[33]; 34 | extern for_packxfunc_t for_packx[33]; 35 | extern for_unpackxfunc_t for_unpackx[33]; 36 | 37 | #define VERIFY(c) while (!(c)) { \ 38 | printf("%s:%d: expression failed\n", \ 39 | __FILE__, __LINE__); \ 40 | abort(); \ 41 | } 42 | 43 | #define VERIFY_ARRAY(a1, a2, len) \ 44 | do { \ 45 | uint32_t __i; \ 46 | for (__i = 0; __i < len; __i++) { \ 47 | if (a1[__i] != a2[__i]) { \ 48 | printf("data mismatch at %u\n", __i); \ 49 | abort(); \ 50 | } \ 51 | } \ 52 | } while (0) 53 | 54 | static uint32_t inbuf[1024]; 55 | 56 | static uint32_t * 57 | generate_input(uint32_t base, uint32_t length, uint32_t bits) 58 | { 59 | uint32_t i; 60 | uint32_t max = (1 << bits) - 1; 61 | 62 | for (i = 0; i < length; i++) { 63 | if (bits == 0) 64 | inbuf[i] = base; 65 | else if (bits == 32) 66 | inbuf[i] = base + i; 67 | else 68 | inbuf[i] = base + (i % max); 69 | } 70 | 71 | return &inbuf[0]; 72 | } 73 | 74 | static void 75 | highlevel_sorted(uint32_t length) 76 | { 77 | uint32_t i, s1, s2, s3; 78 | uint8_t out[1024 * 10]; 79 | uint32_t in[1024 * 10]; 80 | uint32_t tmp[1024 * 10]; 81 | 82 | printf("highlevel sorted %u ints\n", length); 83 | 84 | for (i = 0; i < length; i++) 85 | in[i] = 33 + i; 86 | 87 | s3 = for_compressed_size_sorted(in, length); 88 | tmp[s3] = 'x'; 89 | s1 = 
for_compress_sorted(in, out, length); 90 | VERIFY(tmp[s3] == 'x'); 91 | s2 = for_uncompress(out, tmp, length); 92 | VERIFY(s1 == s2); 93 | VERIFY(s2 == s3); 94 | VERIFY_ARRAY(in, tmp, length); 95 | 96 | for (i = 0; i < length; i++) 97 | VERIFY(in[i] == for_select(out, i)); 98 | 99 | for (i = 0; i < length; i++) 100 | VERIFY(i == for_linear_search(out, length, in[i])); 101 | 102 | for (i = 0; i < length; i++) { 103 | uint32_t actual; 104 | uint32_t index = for_lower_bound_search(out, length, in[i], &actual); 105 | VERIFY(in[i] == in[index]); 106 | VERIFY(actual == in[i]); 107 | } 108 | } 109 | 110 | static uint32_t 111 | rnd() 112 | { 113 | static uint32_t a = 3; 114 | a = (((a * 214013L + 2531011L) >> 16) & 32767); 115 | return (a); 116 | } 117 | 118 | static void 119 | highlevel_unsorted(uint32_t length) 120 | { 121 | uint32_t i, s1, s2, s3; 122 | uint8_t out[1024 * 10]; 123 | uint32_t in[1024 * 10]; 124 | uint32_t tmp[1024 * 10]; 125 | 126 | printf("highlevel unsorted %u ints\n", length); 127 | 128 | for (i = 0; i < length; i++) 129 | in[i] = 7 + (rnd() - 7); 130 | 131 | s3 = for_compressed_size_unsorted(in, length); 132 | tmp[s3] = 'x'; 133 | s1 = for_compress_unsorted(in, out, length); 134 | s2 = for_uncompress(out, tmp, length); 135 | VERIFY(s1 == s2); 136 | VERIFY(s2 == s3); 137 | VERIFY(tmp[s3] == 'x'); 138 | VERIFY_ARRAY(in, tmp, length); 139 | 140 | for (i = 0; i < length; i++) 141 | VERIFY(in[i] == for_select(out, i)); 142 | 143 | for (i = 0; i < length; i++) { 144 | uint32_t index = for_linear_search(out, length, in[i]); 145 | VERIFY(in[i] == in[index]); 146 | } 147 | } 148 | 149 | static void 150 | append_sorted() 151 | { 152 | #undef max 153 | #define max 10000 154 | int i; 155 | uint32_t s1, s2; 156 | 157 | uint8_t out1[max * 8] = {0}; 158 | uint8_t out2[max * 8] = {0}; 159 | uint32_t in[max]; 160 | 161 | printf("append sorted, %d ints\n", max); 162 | 163 | for (i = 0; i < max; i++) { 164 | in[i] = i; 165 | 166 | /* insert with "append" */ 167 | s1 = 
for_append_sorted(&out1[0], i, in[i]); 168 | 169 | /* compress the full sequence */ 170 | s2 = for_compress_sorted(in, out2, i + 1); 171 | 172 | VERIFY(s1 == s2); 173 | VERIFY_ARRAY(out1, out2, s1); 174 | } 175 | } 176 | 177 | static void 178 | append_unsorted() 179 | { 180 | #undef max 181 | #define max 10000 182 | int i; 183 | uint32_t s1, s2; 184 | 185 | uint8_t out1[max * 8] = {0}; 186 | uint8_t out2[max * 8] = {0}; 187 | uint32_t in[max]; 188 | 189 | printf("append unsorted, %d ints\n", max); 190 | 191 | for (i = 0; i < max; i++) { 192 | in[i] = 7 + (rnd() - 7); 193 | 194 | /* insert with "append" */ 195 | s1 = for_append_unsorted(&out1[0], i, in[i]); 196 | 197 | /* compress the full sequence */ 198 | s2 = for_compress_unsorted(in, out2, i + 1); 199 | 200 | VERIFY(s1 == s2); 201 | VERIFY_ARRAY(out1, out2, s1); 202 | } 203 | } 204 | 205 | static void 206 | append_sorted_bignum() 207 | { 208 | #undef max 209 | #define max 10 210 | int i; 211 | uint32_t s1, s2; 212 | 213 | uint8_t out1[max * 8] = {0}; 214 | uint8_t out2[max * 8] = {0}; 215 | uint32_t in[max]; 216 | 217 | printf("append sorted bignum, %d ints\n", max); 218 | 219 | for (i = 0; i < max; i++) { 220 | in[i] = 1 << (17 + i); 221 | 222 | /* insert with "append" */ 223 | s1 = for_append_sorted(&out1[0], i, in[i]); 224 | 225 | /* compress the full sequence */ 226 | s2 = for_compress_sorted(in, out2, i + 1); 227 | 228 | VERIFY(s1 == s2); 229 | VERIFY_ARRAY(out1, out2, s1); 230 | } 231 | } 232 | 233 | static void 234 | lowlevel_block_func(uint32_t bits, for_packfunc_t pack, for_unpackfunc_t unpack, 235 | uint32_t *in, uint32_t base, uint32_t length) 236 | { 237 | uint32_t i; 238 | uint8_t out[1024]; 239 | uint32_t tmp[1024]; 240 | 241 | uint32_t s1 = pack(base, in, out); 242 | uint32_t s2 = unpack(base, out, tmp); 243 | VERIFY(s1 == s2); 244 | VERIFY_ARRAY(in, tmp, length); 245 | 246 | for (i = 0; i < length; i++) 247 | VERIFY(in[i] == for_select_bits(out, base, bits, i)); 248 | 249 | for (i = 0; i < 
length; i++) { 250 | uint32_t index = for_linear_search_bits(out, length, base, bits, in[i]); 251 | VERIFY(in[i] == in[index]); 252 | } 253 | } 254 | 255 | static void 256 | lowlevel_blockx_func(uint32_t bits, for_packxfunc_t pack, 257 | for_unpackxfunc_t unpack, uint32_t *in, uint32_t base, 258 | uint32_t length) 259 | { 260 | uint32_t i; 261 | uint8_t out[1024]; 262 | uint32_t tmp[1024]; 263 | 264 | uint32_t s1 = pack(base, in, out, length); 265 | uint32_t s2 = unpack(base, out, tmp, length); 266 | VERIFY(s1 == s2); 267 | VERIFY_ARRAY(in, tmp, length); 268 | 269 | for (i = 0; i < length; i++) 270 | VERIFY(in[i] == for_select_bits(out, base, bits, i)); 271 | 272 | for (i = 0; i < length; i++) { 273 | uint32_t index = for_linear_search_bits(out, length, base, bits, in[i]); 274 | VERIFY(in[i] == in[index]); 275 | } 276 | } 277 | 278 | static void 279 | lowlevel_block32(uint32_t bits) 280 | { 281 | uint32_t *in = generate_input(10, 32, bits); 282 | 283 | printf("lowlevel pack/unpack 32 ints, %2d bits\n", bits); 284 | lowlevel_block_func(bits, for_pack32[bits], for_unpack32[bits], in, 10, 32); 285 | } 286 | 287 | static void 288 | lowlevel_block16(uint32_t bits) 289 | { 290 | uint32_t *in = generate_input(10, 16, bits); 291 | 292 | printf("lowlevel pack/unpack 16 ints, %2d bits\n", bits); 293 | lowlevel_block_func(bits, for_pack16[bits], for_unpack16[bits], in, 10, 16); 294 | } 295 | 296 | static void 297 | lowlevel_block8(uint32_t bits) 298 | { 299 | uint32_t *in = generate_input(10, 8, bits); 300 | 301 | printf("lowlevel pack/unpack 8 ints, %2d bits\n", bits); 302 | lowlevel_block_func(bits, for_pack8[bits], for_unpack8[bits], in, 10, 8); 303 | } 304 | 305 | static void 306 | lowlevel_blockx(int length, uint32_t bits) 307 | { 308 | uint32_t *in = generate_input(10, 8, bits); 309 | 310 | printf("lowlevel pack/unpack %d ints, %2d bits\n", length, bits); 311 | lowlevel_blockx_func(bits, for_packx[bits], 312 | for_unpackx[bits], in, 10, length); 313 | } 314 | 315 | int 
316 | main() 317 | { 318 | int i, b; 319 | 320 | for (i = 0; i <= 32; i++) 321 | lowlevel_block32(i); 322 | 323 | for (i = 0; i <= 32; i++) 324 | lowlevel_block16(i); 325 | 326 | for (i = 0; i <= 32; i++) 327 | lowlevel_block8(i); 328 | 329 | for (i = 0; i < 32; i++) { 330 | for (b = 0; b < 8; b++) 331 | lowlevel_blockx(b, i); 332 | } 333 | 334 | highlevel_sorted(0); 335 | highlevel_sorted(1); 336 | highlevel_sorted(2); 337 | highlevel_sorted(3); 338 | highlevel_sorted(16); 339 | highlevel_sorted(17); 340 | highlevel_sorted(32); 341 | highlevel_sorted(33); 342 | highlevel_sorted(64); 343 | highlevel_sorted(65); 344 | highlevel_sorted(128); 345 | highlevel_sorted(129); 346 | highlevel_sorted(256); 347 | highlevel_sorted(257); 348 | highlevel_sorted(1024); 349 | highlevel_sorted(1025); 350 | highlevel_sorted(1333); 351 | 352 | highlevel_unsorted(0); 353 | highlevel_unsorted(1); 354 | highlevel_unsorted(2); 355 | highlevel_unsorted(3); 356 | highlevel_unsorted(16); 357 | highlevel_unsorted(17); 358 | highlevel_unsorted(32); 359 | highlevel_unsorted(33); 360 | highlevel_unsorted(64); 361 | highlevel_unsorted(65); 362 | highlevel_unsorted(128); 363 | highlevel_unsorted(129); 364 | highlevel_unsorted(256); 365 | highlevel_unsorted(257); 366 | highlevel_unsorted(1024); 367 | highlevel_unsorted(1025); 368 | highlevel_unsorted(1333); 369 | 370 | append_sorted(); 371 | append_sorted_bignum(); 372 | append_unsorted(); 373 | 374 | printf("\nsuccess!\n"); 375 | return 0; 376 | } 377 | --------------------------------------------------------------------------------