├── cmake └── maskedvbyteConfig.cmake.in ├── .gitignore ├── .github └── workflows │ └── ubuntu.yml ├── include ├── varintencode.h └── varintdecode.h ├── examples └── example.c ├── Makefile ├── src ├── varintencode.c └── varintdecode.c ├── tests └── unit.c ├── CMakeLists.txt ├── README.md └── LICENSE /cmake/maskedvbyteConfig.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | include(CMakeFindDependencyMacro) 4 | # Add find_dependency(...) here if the library gains external deps 5 | 6 | include("${CMAKE_CURRENT_LIST_DIR}/maskedvbyteTargets.cmake") 7 | 8 | check_required_components(maskedvbyte) 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. 
Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | -------------------------------------------------------------------------------- /.github/workflows/ubuntu.yml: -------------------------------------------------------------------------------- 1 | name: Ubuntu 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | ubuntu-build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3 10 | - name: Use Makefile 11 | run: | 12 | make && 13 | ./unit 14 | - name: Use CMake 15 | run: | 16 | cmake -B build && 17 | cmake --build build && 18 | ctest --test-dir build 19 | 20 | -------------------------------------------------------------------------------- /include/varintencode.h: -------------------------------------------------------------------------------- 1 | #ifndef VARINTENCODE_H_ 2 | #define VARINTENCODE_H_ 3 | 4 | #include 5 | #include 6 | 7 | #if defined(__cplusplus) 8 | extern "C" { 9 | #endif 10 | 11 | // Encode an array of a given length read from in to bout in varint format. 12 | // Returns the number of bytes written. 13 | size_t vbyte_encode(const uint32_t *in, size_t length, uint8_t *bout); 14 | 15 | // Encode an array of a given length read from in to bout in varint format with differential 16 | // coding starting at value prev. (Setting prev to 0 is a good default.) 17 | // 18 | // Returns the number of bytes written. 
19 | size_t vbyte_encode_delta(const uint32_t *in, size_t length, uint8_t *bout, uint32_t prev); 20 | 21 | #if defined(__cplusplus) 22 | } 23 | #endif 24 | 25 | #endif /* VARINTENCODE_H_ */ 26 | -------------------------------------------------------------------------------- /examples/example.c: -------------------------------------------------------------------------------- 1 | /* NOTE(review): the three bare #include directives below have no header name in this listing; main() uses printf, malloc/free and assert. */ #include 2 | #include 3 | #include 4 | 5 | #include "varintencode.h" 6 | #include "varintdecode.h" 7 | 8 | /* Example: encodes N = 5000 copies of the value 120 with vbyte_encode, decodes them with masked_vbyte_decode, asserts that the byte count written equals the byte count read, frees the three buffers and prints the compressed size. The decoded values in recovdata are not compared with datain. Returns 0. */ int main() { 9 | int N = 5000; 10 | /* NOTE(review): none of the three malloc results is checked for NULL before use. */ uint32_t * datain = malloc(N * sizeof(uint32_t)); 11 | /* Output buffer sized at sizeof(uint32_t) bytes per input value. NOTE(review): vbyte_encode in src/varintencode.c can write 5 bytes for one value, so this sizing relies on the inputs being small (here every value is 120, one byte each). */ uint8_t * compressedbuffer = malloc(N * sizeof(uint32_t)); 12 | uint32_t * recovdata = malloc(N * sizeof(uint32_t)); 13 | for (int k = 0; k < N; ++k) 14 | datain[k] = 120; 15 | size_t compsize = vbyte_encode(datain, N, compressedbuffer); // encoding 16 | // here the result is stored in compressedbuffer using compsize bytes 17 | size_t compsize2 = masked_vbyte_decode(compressedbuffer, recovdata, 18 | N); // decoding (fast) 19 | assert(compsize == compsize2); 20 | free(datain); 21 | free(compressedbuffer); 22 | free(recovdata); 23 | printf("Compressed %d integers down to %d bytes.\n",N,(int) compsize); 24 | return 0; 25 | } 26 | 27 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # minimalist makefile 2 | .SUFFIXES: 3 | # 4 | .SUFFIXES: .cpp .o .c .h 5 | 6 | CFLAGS = -fPIC -msse4 -std=c99 -O3 -Wall -Wextra -pedantic -Wshadow 7 | LDFLAGS = -shared 8 | LIBNAME=libmaskedvbyte.so.0.0.1 9 | all: unit $(LIBNAME) 10 | test: 11 | ./unit 12 | install: $(OBJECTS) 13 | cp $(LIBNAME) /usr/local/lib 14 | ln -s /usr/local/lib/$(LIBNAME) /usr/local/lib/libmaskedvbyte.so 15 | ldconfig 16 | cp $(HEADERS) /usr/local/include 17 | 18 | 19 | 20 | HEADERS=./include/varintdecode.h ./include/varintencode.h 21 | 22 | uninstall: 23 | for h in $(HEADERS) ; do rm /usr/local/$$h; done 24 | rm 
/usr/local/lib/$(LIBNAME) 25 | rm /usr/local/lib/libmaskedvbyte.so 26 | ldconfig 27 | 28 | 29 | OBJECTS= varintdecode.o varintencode.o 30 | 31 | 32 | varintencode.o: ./src/varintencode.c $(HEADERS) 33 | $(CC) $(CFLAGS) -c ./src/varintencode.c -Iinclude 34 | 35 | varintdecode.o: ./src/varintdecode.c $(HEADERS) 36 | $(CC) $(CFLAGS) -c ./src/varintdecode.c -Iinclude 37 | 38 | 39 | 40 | $(LIBNAME): $(OBJECTS) 41 | $(CC) $(CFLAGS) -o $(LIBNAME) $(OBJECTS) $(LDFLAGS) 42 | 43 | 44 | 45 | 46 | example: ./examples/example.c $(HEADERS) $(OBJECTS) 47 | $(CC) $(CFLAGS) -o example ./examples/example.c -Iinclude $(OBJECTS) 48 | 49 | unit: ./tests/unit.c $(HEADERS) $(OBJECTS) 50 | $(CC) $(CFLAGS) -o unit ./tests/unit.c -Iinclude $(OBJECTS) 51 | dynunit: ./tests/unit.c $(HEADERS) $(LIBNAME) 52 | $(CC) $(CFLAGS) -o dynunit ./tests/unit.c -Iinclude -lmaskedvbyte 53 | 54 | clean: 55 | rm -f unit *.o $(LIBNAME) example 56 | -------------------------------------------------------------------------------- /include/varintdecode.h: -------------------------------------------------------------------------------- 1 | #ifndef VARINTDECODE_H_ 2 | #define VARINTDECODE_H_ 3 | 4 | #include 5 | #include 6 | 7 | #if defined(__cplusplus) 8 | extern "C" { 9 | #endif 10 | 11 | // Read "length" 32-bit integers in varint format from in, storing the result in out. Returns the number of bytes read. 12 | size_t masked_vbyte_decode(const uint8_t* in, uint32_t* out, uint64_t length); 13 | 14 | // Read "length" 32-bit integers in varint format from in, storing the result in out with differential coding starting at prev. Setting prev to zero is a good default. Returns the number of bytes read. 15 | size_t masked_vbyte_decode_delta(const uint8_t* in, uint32_t* out, uint64_t length, uint32_t prev); 16 | 17 | // Read 32-bit integers in varint format from in, reading inputsize bytes, storing the result in out. Returns the number of integers read. 
18 | size_t masked_vbyte_decode_fromcompressedsize(const uint8_t* in, uint32_t* out, 19 | size_t inputsize); 20 | 21 | // Read 32-bit integers in varint format from in, reading inputsize bytes, storing the result in out with differential coding starting at prev. Setting prev to zero is a good default. Returns the number of integers read. 22 | size_t masked_vbyte_decode_fromcompressedsize_delta(const uint8_t* in, uint32_t* out, 23 | size_t inputsize, uint32_t prev); 24 | 25 | // assuming that the data was differentially-coded, retrieve one particular value (at location slot) 26 | uint32_t masked_vbyte_select_delta(const uint8_t *in, uint64_t length, 27 | uint32_t prev, size_t slot); 28 | 29 | // return the position of the first value >= key, assumes differential-coded values 30 | int masked_vbyte_search_delta(const uint8_t *in, uint64_t length, uint32_t prev, 31 | uint32_t key, uint32_t *presult); 32 | 33 | #if defined(__cplusplus) 34 | } 35 | #endif 36 | 37 | #endif /* VARINTDECODE_H_ */ 38 | -------------------------------------------------------------------------------- /src/varintencode.c: -------------------------------------------------------------------------------- 1 | #include "varintencode.h" 2 | 3 | 4 | /* Differential VByte encoder. For each k in [0, length) it encodes in[k] - prev, computed as a uint32_t, and then sets prev to in[k]. Each difference is written to bout as 1 to 5 bytes: 7 payload bits per byte, lowest bits first, with bit 7 set on every byte except the last one of the value. The caller must provide room in bout for up to 5 bytes per input value; no bounds are checked. Returns the number of bytes written (bout - initbout). */ size_t vbyte_encode_delta(const uint32_t *in, size_t length, uint8_t *bout, uint32_t prev) { 5 | uint8_t *initbout = bout; 6 | for (size_t k = 0; k < length; ++k) { 7 | const uint32_t val = in[k] - prev; 8 | prev = in[k]; 9 | /* val < 128: a single byte with bit 7 clear */ if (val < (1U << 7)) { 10 | *bout = val & 0x7F; 11 | ++bout; 12 | } else if (val < (1U << 14)) { 13 | *bout = (uint8_t)((val & 0x7F) | (1U << 7)); 14 | ++bout; 15 | *bout = (uint8_t)(val >> 7); 16 | ++bout; 17 | } else if (val < (1U << 21)) { 18 | *bout = (uint8_t)((val & 0x7F) | (1U << 7)); 19 | ++bout; 20 | *bout = (uint8_t)(((val >> 7) & 0x7F) | (1U << 7)); 21 | ++bout; 22 | *bout = (uint8_t)(val >> 14); 23 | ++bout; 24 | } else if (val < (1U << 28)) { 25 | *bout = (uint8_t)((val & 0x7F) | (1U << 7)); 26 | ++bout; 27 | *bout = 
(uint8_t)(((val >> 7) & 0x7F) | (1U << 7)); 28 | ++bout; 29 | *bout = (uint8_t)(((val >> 14) & 0x7F) | (1U << 7)); 30 | ++bout; 31 | *bout = (uint8_t)(val >> 21); 32 | ++bout; 33 | } else { 34 | /* val >= 1 << 28: five bytes, the last one holds the top 4 bits */ *bout = (uint8_t)((val & 0x7F) | (1U << 7)); 35 | ++bout; 36 | *bout = (uint8_t)(((val >> 7) & 0x7F) | (1U << 7)); 37 | ++bout; 38 | *bout = (uint8_t)(((val >> 14) & 0x7F) | (1U << 7)); 39 | ++bout; 40 | *bout = (uint8_t)(((val >> 21) & 0x7F) | (1U << 7)); 41 | ++bout; 42 | *bout = (uint8_t)(val >> 28); 43 | ++bout; 44 | } 45 | } 46 | return bout - initbout; 47 | } 48 | 49 | /* Plain VByte encoder. Writes each in[k] for k in [0, length) to bout as 1 to 5 bytes: 7 payload bits per byte, lowest bits first, with bit 7 set on every byte except the last one of the value. The caller must provide room in bout for up to 5 bytes per input value; no bounds are checked. Returns the number of bytes written. NOTE(review): the branch ladder is the same as in vbyte_encode_delta above; a shared static helper would remove the duplication. */ size_t vbyte_encode(const uint32_t *in, size_t length, uint8_t *bout) { 50 | uint8_t *initbout = bout; 51 | for (size_t k = 0; k < length; ++k) { 52 | const uint32_t val = in[k]; 53 | 54 | /* val < 128: a single byte with bit 7 clear */ if (val < (1U << 7)) { 55 | *bout = val & 0x7F; 56 | ++bout; 57 | } else if (val < (1U << 14)) { 58 | *bout = (uint8_t)((val & 0x7F) | (1U << 7)); 59 | ++bout; 60 | *bout = (uint8_t)(val >> 7); 61 | ++bout; 62 | } else if (val < (1U << 21)) { 63 | *bout = (uint8_t)((val & 0x7F) | (1U << 7)); 64 | ++bout; 65 | *bout = (uint8_t)(((val >> 7) & 0x7F) | (1U << 7)); 66 | ++bout; 67 | *bout = (uint8_t)(val >> 14); 68 | ++bout; 69 | } else if (val < (1U << 28)) { 70 | *bout = (uint8_t)((val & 0x7F) | (1U << 7)); 71 | ++bout; 72 | *bout = (uint8_t)(((val >> 7) & 0x7F) | (1U << 7)); 73 | ++bout; 74 | *bout = (uint8_t)(((val >> 14) & 0x7F) | (1U << 7)); 75 | ++bout; 76 | *bout = (uint8_t)(val >> 21); 77 | ++bout; 78 | } else { 79 | /* val >= 1 << 28: five bytes, the last one holds the top 4 bits */ *bout = (uint8_t)((val & 0x7F) | (1U << 7)); 80 | ++bout; 81 | *bout = (uint8_t)(((val >> 7) & 0x7F) | (1U << 7)); 82 | ++bout; 83 | *bout = (uint8_t)(((val >> 14) & 0x7F) | (1U << 7)); 84 | ++bout; 85 | *bout = (uint8_t)(((val >> 21) & 0x7F) | (1U << 7)); 86 | ++bout; 87 | *bout = (uint8_t)(val >> 28); 88 | ++bout; 89 | } 90 | } 91 | return bout - initbout; 92 | } 93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /tests/unit.c: 
-------------------------------------------------------------------------------- 1 | /* NOTE(review): the two bare #include directives below have no header name in this listing; main() uses printf and malloc/free. */ #include 2 | #include 3 | 4 | #include "varintencode.h" 5 | #include "varintdecode.h" 6 | 7 | /* Round-trip test. For length = 0, 1, ..., 128 and then 256, 512, ..., 4096 it (1) fills datain with a constant gap for each power of three up to 387420489, encodes with vbyte_encode and checks masked_vbyte_decode and masked_vbyte_decode_fromcompressedsize against the input; (2) fills datain[k] = gap * k for each power of three up to 531441, encodes with vbyte_encode_delta (prev = 0) and checks masked_vbyte_decode_delta and masked_vbyte_decode_fromcompressedsize_delta. Prints a message and returns -1 on the first mismatch (the buffers are not freed on that path); returns 0 after printing "Code looks good." NOTE(review): the malloc results are not checked for NULL. */ int main() { 8 | int N = 4096; 9 | uint32_t * datain = malloc(N * sizeof(uint32_t)); 10 | /* 2 * sizeof(uint32_t) bytes of output space per input value */ uint8_t * compressedbuffer = malloc(2 * N * sizeof(uint32_t)); 11 | uint32_t * recovdata = malloc(N * sizeof(uint32_t)); 12 | 13 | for (int length = 0; length <= N; ) { 14 | printf("length = %d \n",length); 15 | printf("Regular VByte \n"); 16 | for (uint32_t gap = 1; gap <= 387420489; gap *= 3) { 17 | for (int k = 0; k < length; ++k) 18 | datain[k] = gap; 19 | size_t compsize = vbyte_encode(datain, length, compressedbuffer); 20 | size_t usedbytes = masked_vbyte_decode(compressedbuffer, recovdata, 21 | length); 22 | if (compsize != usedbytes) { 23 | printf("[masked_vbyte_decode] code is buggy gap = %d, size mismatch %d %d \n",(int) gap, (int) compsize, (int) usedbytes); 24 | return -1; 25 | } 26 | for (int k = 0; k < length; ++k) { 27 | if (recovdata[k] != datain[k]) { 28 | printf("[masked_vbyte_decode] code is buggy gap = %d\n", (int) gap); 29 | return -1; 30 | } 31 | } 32 | size_t decodedints = masked_vbyte_decode_fromcompressedsize(compressedbuffer, recovdata, 33 | compsize); 34 | if (decodedints != (size_t) length) { 35 | /* NOTE(review): the failed comparison is decodedints vs length, but the message prints compsize and usedbytes. */ printf("[masked_vbyte_decode_fromcompressedsize] code is buggy gap = %d, size mismatch %d %d \n",(int) gap, (int) compsize, (int) usedbytes); 36 | return -1; 37 | } 38 | for (int k = 0; k < length; ++k) { 39 | if (recovdata[k] != datain[k]) { 40 | printf("[masked_vbyte_decode_fromcompressedsize] code is buggy gap = %d\n",(int) gap); 41 | return -1; 42 | } 43 | } 44 | 45 | } 46 | printf("Delta VByte \n"); 47 | for (size_t gap = 1; gap <= 531441; gap *= 3) { 48 | for (int k = 0; k < length; ++k) 49 | datain[k] = gap * k; 50 | size_t compsize = vbyte_encode_delta(datain, length, compressedbuffer,0); 51 | size_t usedbytes = masked_vbyte_decode_delta(compressedbuffer, recovdata, 52 | 
length,0); 53 | if (compsize != usedbytes) { 54 | printf("[masked_vbyte_decode_delta] code is buggy gap = %d, size mismatch %d %d \n",(int) gap, (int) compsize, (int) usedbytes); 55 | return -1; 56 | } 57 | for (int k = 0; k < length; ++k) { 58 | if (recovdata[k] != datain[k]) { 59 | printf("[masked_vbyte_decode_delta] code is buggy gap = %d\n", (int) gap); 60 | return -1; 61 | } 62 | } 63 | size_t decodedints = masked_vbyte_decode_fromcompressedsize_delta(compressedbuffer, recovdata, 64 | compsize,0); 65 | if (decodedints != (size_t) length) { 66 | /* NOTE(review): the failed comparison is decodedints vs length, but the message prints compsize and usedbytes. */ printf("[masked_vbyte_decode_fromcompressedsize_delta] code is buggy gap = %d, size mismatch %d %d \n",(int) gap, (int) compsize, (int) usedbytes); 67 | return -1; 68 | } 69 | for (int k = 0; k < length; ++k) { 70 | if (recovdata[k] != datain[k]) { 71 | printf("[masked_vbyte_decode_fromcompressedsize_delta] code is buggy gap = %d\n",(int) gap); 72 | return -1; 73 | } 74 | } 75 | 76 | } 77 | 78 | 79 | /* step by 1 while length is below 128, then double */ if(length < 128) 80 | ++length; 81 | else { 82 | length *= 2; 83 | } 84 | } 85 | free(datain); 86 | free(compressedbuffer); 87 | free(recovdata); 88 | printf("Code looks good.\n"); 89 | return 0; 90 | } 91 | 92 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.15) 2 | 3 | project(MaskedVByte VERSION 0.0.1 LANGUAGES C) 4 | 5 | include(GNUInstallDirs) 6 | include(CMakePackageConfigHelpers) 7 | include(CheckCCompilerFlag) 8 | 9 | option(MASKEDVBYTE_BUILD_EXAMPLES "Build examples" ON) 10 | option(MASKEDVBYTE_BUILD_TESTS "Build tests" ON) 11 | 12 | # Library 13 | add_library(maskedvbyte 14 | src/varintdecode.c 15 | src/varintencode.c 16 | ) 17 | add_library(maskedvbyte::maskedvbyte ALIAS maskedvbyte) 18 | 19 | # Public headers 20 | set(MASKEDVBYTE_PUBLIC_HEADERS 21 | include/varintdecode.h 22 | include/varintencode.h 23 | ) 24 | 25 | # Properties 26 | 
set_target_properties(maskedvbyte PROPERTIES 27 | OUTPUT_NAME maskedvbyte 28 | VERSION ${PROJECT_VERSION} 29 | SOVERSION ${PROJECT_VERSION_MAJOR} 30 | POSITION_INDEPENDENT_CODE ON 31 | ) 32 | 33 | target_compile_features(maskedvbyte PUBLIC c_std_99) 34 | 35 | target_include_directories(maskedvbyte 36 | PUBLIC 37 | $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> 38 | $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> 39 | ) 40 | 41 | # Enable SSE4.1 on GCC/Clang when available 42 | if (CMAKE_C_COMPILER_ID MATCHES "Clang|AppleClang|GNU") 43 | check_c_compiler_flag("-msse4.1" HAS_SSE41_FLAG) 44 | if (HAS_SSE41_FLAG) 45 | target_compile_options(maskedvbyte PRIVATE -msse4.1) 46 | endif() 47 | endif() 48 | 49 | # Examples 50 | if (MASKEDVBYTE_BUILD_EXAMPLES) 51 | add_executable(example examples/example.c) 52 | target_link_libraries(example PRIVATE maskedvbyte) 53 | target_compile_features(example PRIVATE c_std_99) 54 | target_include_directories(example PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) 55 | endif() 56 | 57 | # Tests 58 | if (MASKEDVBYTE_BUILD_TESTS) 59 | include(CTest) 60 | add_executable(unit tests/unit.c) 61 | target_link_libraries(unit PRIVATE maskedvbyte) 62 | target_compile_features(unit PRIVATE c_std_99) 63 | target_include_directories(unit PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) 64 | add_test(NAME maskedvbyte_unit COMMAND unit) 65 | endif() 66 | 67 | # Install rules 68 | install(TARGETS maskedvbyte 69 | EXPORT maskedvbyteTargets 70 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 71 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 72 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 73 | INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} 74 | ) 75 | 76 | install(FILES ${MASKEDVBYTE_PUBLIC_HEADERS} 77 | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} 78 | ) 79 | 80 | # Package config and export 81 | install(EXPORT maskedvbyteTargets 82 | NAMESPACE maskedvbyte:: 83 | DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/maskedvbyte 84 | ) 85 | 86 | configure_package_config_file( 87 | cmake/maskedvbyteConfig.cmake.in 88 | ${CMAKE_CURRENT_BINARY_DIR}/maskedvbyteConfig.cmake 
89 | INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/maskedvbyte 90 | ) 91 | 92 | write_basic_package_version_file( 93 | ${CMAKE_CURRENT_BINARY_DIR}/maskedvbyteConfigVersion.cmake 94 | VERSION ${PROJECT_VERSION} 95 | COMPATIBILITY SameMajorVersion 96 | ) 97 | 98 | install(FILES 99 | ${CMAKE_CURRENT_BINARY_DIR}/maskedvbyteConfig.cmake 100 | ${CMAKE_CURRENT_BINARY_DIR}/maskedvbyteConfigVersion.cmake 101 | DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/maskedvbyte 102 | ) 103 | 104 | # Export from build tree for convenience 105 | export(EXPORT maskedvbyteTargets 106 | NAMESPACE maskedvbyte:: 107 | FILE ${CMAKE_CURRENT_BINARY_DIR}/maskedvbyteTargets.cmake 108 | ) 109 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MaskedVByte 2 | =========== 3 | [![Ubuntu](https://github.com/fast-pack/MaskedVByte/actions/workflows/ubuntu.yml/badge.svg)](https://github.com/fast-pack/MaskedVByte/actions/workflows/ubuntu.yml) 4 | 5 | Fast, vectorized VByte decoding for 32‑bit integers in C, with optional differential (delta) coding. 6 | 7 | - Requires x86-64 with SSE4.1 (available on virtually all modern x64 CPUs) 8 | - C99 compatible 9 | 10 | 11 | Build and test 12 | -------------- 13 | 14 | ```sh 15 | make # builds the library and the test binary 16 | ./unit # runs a quick correctness test 17 | ``` 18 | 19 | CMake build (alternative) 20 | ------------------------ 21 | 22 | ```sh 23 | mkdir -p build 24 | cmake -S . 
-B build -DCMAKE_BUILD_TYPE=Release \ 25 | -DMASKEDVBYTE_BUILD_TESTS=ON \ 26 | -DMASKEDVBYTE_BUILD_EXAMPLES=ON 27 | cmake --build build -j 28 | ctest --test-dir build --output-on-failure # optional 29 | 30 | # run the example built by CMake 31 | ./build/example 32 | ``` 33 | 34 | Install with CMake (optional): 35 | 36 | ```sh 37 | cmake --install build --prefix /usr/local 38 | ``` 39 | 40 | Build and run the example 41 | ------------------------- 42 | 43 | ```sh 44 | make example 45 | ./example 46 | ``` 47 | 48 | You should see something like: 49 | 50 | ``` 51 | Compressed 5000 integers down to 5000 bytes. 52 | ``` 53 | 54 | Embedded example, explained 55 | --------------------------- 56 | The example allocates input/output buffers, encodes a flat array of integers with classic VByte, then decodes it back with the masked (vectorized) decoder and verifies the sizes match. 57 | 58 | ```c 59 | #include <stdio.h> 60 | #include <stdlib.h> 61 | #include <assert.h> 62 | 63 | #include "varintencode.h" 64 | #include "varintdecode.h" 65 | 66 | int main() { 67 | int N = 5000; 68 | uint32_t * datain = malloc(N * sizeof(uint32_t)); 69 | uint8_t * compressedbuffer = malloc(N * sizeof(uint32_t)); 70 | uint32_t * recovdata = malloc(N * sizeof(uint32_t)); 71 | for (int k = 0; k < N; ++k) 72 | datain[k] = 120; // constant value fits in one VByte 73 | size_t compsize = vbyte_encode(datain, N, compressedbuffer); // encoding 74 | // result is stored in 'compressedbuffer' using 'compsize' bytes 75 | size_t compsize2 = masked_vbyte_decode(compressedbuffer, recovdata, N); // fast decoding 76 | assert(compsize == compsize2); // sanity check 77 | free(datain); 78 | free(compressedbuffer); 79 | free(recovdata); 80 | printf("Compressed %d integers down to %d bytes.\n", N, (int)compsize); 81 | return 0; 82 | } 83 | ``` 84 | 85 | What’s happening: 86 | - VByte uses a continuation bit; small values like 120 encode to a single byte, so 5000 values compress to 5000 bytes. 
87 | - `masked_vbyte_decode` is a vectorized decoder using SSE4.1 for speed. 88 | - Differential coding variants are available when your data is sorted or has small gaps. 89 | 90 | API at a glance 91 | --------------- 92 | Headers are in `include/`. 93 | 94 | - Encoding 95 | - `size_t vbyte_encode(const uint32_t* in, size_t length, uint8_t* bout);` 96 | - `size_t vbyte_encode_delta(const uint32_t* in, size_t length, uint8_t* bout, uint32_t prev);` 97 | 98 | - Decoding 99 | - `size_t masked_vbyte_decode(const uint8_t* in, uint32_t* out, uint64_t length);` 100 | - `size_t masked_vbyte_decode_delta(const uint8_t* in, uint32_t* out, uint64_t length, uint32_t prev);` 101 | - `size_t masked_vbyte_decode_fromcompressedsize(const uint8_t* in, uint32_t* out, size_t inputsize);` 102 | - `size_t masked_vbyte_decode_fromcompressedsize_delta(const uint8_t* in, uint32_t* out, size_t inputsize, uint32_t prev);` 103 | - Random access helpers for delta streams: 104 | - `uint32_t masked_vbyte_select_delta(const uint8_t *in, uint64_t length, uint32_t prev, size_t slot);` 105 | - `int masked_vbyte_search_delta(const uint8_t *in, uint64_t length, uint32_t prev, uint32_t key, uint32_t *presult);` 106 | 107 | Tips 108 | ---- 109 | - Prefer delta coding when your sequence is sorted or has small differences; it often reduces the number of bytes per integer. 110 | - If you know the compressed byte length, use the `*_fromcompressedsize` functions to decode exactly that many bytes. 
111 | 112 | 113 | Use from your CMake project 114 | --------------------------- 115 | 116 | After installation (see above): 117 | 118 | ```cmake 119 | find_package(maskedvbyte CONFIG REQUIRED) 120 | target_link_libraries(your_target PRIVATE maskedvbyte::maskedvbyte) 121 | ``` 122 | 123 | Or as a subdirectory (vendored): 124 | 125 | ```cmake 126 | add_subdirectory(path/to/MaskedVByte) 127 | target_link_libraries(your_target PRIVATE maskedvbyte::maskedvbyte) 128 | ``` 129 | 130 | 131 | Interesting applications 132 | ----------------------- 133 | 134 | - [Greg Bowyer has integrated Masked VByte into Lucene, for higher speeds](https://github.com/GregBowyer/lucene-solr/tree/intrinsics). 135 | - Our fast function is also used by [PISA: Performant Indexes and Search for Academia](https://github.com/pisa-engine/pisa). 136 | 137 | Reference 138 | ------------- 139 | 140 | * Daniel Lemire, Nathan Kurz, Christoph Rupp, Stream VByte: Faster Byte-Oriented Integer Compression, Information Processing Letters 130, February 2018, Pages 1-6 https://arxiv.org/abs/1709.08990 141 | * Jeff Plaisance, Nathan Kurz, Daniel Lemire, Vectorized VByte Decoding, International Symposium on Web Algorithms 2015, 2015. 
http://arxiv.org/abs/1503.07387 142 | 143 | 144 | See also 145 | ------------ 146 | 147 | * SIMDCompressionAndIntersection: A C++ library to compress and intersect sorted lists of integers using SIMD instructions https://github.com/lemire/SIMDCompressionAndIntersection 148 | * The FastPFOR C++ library : Fast integer compression https://github.com/lemire/FastPFor 149 | * High-performance dictionary coding https://github.com/lemire/dictionary 150 | * LittleIntPacker: C library to pack and unpack short arrays of integers as fast as possible https://github.com/lemire/LittleIntPacker 151 | * The SIMDComp library: A simple C library for compressing lists of integers using binary packing https://github.com/lemire/simdcomp 152 | * StreamVByte: Fast integer compression in C using the StreamVByte codec https://github.com/lemire/streamvbyte 153 | * CSharpFastPFOR: A C# integer compression library https://github.com/Genbox/CSharpFastPFOR 154 | * JavaFastPFOR: A java integer compression library https://github.com/lemire/JavaFastPFOR 155 | * Encoding: Integer Compression Libraries for Go https://github.com/zhenjl/encoding 156 | * FrameOfReference is a C++ library dedicated to frame-of-reference (FOR) compression: https://github.com/lemire/FrameOfReference 157 | * libvbyte: A fast implementation for varbyte 32bit/64bit integer compression https://github.com/cruppstahl/libvbyte 158 | * TurboPFor is a C library that offers lots of interesting optimizations. Well worth checking! (GPL license) https://github.com/powturbo/TurboPFor 159 | * Oroch is a C++ library that offers a usable API (MIT license) https://github.com/ademakov/Oroch 160 | 161 | 162 | License 163 | ------- 164 | See `LICENSE` for details. 
165 | 166 | 167 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /src/varintdecode.c: -------------------------------------------------------------------------------- 1 | #include "varintdecode.h" 2 | 3 | /* ALIGNED(x): compiler-specific spelling of an alignment request of x bytes. MSVC gets __declspec(align(x)); other compilers that define __GNUC__ get __attribute__((aligned(x))). NOTE(review): no fallback is defined for any other compiler, so the ALIGNED(0x1000) use on combined_lookup below would not compile there. */ #if defined(_MSC_VER) 4 | #define ALIGNED(x) __declspec(align(x)) 5 | #else 6 | #if defined(__GNUC__) 7 | #define ALIGNED(x) __attribute__ ((aligned(x))) 8 | #endif 9 | #endif 10 | 11 | 12 | /* SIMDCOMP_CTZ(result, mask): assigns to result the count of trailing zero bits of mask. MSVC branch: statement-safe do/while wrapper around _BitScanForward; stores 32U when _BitScanForward returns zero, otherwise the bit index cast to uint32_t. Non-MSVC branch: expands to the bare assignment result = __builtin_ctz(mask), with no do/while wrapper and no parentheses around result. NOTE(review): GCC documents __builtin_ctz as undefined for a zero argument, so the two branches disagree when mask is 0 -- callers presumably guarantee a nonzero mask; confirm. NOTE(review): both include directives in this block have no header name in this copy; they look like angle-bracket includes whose operand was lost -- confirm against the upstream file. */ #if defined(_MSC_VER) 13 | # include 14 | /* 64-bit needs extending */ 15 | # define SIMDCOMP_CTZ(result, mask) do { \ 16 | unsigned long index; \ 17 | if (!_BitScanForward(&(index), (mask))) { \ 18 | (result) = 32U; \ 19 | } else { \ 20 | (result) = (uint32_t)(index); \ 21 | } \ 22 | } while (0) 23 | #else 24 | #include 25 | # define SIMDCOMP_CTZ(result, mask) \ 26 | result = __builtin_ctz(mask) 27 | #endif 28 | 29 | /* index_bytes_consumed: element type of the combined_lookup table below; a pair of uint8_t values. NOTE(review): going by the field names, index presumably selects a decode pattern and bytes_consumed is how many input bytes that pattern covers -- the code that reads these fields is not visible here, confirm there. */ typedef struct index_bytes_consumed { 30 | uint8_t index; 31 | uint8_t bytes_consumed; 32 | } index_bytes_consumed; 33 | 34 | static const index_bytes_consumed ALIGNED(0x1000) combined_lookup[] = { 35 | {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, 36 | {160, 5}, {4, 7}, {40, 8}, {24, 
8}, {127, 7}, {70, 6}, {109, 7}, 37 | {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, 38 | {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, 39 | {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, 40 | {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, 41 | {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, 42 | {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, 43 | {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, 44 | {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, 45 | {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, 46 | {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, 47 | {14, 9}, {60, 10}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, 48 | {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, 49 | {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, 50 | {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, 51 | {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, 52 | {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, 53 | {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, 54 | {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, 55 | {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, 56 | {121, 7}, {13, 9}, {58, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, 57 | {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, 58 | {35, 9}, {19, 9}, {119, 7}, {11, 9}, {54, 10}, {83, 7}, {160, 5}, 59 | {7, 9}, {46, 10}, {30, 10}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, 60 | {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, 61 | {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, 62 | {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, 63 | {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, 64 | {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 
6}, 65 | {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, 66 | {99, 9}, {81, 9}, {142, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, 67 | {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, 68 | {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, 69 | {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, 70 | {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, 71 | {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, 72 | {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, 73 | {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, 74 | {36, 8}, {20, 8}, {121, 7}, {12, 8}, {57, 10}, {85, 7}, {161, 6}, 75 | {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, 76 | {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {53, 10}, 77 | {83, 7}, {160, 5}, {6, 8}, {45, 10}, {29, 10}, {130, 8}, {71, 7}, 78 | {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, 79 | {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, 80 | {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, 81 | {17, 8}, {118, 6}, {9, 8}, {51, 10}, {82, 6}, {160, 5}, {5, 8}, 82 | {43, 10}, {27, 10}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, 83 | {3, 8}, {39, 10}, {23, 10}, {122, 8}, {15, 10}, {62, 11}, {86, 8}, 84 | {161, 6}, {66, 6}, {98, 8}, {80, 8}, {140, 10}, {145, 2}, {153, 6}, 85 | {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, 86 | {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {134, 10}, 87 | {72, 8}, {116, 10}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, 88 | {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, 89 | {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, 90 | {32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5}, 91 | {4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, 92 | {165, 6}, {2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7}, 93 | {85, 7}, 
{161, 6}, {66, 6}, {97, 7}, {79, 7}, {138, 10}, {145, 2}, 94 | {153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7}, 95 | {67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7}, 96 | {132, 10}, {71, 7}, {114, 10}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, 97 | {75, 7}, {126, 10}, {69, 7}, {108, 10}, {90, 10}, {162, 7}, {145, 2}, 98 | {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, 99 | {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, 100 | {160, 5}, {65, 5}, {94, 6}, {76, 6}, {156, 5}, {70, 6}, {152, 5}, 101 | {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {155, 4}, {68, 6}, 102 | {151, 4}, {147, 4}, {161, 6}, {66, 6}, {150, 3}, {146, 3}, {157, 6}, 103 | {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, 104 | {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, 105 | {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {169, 10}, {0, 4}, 106 | {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, 107 | {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, 108 | {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, 109 | {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, 110 | {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, 111 | {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, 112 | {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, 113 | {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, 114 | {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, 115 | {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, 116 | {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, 117 | {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, 118 | {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, 119 | {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, 120 | {122, 8}, {14, 9}, {61, 11}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, 121 | {80, 8}, {139, 9}, {145, 
2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, 122 | {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, 123 | {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, 124 | {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, 125 | {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, 126 | {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, 127 | {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, 128 | {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, 129 | {21, 9}, {121, 7}, {13, 9}, {59, 11}, {85, 7}, {161, 6}, {66, 6}, 130 | {97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, 131 | {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, {55, 11}, {83, 7}, 132 | {160, 5}, {7, 9}, {47, 11}, {31, 11}, {131, 9}, {71, 7}, {113, 9}, 133 | {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, 134 | {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, 135 | {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, 136 | {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, 137 | {76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, 138 | {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, 139 | {66, 6}, {99, 9}, {81, 9}, {143, 11}, {145, 2}, {153, 6}, {149, 6}, 140 | {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, 141 | {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, 142 | {152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, 143 | {145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, 144 | {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, 145 | {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, 146 | {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, 147 | {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {103, 7}, {85, 7}, 148 | {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, 149 | {149, 6}, {0, 0}, {1, 
7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, 150 | {101, 7}, {83, 7}, {160, 5}, {6, 8}, {95, 7}, {77, 7}, {130, 8}, 151 | {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, 152 | {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, 153 | {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, 154 | {33, 8}, {17, 8}, {118, 6}, {9, 8}, {100, 6}, {82, 6}, {160, 5}, 155 | {5, 8}, {94, 6}, {76, 6}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, 156 | {165, 6}, {3, 8}, {92, 6}, {74, 6}, {122, 8}, {68, 6}, {104, 8}, 157 | {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {141, 11}, {145, 2}, 158 | {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, 159 | {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, 160 | {135, 11}, {72, 8}, {117, 11}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, 161 | {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, 162 | {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, 163 | {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6}, 164 | {160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7}, 165 | {148, 5}, {165, 6}, {2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6}, 166 | {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {157, 6}, 167 | {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5}, 168 | {119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7}, 169 | {77, 7}, {156, 5}, {71, 7}, {152, 5}, {148, 5}, {166, 7}, {64, 4}, 170 | {93, 7}, {75, 7}, {155, 4}, {69, 7}, {151, 4}, {147, 4}, {162, 7}, 171 | {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, 172 | {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, 173 | {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {156, 5}, {70, 6}, 174 | {152, 5}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {155, 4}, 175 | {68, 6}, {151, 4}, {147, 4}, {161, 6}, {66, 6}, {150, 3}, {146, 3}, 176 | {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {0, 4}, {0, 5}, 177 | {0, 5}, {0, 4}, 
{0, 5}, {0, 4}, {0, 4}, {0, 5}, {0, 5}, 178 | {0, 3}, {0, 3}, {0, 5}, {0, 2}, {0, 5}, {0, 5}, {0, 0}, 179 | {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, 180 | {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, 181 | {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, 182 | {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, 183 | {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, 184 | {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, 185 | {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, 186 | {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, 187 | {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, 188 | {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, 189 | {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, 190 | {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, 191 | {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, 192 | {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, 193 | {22, 9}, {122, 8}, {14, 9}, {60, 10}, {86, 8}, {161, 6}, {66, 6}, 194 | {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, 195 | {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, 196 | {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, 197 | {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, 198 | {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, 199 | {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, 200 | {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, 201 | {25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, 202 | {37, 9}, {21, 9}, {121, 7}, {13, 9}, {58, 10}, {85, 7}, {161, 6}, 203 | {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, 204 | {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, {54, 10}, 205 | {83, 7}, {160, 5}, {7, 9}, {46, 10}, {30, 10}, {131, 9}, {71, 
7}, 206 | {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, 207 | {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, 208 | {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, 209 | {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, 210 | {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, 211 | {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, 212 | {161, 6}, {66, 6}, {99, 9}, {81, 9}, {142, 10}, {145, 2}, {153, 6}, 213 | {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, 214 | {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, 215 | {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, 216 | {155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, 217 | {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, 218 | {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, 219 | {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, 220 | {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {57, 10}, 221 | {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, 222 | {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, 223 | {10, 8}, {53, 10}, {83, 7}, {160, 5}, {6, 8}, {45, 10}, {29, 10}, 224 | {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, 225 | {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, 226 | {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, 227 | {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {51, 10}, {82, 6}, 228 | {160, 5}, {5, 8}, {43, 10}, {27, 10}, {128, 8}, {70, 6}, {110, 8}, 229 | {148, 5}, {165, 6}, {3, 8}, {39, 10}, {23, 10}, {122, 8}, {15, 10}, 230 | {63, 12}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {140, 10}, 231 | {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, 232 | {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, 233 | {78, 8}, {134, 10}, {72, 8}, {116, 10}, {148, 5}, 
{167, 8}, {64, 4}, 234 | {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, 235 | {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, 236 | {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6}, 237 | {82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6}, 238 | {109, 7}, {148, 5}, {165, 6}, {2, 7}, {92, 6}, {74, 6}, {121, 7}, 239 | {68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, 240 | {138, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5}, 241 | {73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5}, 242 | {95, 7}, {77, 7}, {132, 10}, {71, 7}, {114, 10}, {148, 5}, {166, 7}, 243 | {64, 4}, {93, 7}, {75, 7}, {126, 10}, {69, 7}, {108, 10}, {90, 10}, 244 | {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, 245 | {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, 246 | {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {156, 5}, 247 | {70, 6}, {152, 5}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, 248 | {155, 4}, {68, 6}, {151, 4}, {147, 4}, {161, 6}, {66, 6}, {150, 3}, 249 | {146, 3}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, 250 | {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, 251 | {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, 252 | {169, 10}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, 253 | {0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, 254 | {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, 255 | {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, 256 | {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, 257 | {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, 258 | {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, 259 | {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, 260 | {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, 261 | {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, 
{69, 7}, 262 | {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, 263 | {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, 264 | {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, 265 | {26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, 266 | {38, 9}, {22, 9}, {122, 8}, {14, 9}, {104, 8}, {86, 8}, {161, 6}, 267 | {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6}, 268 | {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, 269 | {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, 270 | {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, 271 | {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, 272 | {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, 273 | {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, 274 | {41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, 275 | {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, {103, 7}, {85, 7}, 276 | {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6}, 277 | {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, 278 | {101, 7}, {83, 7}, {160, 5}, {7, 9}, {95, 7}, {77, 7}, {131, 9}, 279 | {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, 280 | {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, 281 | {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, 282 | {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, 283 | {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5}, 284 | {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, 285 | {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, {144, 12}, {145, 2}, 286 | {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, 287 | {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, 288 | {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3}, 289 | {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 
4}, {164, 9}, {0, 2}, 290 | {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, 291 | {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, 292 | {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, 293 | {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, 294 | {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, 295 | {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, 296 | {119, 7}, {10, 8}, {101, 7}, {83, 7}, {160, 5}, {6, 8}, {95, 7}, 297 | {77, 7}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, 298 | {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, 299 | {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, 300 | {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {100, 6}, 301 | {82, 6}, {160, 5}, {5, 8}, {94, 6}, {76, 6}, {128, 8}, {70, 6}, 302 | {110, 8}, {148, 5}, {165, 6}, {3, 8}, {92, 6}, {74, 6}, {122, 8}, 303 | {68, 6}, {104, 8}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, 304 | {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, 305 | {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, 306 | {96, 8}, {78, 8}, {156, 5}, {72, 8}, {152, 5}, {148, 5}, {167, 8}, 307 | {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, 308 | {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, 309 | {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, 310 | {100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7}, 311 | {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {92, 6}, {74, 6}, 312 | {121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, 313 | {79, 7}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, 314 | {91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5}, 315 | {65, 5}, {95, 7}, {77, 7}, {156, 5}, {71, 7}, {152, 5}, {148, 5}, 316 | {166, 7}, {64, 4}, {93, 7}, {75, 7}, {155, 4}, {69, 7}, {151, 4}, 317 | {147, 4}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 
7}, {145, 2}, 318 | {154, 7}, {0, 0}, {0, 0}, {0, 6}, {0, 5}, {0, 5}, {0, 6}, 319 | {0, 5}, {0, 6}, {0, 6}, {0, 5}, {0, 5}, {0, 6}, {0, 6}, 320 | {0, 5}, {0, 6}, {0, 5}, {0, 5}, {0, 6}, {0, 4}, {0, 6}, 321 | {0, 6}, {0, 4}, {0, 6}, {0, 4}, {0, 4}, {0, 6}, {0, 6}, 322 | {0, 3}, {0, 3}, {0, 6}, {0, 2}, {0, 6}, {0, 6}, {0, 0}, 323 | {0, 4}, {0, 5}, {0, 5}, {0, 4}, {0, 5}, {0, 4}, {0, 4}, 324 | {0, 5}, {0, 5}, {0, 3}, {0, 3}, {0, 5}, {0, 2}, {0, 5}, 325 | {0, 5}, {0, 0}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, 326 | {0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, 327 | {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, 328 | {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, 329 | {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, 330 | {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, 331 | {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, 332 | {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, 333 | {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, 334 | {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, 335 | {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, 336 | {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, 337 | {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, 338 | {42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, 339 | {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, {60, 10}, {86, 8}, 340 | {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, 341 | {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, 342 | {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, 343 | {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, 344 | {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, 345 | {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, 346 | {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, 347 | {4, 
7}, {41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, 348 | {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, {58, 10}, 349 | {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, 350 | {153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, 351 | {11, 9}, {54, 10}, {83, 7}, {160, 5}, {7, 9}, {46, 10}, {30, 10}, 352 | {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, 353 | {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, 354 | {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, 355 | {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, 356 | {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, 357 | {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, 358 | {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, {142, 10}, 359 | {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, 360 | {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, 361 | {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4}, 362 | {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9}, 363 | {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, 364 | {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, 365 | {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, 366 | {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, 367 | {12, 8}, {57, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, 368 | {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, 369 | {18, 8}, {119, 7}, {10, 8}, {53, 10}, {83, 7}, {160, 5}, {6, 8}, 370 | {45, 10}, {29, 10}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, 371 | {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, 372 | {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, 373 | {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, 374 | {51, 10}, {82, 6}, {160, 5}, {5, 8}, {43, 10}, {27, 10}, {128, 8}, 375 | 
{70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {39, 10}, {23, 10}, 376 | {122, 8}, {15, 10}, {62, 11}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, 377 | {80, 8}, {140, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, 378 | {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, 379 | {65, 5}, {96, 8}, {78, 8}, {134, 10}, {72, 8}, {116, 10}, {148, 5}, 380 | {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, 381 | {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, 382 | {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, 383 | {8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6}, 384 | {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {92, 6}, 385 | {74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, 386 | {97, 7}, {79, 7}, {138, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, 387 | {1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7}, 388 | {160, 5}, {65, 5}, {95, 7}, {77, 7}, {132, 10}, {71, 7}, {114, 10}, 389 | {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {126, 10}, {69, 7}, 390 | {108, 10}, {90, 10}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, 391 | {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, 392 | {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, 393 | {76, 6}, {156, 5}, {70, 6}, {152, 5}, {148, 5}, {165, 6}, {64, 4}, 394 | {92, 6}, {74, 6}, {155, 4}, {68, 6}, {151, 4}, {147, 4}, {161, 6}, 395 | {66, 6}, {150, 3}, {146, 3}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, 396 | {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, 397 | {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, 398 | {152, 5}, {148, 5}, {169, 10}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, 399 | {0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, 400 | {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, 401 | {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, 402 | {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, 403 
| {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, 404 | {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, 405 | {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, 406 | {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, 407 | {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, 408 | {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, 409 | {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, 410 | {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, 411 | {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, 412 | {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, {61, 11}, 413 | {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, 414 | {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, 415 | {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, 416 | {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, 417 | {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, 418 | {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, 419 | {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, 420 | {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7}, 421 | {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, 422 | {59, 11}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, 423 | {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, 424 | {119, 7}, {11, 9}, {55, 11}, {83, 7}, {160, 5}, {7, 9}, {47, 11}, 425 | {31, 11}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, 426 | {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, 427 | {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, 428 | {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, 429 | {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, 430 | {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, 431 | 
{68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, 432 | {143, 11}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, 433 | {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, 434 | {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, 435 | {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, 436 | {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, 437 | {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, 438 | {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, 439 | {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, 440 | {121, 7}, {12, 8}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, 441 | {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, 442 | {34, 8}, {18, 8}, {119, 7}, {10, 8}, {101, 7}, {83, 7}, {160, 5}, 443 | {6, 8}, {95, 7}, {77, 7}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, 444 | {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, 445 | {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, 446 | {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, 447 | {9, 8}, {100, 6}, {82, 6}, {160, 5}, {5, 8}, {94, 6}, {76, 6}, 448 | {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {92, 6}, 449 | {74, 6}, {122, 8}, {68, 6}, {104, 8}, {86, 8}, {161, 6}, {66, 6}, 450 | {98, 8}, {80, 8}, {141, 11}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, 451 | {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, 452 | {160, 5}, {65, 5}, {96, 8}, {78, 8}, {135, 11}, {72, 8}, {117, 11}, 453 | {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, 454 | {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, 455 | {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, 456 | {118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6}, 457 | {76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, 458 | {92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6}, 459 | 
{66, 6}, {97, 7}, {79, 7}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, 460 | {0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7}, 461 | {83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7}, {156, 5}, {71, 7}, 462 | {152, 5}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {155, 4}, 463 | {69, 7}, {151, 4}, {147, 4}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, 464 | {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, 465 | {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, 466 | {94, 6}, {76, 6}, {156, 5}, {70, 6}, {152, 5}, {148, 5}, {165, 6}, 467 | {64, 4}, {92, 6}, {74, 6}, {155, 4}, {68, 6}, {151, 4}, {147, 4}, 468 | {161, 6}, {66, 6}, {150, 3}, {146, 3}, {157, 6}, {145, 2}, {153, 6}, 469 | {149, 6}, {0, 0}, {0, 4}, {0, 5}, {0, 5}, {0, 4}, {0, 5}, 470 | {0, 4}, {0, 4}, {0, 5}, {0, 5}, {0, 3}, {0, 3}, {0, 5}, 471 | {0, 2}, {0, 5}, {0, 5}, {0, 0}, {0, 4}, {0, 3}, {0, 3}, 472 | {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3}, 473 | {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, 474 | {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, 475 | {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, 476 | {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, 477 | {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, 478 | {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, 479 | {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, 480 | {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, 481 | {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, 482 | {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, 483 | {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, 484 | {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8}, 485 | {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, 486 | {60, 10}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, 487 | {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 
4}, {91, 5}, {73, 5}, 488 | {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, 489 | {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, 490 | {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, 491 | {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, 492 | {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, 493 | {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, {70, 6}, 494 | {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, 495 | {13, 9}, {58, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, 496 | {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, 497 | {19, 9}, {119, 7}, {11, 9}, {54, 10}, {83, 7}, {160, 5}, {7, 9}, 498 | {46, 10}, {30, 10}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, 499 | {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, 500 | {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, 501 | {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, 502 | {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, 503 | {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, 504 | {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, 505 | {81, 9}, {142, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, 506 | {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, 507 | {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, 508 | {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, 509 | {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, 510 | {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, 511 | {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, 512 | {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, 513 | {20, 8}, {121, 7}, {12, 8}, {57, 10}, {85, 7}, {161, 6}, {66, 6}, 514 | {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, 515 | {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 
8}, {53, 10}, {83, 7}, 516 | {160, 5}, {6, 8}, {45, 10}, {29, 10}, {130, 8}, {71, 7}, {112, 8}, 517 | {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, 518 | {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, 519 | {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, 520 | {118, 6}, {9, 8}, {51, 10}, {82, 6}, {160, 5}, {5, 8}, {43, 10}, 521 | {27, 10}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, 522 | {39, 10}, {23, 10}, {122, 8}, {15, 10}, {104, 8}, {86, 8}, {161, 6}, 523 | {66, 6}, {98, 8}, {80, 8}, {140, 10}, {145, 2}, {153, 6}, {149, 6}, 524 | {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, 525 | {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {134, 10}, {72, 8}, 526 | {116, 10}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, 527 | {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, 528 | {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, 529 | {16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7}, 530 | {94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, 531 | {2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7}, 532 | {161, 6}, {66, 6}, {97, 7}, {79, 7}, {138, 10}, {145, 2}, {153, 6}, 533 | {149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5}, 534 | {101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7}, {132, 10}, 535 | {71, 7}, {114, 10}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, 536 | {126, 10}, {69, 7}, {108, 10}, {90, 10}, {162, 7}, {145, 2}, {150, 3}, 537 | {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, 538 | {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, 539 | {65, 5}, {94, 6}, {76, 6}, {156, 5}, {70, 6}, {152, 5}, {148, 5}, 540 | {165, 6}, {64, 4}, {92, 6}, {74, 6}, {155, 4}, {68, 6}, {151, 4}, 541 | {147, 4}, {161, 6}, {66, 6}, {150, 3}, {146, 3}, {157, 6}, {145, 2}, 542 | {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, 543 | {67, 5}, {151, 4}, 
{147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, 544 | {156, 5}, {145, 2}, {152, 5}, {148, 5}, {169, 10}, {0, 4}, {0, 3}, 545 | {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2}, 546 | {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, 547 | {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, 548 | {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, 549 | {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, 550 | {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, 551 | {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, 552 | {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, 553 | {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, 554 | {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, 555 | {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, 556 | {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, 557 | {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, 558 | {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, 559 | {14, 9}, {104, 8}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, 560 | {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, 561 | {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, 562 | {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, 563 | {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, 564 | {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, 565 | {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, 566 | {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, 567 | {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, 568 | {121, 7}, {13, 9}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, 569 | {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, 570 | {35, 9}, {19, 9}, {119, 7}, {11, 9}, {101, 7}, {83, 7}, {160, 5}, 571 | {7, 9}, {95, 7}, {77, 7}, {131, 9}, {71, 
7}, {113, 9}, {148, 5}, 572 | {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, 573 | {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, 574 | {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, 575 | {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, 576 | {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, 577 | {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, 578 | {99, 9}, {81, 9}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, 579 | {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, 580 | {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, 581 | {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, 582 | {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, 583 | {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, 584 | {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, 585 | {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, 586 | {36, 8}, {20, 8}, {121, 7}, {12, 8}, {103, 7}, {85, 7}, {161, 6}, 587 | {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, 588 | {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {101, 7}, 589 | {83, 7}, {160, 5}, {6, 8}, {95, 7}, {77, 7}, {130, 8}, {71, 7}, 590 | {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, 591 | {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, 592 | {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, 593 | {17, 8}, {118, 6}, {9, 8}, {100, 6}, {82, 6}, {160, 5}, {5, 8}, 594 | {94, 6}, {76, 6}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, 595 | {3, 8}, {92, 6}, {74, 6}, {122, 8}, {68, 6}, {104, 8}, {86, 8}, 596 | {161, 6}, {66, 6}, {98, 8}, {80, 8}, {157, 6}, {145, 2}, {153, 6}, 597 | {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, 598 | {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {156, 5}, 599 | {72, 8}, {152, 5}, {148, 5}, {167, 8}, {64, 
4}, {150, 3}, {146, 3}, 600 | {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, 601 | {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, 602 | {0, 7}, {0, 7}, {0, 6}, {0, 7}, {0, 6}, {0, 6}, {0, 5}, 603 | {0, 7}, {0, 6}, {0, 6}, {0, 7}, {0, 6}, {0, 7}, {0, 5}, 604 | {0, 6}, {0, 7}, {0, 6}, {0, 6}, {0, 7}, {0, 6}, {0, 7}, 605 | {0, 7}, {0, 6}, {0, 6}, {0, 7}, {0, 7}, {0, 6}, {0, 2}, 606 | {0, 6}, {0, 6}, {0, 0}, {0, 7}, {0, 5}, {0, 5}, {0, 7}, 607 | {0, 5}, {0, 7}, {0, 7}, {0, 5}, {0, 5}, {0, 7}, {0, 7}, 608 | {0, 5}, {0, 7}, {0, 5}, {0, 5}, {0, 7}, {0, 4}, {0, 7}, 609 | {0, 7}, {0, 4}, {0, 7}, {0, 4}, {0, 4}, {0, 7}, {0, 2}, 610 | {0, 3}, {0, 3}, {0, 7}, {0, 2}, {0, 7}, {0, 0}, {0, 0}, 611 | {0, 6}, {0, 5}, {0, 5}, {0, 6}, {0, 5}, {0, 6}, {0, 6}, 612 | {0, 5}, {0, 5}, {0, 6}, {0, 6}, {0, 5}, {0, 6}, {0, 5}, 613 | {0, 5}, {0, 6}, {0, 4}, {0, 6}, {0, 6}, {0, 4}, {0, 6}, 614 | {0, 4}, {0, 4}, {0, 6}, {0, 6}, {0, 3}, {0, 3}, {0, 6}, 615 | {0, 2}, {0, 6}, {0, 6}, {0, 0}, {0, 4}, {0, 5}, {0, 5}, 616 | {0, 4}, {0, 5}, {0, 4}, {0, 4}, {0, 5}, {0, 5}, {0, 3}, 617 | {0, 3}, {0, 5}, {0, 2}, {0, 5}, {0, 5}, {0, 0}, {0, 4}, 618 | {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, 619 | {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, 620 | {0, 0} 621 | }; 622 | 623 | 624 | static const int8_t ALIGNED(0x1000) vectorsrawbytes[] = { 625 | 0, -1, 4, -1, 1, -1, 5, -1, 2, -1, -1, -1, 3, -1, -1, -1, // 0 626 | 0, -1, 4, -1, 1, -1, 5, 6, 2, -1, -1, -1, 3, -1, -1, -1, // 1 627 | 0, -1, 4, 5, 1, -1, 6, -1, 2, -1, -1, -1, 3, -1, -1, -1, // 2 628 | 0, -1, 4, 5, 1, -1, 6, 7, 2, -1, -1, -1, 3, -1, -1, -1, // 3 629 | 0, -1, 5, -1, 1, -1, 6, -1, 2, -1, -1, -1, 3, 4, -1, -1, // 4 630 | 0, -1, 5, -1, 1, -1, 6, 7, 2, -1, -1, -1, 3, 4, -1, -1, // 5 631 | 0, -1, 5, 6, 1, -1, 7, -1, 2, -1, -1, -1, 3, 4, -1, -1, // 6 632 | 0, -1, 5, 6, 1, -1, 7, 8, 2, -1, -1, -1, 3, 4, -1, -1, // 7 633 | 0, -1, 5, -1, 1, -1, 6, -1, 2, 3, -1, -1, 4, -1, -1, -1, // 8 634 | 0, 
-1, 5, -1, 1, -1, 6, 7, 2, 3, -1, -1, 4, -1, -1, -1, // 9 635 | 0, -1, 5, 6, 1, -1, 7, -1, 2, 3, -1, -1, 4, -1, -1, -1, // 10 636 | 0, -1, 5, 6, 1, -1, 7, 8, 2, 3, -1, -1, 4, -1, -1, -1, // 11 637 | 0, -1, 6, -1, 1, -1, 7, -1, 2, 3, -1, -1, 4, 5, -1, -1, // 12 638 | 0, -1, 6, -1, 1, -1, 7, 8, 2, 3, -1, -1, 4, 5, -1, -1, // 13 639 | 0, -1, 6, 7, 1, -1, 8, -1, 2, 3, -1, -1, 4, 5, -1, -1, // 14 640 | 0, -1, 6, 7, 1, -1, 8, 9, 2, 3, -1, -1, 4, 5, -1, -1, // 15 641 | 0, -1, 5, -1, 1, 2, 6, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 16 642 | 0, -1, 5, -1, 1, 2, 6, 7, 3, -1, -1, -1, 4, -1, -1, -1, // 17 643 | 0, -1, 5, 6, 1, 2, 7, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 18 644 | 0, -1, 5, 6, 1, 2, 7, 8, 3, -1, -1, -1, 4, -1, -1, -1, // 19 645 | 0, -1, 6, -1, 1, 2, 7, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 20 646 | 0, -1, 6, -1, 1, 2, 7, 8, 3, -1, -1, -1, 4, 5, -1, -1, // 21 647 | 0, -1, 6, 7, 1, 2, 8, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 22 648 | 0, -1, 6, 7, 1, 2, 8, 9, 3, -1, -1, -1, 4, 5, -1, -1, // 23 649 | 0, -1, 6, -1, 1, 2, 7, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 24 650 | 0, -1, 6, -1, 1, 2, 7, 8, 3, 4, -1, -1, 5, -1, -1, -1, // 25 651 | 0, -1, 6, 7, 1, 2, 8, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 26 652 | 0, -1, 6, 7, 1, 2, 8, 9, 3, 4, -1, -1, 5, -1, -1, -1, // 27 653 | 0, -1, 7, -1, 1, 2, 8, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 28 654 | 0, -1, 7, -1, 1, 2, 8, 9, 3, 4, -1, -1, 5, 6, -1, -1, // 29 655 | 0, -1, 7, 8, 1, 2, 9, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 30 656 | 0, -1, 7, 8, 1, 2, 9, 10, 3, 4, -1, -1, 5, 6, -1, -1, // 31 657 | 0, 1, 5, -1, 2, -1, 6, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 32 658 | 0, 1, 5, -1, 2, -1, 6, 7, 3, -1, -1, -1, 4, -1, -1, -1, // 33 659 | 0, 1, 5, 6, 2, -1, 7, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 34 660 | 0, 1, 5, 6, 2, -1, 7, 8, 3, -1, -1, -1, 4, -1, -1, -1, // 35 661 | 0, 1, 6, -1, 2, -1, 7, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 36 662 | 0, 1, 6, -1, 2, -1, 7, 8, 3, -1, -1, -1, 4, 5, -1, -1, // 37 663 | 0, 1, 6, 7, 2, -1, 8, -1, 3, -1, -1, -1, 4, 5, -1, 
-1, // 38 664 | 0, 1, 6, 7, 2, -1, 8, 9, 3, -1, -1, -1, 4, 5, -1, -1, // 39 665 | 0, 1, 6, -1, 2, -1, 7, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 40 666 | 0, 1, 6, -1, 2, -1, 7, 8, 3, 4, -1, -1, 5, -1, -1, -1, // 41 667 | 0, 1, 6, 7, 2, -1, 8, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 42 668 | 0, 1, 6, 7, 2, -1, 8, 9, 3, 4, -1, -1, 5, -1, -1, -1, // 43 669 | 0, 1, 7, -1, 2, -1, 8, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 44 670 | 0, 1, 7, -1, 2, -1, 8, 9, 3, 4, -1, -1, 5, 6, -1, -1, // 45 671 | 0, 1, 7, 8, 2, -1, 9, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 46 672 | 0, 1, 7, 8, 2, -1, 9, 10, 3, 4, -1, -1, 5, 6, -1, -1, // 47 673 | 0, 1, 6, -1, 2, 3, 7, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 48 674 | 0, 1, 6, -1, 2, 3, 7, 8, 4, -1, -1, -1, 5, -1, -1, -1, // 49 675 | 0, 1, 6, 7, 2, 3, 8, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 50 676 | 0, 1, 6, 7, 2, 3, 8, 9, 4, -1, -1, -1, 5, -1, -1, -1, // 51 677 | 0, 1, 7, -1, 2, 3, 8, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 52 678 | 0, 1, 7, -1, 2, 3, 8, 9, 4, -1, -1, -1, 5, 6, -1, -1, // 53 679 | 0, 1, 7, 8, 2, 3, 9, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 54 680 | 0, 1, 7, 8, 2, 3, 9, 10, 4, -1, -1, -1, 5, 6, -1, -1, // 55 681 | 0, 1, 7, -1, 2, 3, 8, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 56 682 | 0, 1, 7, -1, 2, 3, 8, 9, 4, 5, -1, -1, 6, -1, -1, -1, // 57 683 | 0, 1, 7, 8, 2, 3, 9, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 58 684 | 0, 1, 7, 8, 2, 3, 9, 10, 4, 5, -1, -1, 6, -1, -1, -1, // 59 685 | 0, 1, 8, -1, 2, 3, 9, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 60 686 | 0, 1, 8, -1, 2, 3, 9, 10, 4, 5, -1, -1, 6, 7, -1, -1, // 61 687 | 0, 1, 8, 9, 2, 3, 10, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 62 688 | 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, -1, -1, 6, 7, -1, -1, // 63 689 | 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, // 64 690 | 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, // 65 691 | 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, // 66 692 | 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, // 67 693 | 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 
-1, -1, 4, 5, -1, -1, // 68 694 | 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, // 69 695 | 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, // 70 696 | 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, // 71 697 | 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, // 72 698 | 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 73 699 | 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 74 700 | 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1, // 75 701 | 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 76 702 | 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 77 703 | 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1, // 78 704 | 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1, // 79 705 | 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1, // 80 706 | 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1, // 81 707 | 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 82 708 | 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 83 709 | 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, -1, // 84 710 | 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 85 711 | 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 86 712 | 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, -1, // 87 713 | 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1, -1, -1, // 88 714 | 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, -1, -1, // 89 715 | 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, -1, // 90 716 | 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 91 717 | 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 92 718 | 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1, // 93 719 | 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 94 720 | 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 95 721 | 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1, // 96 722 | 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, 
-1, 6, -1, -1, -1, // 97 723 | 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1, // 98 724 | 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1, // 99 725 | 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 100 726 | 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 101 727 | 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1, // 102 728 | 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 103 729 | 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 104 730 | 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1, // 105 731 | 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1, // 106 732 | 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1, // 107 733 | 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1, // 108 734 | 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, -1, -1, -1, // 109 735 | 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, -1, -1, // 110 736 | 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, -1, // 111 737 | 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1, -1, -1, // 112 738 | 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, -1, -1, // 113 739 | 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, -1, // 114 740 | 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1, -1, -1, // 115 741 | 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, -1, // 116 742 | 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, -1, // 117 743 | 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 118 744 | 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 119 745 | 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1, // 120 746 | 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 121 747 | 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 122 748 | 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1, // 123 749 | 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1, // 124 750 | 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1, // 125 751 | 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 
8, 9, -1, // 126 752 | 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1, // 127 753 | 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1, // 128 754 | 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1, // 129 755 | 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1, // 130 756 | 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1, // 131 757 | 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1, // 132 758 | 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1, // 133 759 | 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1, // 134 760 | 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1, // 135 761 | 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1, -1, -1, // 136 762 | 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, -1, -1, // 137 763 | 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1, // 138 764 | 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1, -1, -1, // 139 765 | 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, -1, -1, // 140 766 | 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, -1, // 141 767 | 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1, -1, -1, // 142 768 | 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, -1, -1, // 143 769 | 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1, // 144 770 | -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 1, // 145 771 | -1, -1, -1, -1, -1, -1, -1, 0, 2, -1, -1, -1, -1, -1, -1, 1, // 146 772 | -1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, -1, -1, -1, 1, // 147 773 | -1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, 4, -1, -1, 1, // 148 774 | -1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, 4, -1, 5, 1, // 149 775 | 1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 2, // 150 776 | 1, -1, -1, -1, -1, -1, -1, 0, 3, -1, -1, -1, -1, -1, -1, 2, // 151 777 | 1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, -1, -1, -1, 2, // 152 778 | 1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, 5, -1, -1, 2, // 153 779 | 1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, 5, -1, 6, 2, // 154 780 | 1, -1, 2, -1, -1, -1, -1, 0, 
// Scalar decode of a single varint-encoded 32-bit value.
//
// Each input byte carries 7 payload bits (low bits) and a continuation flag
// (high bit). Bytes are consumed from in[0] onward until one has its high bit
// clear, or until five bytes have been consumed; the fifth byte is never
// checked for a continuation flag.
//
//   in   first byte of the encoded value
//   out  receives the decoded value
//
// Returns the number of input bytes consumed (1..5).
static int read_int(const uint8_t* in, uint32_t* out) {
    int used = 1;

    *out = in[0] & 0x7FU;
    // Keep going while the byte just consumed had its continuation bit set.
    while (used < 5 && in[used - 1] >= 128) {
        *out |= (uint32_t)(in[used] & 0x7FU) << (7 * used);
        used++;
    }
    return used;
}

// Scalar decode of a single varint-encoded delta, followed by the running-sum
// update used for differential coding.
//
// The delta is decoded exactly as in read_int, then added to *prev; the new
// running total is stored in both *prev and *out.
//
//   in    first byte of the encoded delta
//   out   receives the reconstructed value (previous total + delta)
//   prev  running total; read and updated in place
//
// Returns the number of input bytes consumed (1..5).
static inline int read_int_delta(const uint8_t* in, uint32_t* out, uint32_t* prev) {
    int used = 1;

    *out = in[0] & 0x7FU;
    while (used < 5 && in[used - 1] >= 128) {
        *out |= (uint32_t)(in[used] & 0x7FU) << (7 * used);
        used++;
    }

    // Turn the decoded delta into an absolute value.
    *prev += *out;
    *out = *prev;
    return used;
}
_mm_storeu_si128(mout, unpacked_result_a); 893 | __m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16); 894 | _mm_storel_epi64(mout+1, unpacked_result_b); 895 | //_mm_storeu_si128(mout + 1, unpacked_result_b); // maybe faster to write 16 bytes? 896 | return consumed; 897 | } 898 | if (index < 145) { 899 | 900 | *ints_read = 4; 901 | 902 | __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); 903 | __m128i low_bytes = _mm_and_si128(bytes_to_decode, 904 | _mm_set1_epi32(0x0000007F)); 905 | __m128i middle_bytes = _mm_and_si128(bytes_to_decode, 906 | _mm_set1_epi32(0x00007F00)); 907 | __m128i high_bytes = _mm_and_si128(bytes_to_decode, 908 | _mm_set1_epi32(0x007F0000)); 909 | __m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1); 910 | __m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2); 911 | __m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted); 912 | __m128i result = _mm_or_si128(low_middle, high_bytes_shifted); 913 | _mm_storeu_si128(mout, result); 914 | return consumed; 915 | } 916 | 917 | *ints_read = 2; 918 | 919 | __m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F)); 920 | __m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector); 921 | __m128i split_bytes = _mm_mullo_epi16(bytes_to_decode, 922 | _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16)); 923 | __m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8); 924 | __m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes); 925 | __m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56); 926 | __m128i result_evens = _mm_or_si128(recombined, low_byte); 927 | __m128i result = _mm_shuffle_epi8(result_evens, 928 | _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, 929 | -1)); 930 | _mm_storel_epi64(mout, result); 931 | //_mm_storeu_si128(mout, result); // maybe faster to write 16 bytes? 
// Inclusive prefix sum over the four 32-bit lanes of curr, seeded with the
// highest lane of prev.
//
// With curr = [A B C D] and P = prev lane 3, the result is
// [P+A, P+A+B, P+A+B+C, P+A+B+C+D] (32-bit wrapping adds).
static inline __m128i PrefixSum(__m128i curr, __m128i prev) {
    // Broadcast the last value of the previous vector: [P P P P].
    const __m128i carry = _mm_shuffle_epi32(prev, 0xff);
    // Add each lane to its left neighbour: [A AB BC CD].
    const __m128i pairs = _mm_add_epi32(curr, _mm_slli_si128(curr, 4));
    // Add the lane two positions to the left: [A AB ABC ABCD].
    const __m128i running = _mm_add_epi32(pairs, _mm_slli_si128(pairs, 8));
    return _mm_add_epi32(running, carry);
}

// Prefix sum for the case where only the first two lanes of curr hold data;
// lanes 2 and 3 of curr are ignored.
//
// With curr = [A B . .] and P = prev lane 3, the result is
// [P+A, P+A+B, P+A+B, P+A+B], so the highest lane again carries the running
// total for the next call.
static inline __m128i PrefixSum2ints(__m128i curr, __m128i prev) {
    // Broadcast the last value of the previous vector: [P P P P].
    const __m128i carry = _mm_shuffle_epi32(prev, 0xff);
    // [A AB .. ..] - only the first two lanes are meaningful here.
    const __m128i pairs = _mm_add_epi32(_mm_slli_si128(curr, 4), curr);
    // Replicate lane 1 into lanes 2 and 3 (selector 0x54 picks lanes 0,1,1,1).
    return _mm_add_epi32(carry, _mm_shuffle_epi32(pairs, 0x54));
}
_mm_storeu_si128(mout + 3, *prev); 977 | *ints_read = 16; 978 | return 16; 979 | } 980 | 981 | uint32_t low_12_bits = mask & 0xFFF; 982 | // combine index and bytes consumed into a single lookup 983 | index_bytes_consumed combined = combined_lookup[low_12_bits]; 984 | uint64_t consumed = combined.bytes_consumed; 985 | uint8_t index = combined.index; 986 | 987 | __m128i shuffle_vector = vectors[index]; 988 | 989 | if (index < 64) { 990 | *ints_read = 6; 991 | __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); 992 | __m128i low_bytes = _mm_and_si128(bytes_to_decode, 993 | _mm_set1_epi16(0x007F)); 994 | __m128i high_bytes = _mm_and_si128(bytes_to_decode, 995 | _mm_set1_epi16(0x7F00)); 996 | __m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1); 997 | __m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted); 998 | __m128i unpacked_result_a = _mm_and_si128(packed_result, 999 | _mm_set1_epi32(0x0000FFFF)); 1000 | *prev = PrefixSum(unpacked_result_a, *prev); 1001 | _mm_storeu_si128(mout, *prev); 1002 | __m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16); 1003 | *prev = PrefixSum2ints(unpacked_result_b, *prev); 1004 | _mm_storel_epi64(mout + 1, *prev); 1005 | return consumed; 1006 | } 1007 | if (index < 145) { 1008 | 1009 | *ints_read = 4; 1010 | 1011 | __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); 1012 | __m128i low_bytes = _mm_and_si128(bytes_to_decode, 1013 | _mm_set1_epi32(0x0000007F)); 1014 | __m128i middle_bytes = _mm_and_si128(bytes_to_decode, 1015 | _mm_set1_epi32(0x00007F00)); 1016 | __m128i high_bytes = _mm_and_si128(bytes_to_decode, 1017 | _mm_set1_epi32(0x007F0000)); 1018 | __m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1); 1019 | __m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2); 1020 | __m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted); 1021 | __m128i result = _mm_or_si128(low_middle, high_bytes_shifted); 1022 | *prev = PrefixSum(result, *prev); 1023 | 
_mm_storeu_si128(mout, *prev); 1024 | return consumed; 1025 | } 1026 | 1027 | *ints_read = 2; 1028 | 1029 | __m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F)); 1030 | __m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector); 1031 | __m128i split_bytes = _mm_mullo_epi16(bytes_to_decode, 1032 | _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16)); 1033 | __m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8); 1034 | __m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes); 1035 | __m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56); 1036 | __m128i result_evens = _mm_or_si128(recombined, low_byte); 1037 | __m128i result = _mm_shuffle_epi8(result_evens, 1038 | _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, 1039 | -1)); 1040 | *prev = PrefixSum2ints(result, *prev); 1041 | _mm_storel_epi64(mout, *prev); 1042 | return consumed; 1043 | } 1044 | 1045 | 1046 | static int read_int_group(const uint8_t* in, uint32_t* out, int* ints_read) { 1047 | 1048 | __m128i initial = _mm_lddqu_si128((const __m128i *) in); 1049 | __m128i * const mout = (__m128i *) out; 1050 | 1051 | int mask = _mm_movemask_epi8(initial); 1052 | if (mask == 0) { 1053 | __m128i result; 1054 | result = _mm_cvtepi8_epi32(initial); 1055 | initial = _mm_srli_si128(initial, 4); 1056 | _mm_storeu_si128(mout, result); 1057 | result = _mm_cvtepi8_epi32(initial); 1058 | initial = _mm_srli_si128(initial, 4); 1059 | _mm_storeu_si128(mout + 1, result); 1060 | result = _mm_cvtepi8_epi32(initial); 1061 | initial = _mm_srli_si128(initial, 4); 1062 | _mm_storeu_si128(mout + 2, result); 1063 | result = _mm_cvtepi8_epi32(initial); 1064 | _mm_storeu_si128(mout + 3, result); 1065 | *ints_read = 16; 1066 | return 16; 1067 | } 1068 | int mask2 = mask & 0xFFF; 1069 | index_bytes_consumed combined = combined_lookup[mask2]; 1070 | 1071 | int index = combined.index; 1072 | 1073 | __m128i shuffle_vector = vectors[index]; 1074 | int consumed = combined.bytes_consumed; 1075 | 
1076 | if (index < 64) { 1077 | *ints_read = 6; 1078 | __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); 1079 | __m128i low_bytes = _mm_and_si128(bytes_to_decode, 1080 | _mm_set1_epi16(0x007F)); 1081 | __m128i high_bytes = _mm_and_si128(bytes_to_decode, 1082 | _mm_set1_epi16(0x7F00)); 1083 | __m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1); 1084 | __m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted); 1085 | __m128i unpacked_result_a = _mm_and_si128(packed_result, 1086 | _mm_set1_epi32(0x0000FFFF)); 1087 | _mm_storeu_si128(mout, unpacked_result_a); 1088 | __m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16); 1089 | _mm_storel_epi64(mout + 1, unpacked_result_b); 1090 | return consumed; 1091 | } 1092 | if (index < 145) { 1093 | 1094 | *ints_read = 4; 1095 | 1096 | __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); 1097 | __m128i low_bytes = _mm_and_si128(bytes_to_decode, 1098 | _mm_set1_epi32(0x0000007F)); 1099 | __m128i middle_bytes = _mm_and_si128(bytes_to_decode, 1100 | _mm_set1_epi32(0x00007F00)); 1101 | __m128i high_bytes = _mm_and_si128(bytes_to_decode, 1102 | _mm_set1_epi32(0x007F0000)); 1103 | __m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1); 1104 | __m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2); 1105 | __m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted); 1106 | __m128i result = _mm_or_si128(low_middle, high_bytes_shifted); 1107 | _mm_storeu_si128(mout, result); 1108 | return consumed; 1109 | } 1110 | 1111 | *ints_read = 2; 1112 | 1113 | __m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F)); 1114 | __m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector); 1115 | __m128i split_bytes = _mm_mullo_epi16(bytes_to_decode, 1116 | _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16)); 1117 | __m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8); 1118 | __m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes); 1119 | 
/*
 * Decode `length` varint-encoded integers from `in` into `out`.
 *
 * in     : encoded bytes
 * out    : destination for the decoded integers
 * length : number of integers to decode
 *
 * Returns the number of input bytes consumed.
 *
 * The decoder keeps a 64-bit "signature": bit k is the high bit of
 * in[in_pos + k] as reported by the movemask intrinsics.  Group decoding is
 * delegated to masked_vbyte_read_group() (defined elsewhere), which is used
 * here as: returns the bytes it consumed and reports the integers it wrote
 * through its last argument.
 *
 * NOTE(review): the loop guards count integers, not bytes, so the vector
 * loads stay inside the encoded data only if every integer occupies at
 * least one input byte -- confirm against the encoder.
 */
size_t masked_vbyte_decode(const uint8_t* in, uint32_t* out,
        uint64_t length) {
    size_t in_pos = 0;      /* bytes of input decoded so far */
    uint64_t n_out = 0;     /* integers written so far */
    uint64_t sig_bits = 0;  /* signature of the bytes starting at in_pos */
    int sig_len = 0;        /* signature bits currently valid (slow loop only) */

    if (length > 96) {
        /* Pipelined path: the signature of the next 48 bytes is always
         * computed one step ahead of the bytes being decoded. */
        size_t scan_pos = 0;

#ifdef __AVX2__
        __m256i lo = _mm256_loadu_si256((__m256i *)(in + scan_pos));
        uint32_t lo_mask = _mm256_movemask_epi8(lo);
#else
        __m128i lo_a = _mm_loadu_si128((__m128i *) (in + scan_pos));
        uint32_t lo_mask_a = _mm_movemask_epi8(lo_a);
        __m128i lo_b = _mm_loadu_si128((__m128i *) (in + scan_pos + 16));
        uint32_t lo_mask_b = _mm_movemask_epi8(lo_b);
        uint32_t lo_mask = (lo_mask_b << 16) | lo_mask_a;
#endif

        /* Masks are widened through unsigned types so that no sign
         * extension can leak into the upper signature bits. */
        __m128i hi = _mm_loadu_si128((__m128i *) (in + scan_pos + 32));
        uint32_t hi_mask = _mm_movemask_epi8(hi);
        uint64_t ahead = ((uint64_t) hi_mask << 32) | lo_mask;
        scan_pos += 48;

        do {
            const uint64_t current = ahead;

#ifdef __AVX2__
            lo = _mm256_loadu_si256((__m256i *)(in + scan_pos));
            lo_mask = _mm256_movemask_epi8(lo);
#else
            lo_a = _mm_loadu_si128((__m128i *) (in + scan_pos));
            lo_mask_a = _mm_movemask_epi8(lo_a);
            lo_b = _mm_loadu_si128((__m128i *) (in + scan_pos + 16));
            lo_mask_b = _mm_movemask_epi8(lo_b);
            lo_mask = (lo_mask_b << 16) | lo_mask_a;
#endif

            hi = _mm_loadu_si128((__m128i *) (in + scan_pos + 32));
            hi_mask = _mm_movemask_epi8(hi);
            ahead = ((uint64_t) hi_mask << 32) | lo_mask;

            /* Append the 48 freshly scanned bits above the ones still pending. */
            const uint64_t pending = scan_pos - (in_pos + 48);
            sig_bits |= current << pending;

            const uint64_t refill_at = scan_pos - 16;
            scan_pos += 48;

            /* Decode until fewer than 16 scanned bytes remain in sig_bits. */
            while (in_pos < refill_at) {
                uint64_t got;
                uint64_t used = masked_vbyte_read_group(in + in_pos,
                        out + n_out, sig_bits, &got);
                sig_bits >>= used;

                /* Dummy test kept from the original: it is meant to push the
                 * compiler to evaluate the shift above as early as possible. */
                if (sig_bits == 0xFFFFFFFFFFFFFFFF)
                    return 0;

                in_pos += used;
                n_out += got;
            }
        } while (n_out + 112 < length); /* 112 == 48 + 48 scanned ahead + up to 16 pending */

        sig_bits |= ahead << (scan_pos - in_pos - 48);
        sig_len = scan_pos - in_pos;
    }

    /* Slow loop: top the signature up 32 or 16 bytes at a time while enough
     * integers remain to justify the load, then decode one group. */
    while (sig_len + n_out < length) {
        if (sig_len < 16) {
            if (sig_len + n_out + 31 < length) {
#ifdef __AVX2__
                uint64_t fresh = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + sig_len + in_pos)));
                sig_bits |= (fresh << sig_len);
#else
                uint64_t fresh_lo = _mm_movemask_epi8(
                        _mm_lddqu_si128((const __m128i *) (in + sig_len + in_pos)));
                uint64_t fresh_hi = _mm_movemask_epi8(
                        _mm_lddqu_si128((const __m128i *) (in + sig_len + 16 + in_pos)));
                sig_bits |= (fresh_lo << sig_len) | (fresh_hi << (sig_len + 16));
#endif
                sig_len += 32;
            } else if (sig_len + n_out + 15 < length) {
                int fresh = _mm_movemask_epi8(
                        _mm_lddqu_si128((const __m128i *) (in + sig_len + in_pos)));
                sig_bits |= fresh << sig_len;
                sig_len += 16;
            } else {
                break;
            }
        }

        uint64_t got;
        uint64_t used = masked_vbyte_read_group(in + in_pos, out + n_out,
                sig_bits, &got);
        in_pos += used;
        sig_len -= used;
        sig_bits >>= used;
        n_out += got;
    }

    /* Whatever is left is decoded one integer at a time. */
    while (n_out < length) {
        in_pos += read_int(in + in_pos, out + n_out);
        n_out++;
    }
    return in_pos;
}
uint64_t thisSig = nextSig; 1290 | 1291 | #ifdef __AVX2__ 1292 | low = _mm256_loadu_si256((__m256i *)(in + scanned)); 1293 | lowSig = _mm256_movemask_epi8(low); 1294 | #else 1295 | low1 = _mm_loadu_si128((__m128i *) (in + scanned)); 1296 | lowSig1 = _mm_movemask_epi8(low1); 1297 | low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); 1298 | lowSig2 = _mm_movemask_epi8(low2); 1299 | lowSig = lowSig2 << 16; 1300 | lowSig |= lowSig1; 1301 | #endif 1302 | 1303 | high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); 1304 | highSig = _mm_movemask_epi8(high); 1305 | nextSig = highSig; 1306 | nextSig <<= 32; 1307 | nextSig |= lowSig; 1308 | 1309 | uint64_t remaining = scanned - (consumed + 48); 1310 | sig = (thisSig << remaining) | sig; 1311 | 1312 | uint64_t reload = scanned - 16; 1313 | scanned += 48; 1314 | 1315 | // need to reload when less than 16 scanned bytes remain in sig 1316 | while (consumed < reload) { 1317 | uint64_t ints_read; 1318 | uint64_t bytes = masked_vbyte_read_group(in + consumed, 1319 | out, sig, &ints_read); 1320 | sig >>= bytes; 1321 | 1322 | // seems like this might force the compiler to prioritize shifting sig >>= bytes 1323 | if (sig == 0xFFFFFFFFFFFFFFFF) 1324 | return 0; // fake check to force earliest evaluation 1325 | 1326 | consumed += bytes; 1327 | out += ints_read; 1328 | } 1329 | } while (scanned + 112 < inputsize); // 112 == 48 + 48 ahead for scanning + up to 16 remaining in sig 1330 | sig = (nextSig << (scanned - consumed - 48)) | sig; 1331 | availablebytes = scanned - consumed; 1332 | } 1333 | while (1) { 1334 | if (availablebytes < 16) { 1335 | if (availablebytes + consumed + 31 < inputsize) { 1336 | #ifdef __AVX2__ 1337 | uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed))); 1338 | sig |= (newsigavx << availablebytes); 1339 | #else 1340 | uint64_t newsig = _mm_movemask_epi8( 1341 | _mm_lddqu_si128( 1342 | (const __m128i *) (in + availablebytes 1343 | + 
consumed))); 1344 | uint64_t newsig2 = _mm_movemask_epi8( 1345 | _mm_lddqu_si128( 1346 | (const __m128i *) (in + availablebytes + 16 1347 | + consumed))); 1348 | sig |= (newsig << availablebytes) 1349 | | (newsig2 << (availablebytes + 16)); 1350 | #endif 1351 | availablebytes += 32; 1352 | } else if(availablebytes + consumed + 15 < inputsize ) { 1353 | int newsig = _mm_movemask_epi8( 1354 | _mm_lddqu_si128( 1355 | (const __m128i *) (in + availablebytes 1356 | + consumed))); 1357 | sig |= newsig << availablebytes; 1358 | availablebytes += 16; 1359 | } else { 1360 | break; 1361 | } 1362 | } 1363 | uint64_t ints_read; 1364 | uint64_t bytes = masked_vbyte_read_group(in + consumed, out, 1365 | sig, &ints_read); 1366 | consumed += bytes; 1367 | availablebytes -= bytes; 1368 | sig >>= bytes; 1369 | out += ints_read; 1370 | } 1371 | while (consumed < inputsize) { 1372 | unsigned int shift = 0; 1373 | for (uint32_t v = 0; consumed < inputsize; shift += 7) { 1374 | uint8_t c = in[consumed++]; 1375 | if ((c & 128) == 0) { 1376 | out[0] = v + (c << shift); 1377 | ++out; 1378 | break; 1379 | } else { 1380 | v += (c & 127) << shift; 1381 | } 1382 | } 1383 | } 1384 | return out - initout; 1385 | } 1386 | 1387 | 1388 | size_t read_ints(const uint8_t* in, uint32_t* out, int length) { 1389 | size_t consumed = 0; 1390 | int count; 1391 | for (count = 0; count + 15 < length;) { 1392 | int ints_read; 1393 | consumed += read_int_group(in + consumed, out + count, &ints_read); 1394 | count += ints_read; 1395 | } 1396 | for (; count < length; count++) { 1397 | consumed += read_int(in + consumed, out + count); 1398 | } 1399 | return consumed; 1400 | } 1401 | 1402 | static int read_int_group_delta(const uint8_t* in, uint32_t* out, 1403 | int* ints_read, __m128i * prev) { 1404 | 1405 | __m128i initial = _mm_lddqu_si128((const __m128i *) in); 1406 | __m128i * const mout = (__m128i *) out; 1407 | 1408 | int mask = _mm_movemask_epi8(initial); 1409 | if (mask == 0) { 1410 | __m128i result; 1411 | 
result = _mm_cvtepi8_epi32(initial); 1412 | initial = _mm_srli_si128(initial, 4); 1413 | *prev = PrefixSum(result, *prev); 1414 | _mm_storeu_si128(mout, *prev); 1415 | result = _mm_cvtepi8_epi32(initial); 1416 | initial = _mm_srli_si128(initial, 4); 1417 | *prev = PrefixSum(result, *prev); 1418 | _mm_storeu_si128(mout + 1, *prev); 1419 | result = _mm_cvtepi8_epi32(initial); 1420 | initial = _mm_srli_si128(initial, 4); 1421 | *prev = PrefixSum(result, *prev); 1422 | _mm_storeu_si128(mout + 2, *prev); 1423 | result = _mm_cvtepi8_epi32(initial); 1424 | *prev = PrefixSum(result, *prev); 1425 | _mm_storeu_si128(mout + 3, *prev); 1426 | *ints_read = 16; 1427 | return 16; 1428 | } 1429 | int mask2 = mask & 0xFFF; 1430 | index_bytes_consumed combined = combined_lookup[mask2]; 1431 | 1432 | int index = combined.index; 1433 | 1434 | __m128i shuffle_vector = vectors[index]; 1435 | int consumed = combined.bytes_consumed; 1436 | 1437 | if (index < 64) { 1438 | *ints_read = 6; 1439 | __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); 1440 | __m128i low_bytes = _mm_and_si128(bytes_to_decode, 1441 | _mm_set1_epi16(0x007F)); 1442 | __m128i high_bytes = _mm_and_si128(bytes_to_decode, 1443 | _mm_set1_epi16(0x7F00)); 1444 | __m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1); 1445 | __m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted); 1446 | __m128i unpacked_result_a = _mm_and_si128(packed_result, 1447 | _mm_set1_epi32(0x0000FFFF)); 1448 | *prev = PrefixSum(unpacked_result_a, *prev); 1449 | _mm_storeu_si128(mout, *prev); 1450 | __m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16); 1451 | *prev = PrefixSum2ints(unpacked_result_b, *prev); 1452 | _mm_storeu_si128(mout + 1, *prev); 1453 | return consumed; 1454 | } 1455 | if (index < 145) { 1456 | 1457 | *ints_read = 4; 1458 | 1459 | __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); 1460 | __m128i low_bytes = _mm_and_si128(bytes_to_decode, 1461 | 
_mm_set1_epi32(0x0000007F)); 1462 | __m128i middle_bytes = _mm_and_si128(bytes_to_decode, 1463 | _mm_set1_epi32(0x00007F00)); 1464 | __m128i high_bytes = _mm_and_si128(bytes_to_decode, 1465 | _mm_set1_epi32(0x007F0000)); 1466 | __m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1); 1467 | __m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2); 1468 | __m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted); 1469 | __m128i result = _mm_or_si128(low_middle, high_bytes_shifted); 1470 | *prev = PrefixSum(result, *prev); 1471 | _mm_storeu_si128(mout, *prev); 1472 | 1473 | return consumed; 1474 | } 1475 | 1476 | *ints_read = 2; 1477 | 1478 | __m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F)); 1479 | __m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector); 1480 | __m128i split_bytes = _mm_mullo_epi16(bytes_to_decode, 1481 | _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16)); 1482 | __m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8); 1483 | __m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes); 1484 | __m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56); 1485 | __m128i result_evens = _mm_or_si128(recombined, low_byte); 1486 | __m128i result = _mm_shuffle_epi8(result_evens, 1487 | _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, 1488 | -1)); 1489 | *prev = PrefixSum2ints(result, *prev); 1490 | _mm_storeu_si128(mout, *prev); 1491 | return consumed; 1492 | } 1493 | 1494 | 1495 | // len_signed : number of ints we want to decode 1496 | size_t masked_vbyte_decode_delta(const uint8_t* in, uint32_t* out, 1497 | uint64_t length, uint32_t prev) { 1498 | //uint64_t length = (uint64_t) len_signed; // number of ints we want to decode 1499 | size_t consumed = 0; // number of bytes read 1500 | __m128i mprev = _mm_set1_epi32(prev); 1501 | uint64_t count = 0; // how many integers we have read so far 1502 | 1503 | uint64_t sig = 0; 1504 | int availablebytes = 0; 1505 | if (96 < length) { 1506 | 
size_t scanned = 0; 1507 | 1508 | 1509 | #ifdef __AVX2__ 1510 | __m256i low = _mm256_loadu_si256((__m256i *)(in + scanned)); 1511 | uint32_t lowSig = _mm256_movemask_epi8(low); 1512 | #else 1513 | __m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned)); 1514 | uint32_t lowSig1 = _mm_movemask_epi8(low1); 1515 | __m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); 1516 | uint32_t lowSig2 = _mm_movemask_epi8(low2); 1517 | uint32_t lowSig = lowSig2 << 16; 1518 | lowSig |= lowSig1; 1519 | #endif 1520 | 1521 | // excess verbosity to avoid problems with sign extension on conversions 1522 | // better to think about what's happening and make it clearer 1523 | __m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); 1524 | uint32_t highSig = _mm_movemask_epi8(high); 1525 | uint64_t nextSig = highSig; 1526 | nextSig <<= 32; 1527 | nextSig |= lowSig; 1528 | scanned += 48; 1529 | 1530 | do { 1531 | uint64_t thisSig = nextSig; 1532 | 1533 | #ifdef __AVX2__ 1534 | low = _mm256_loadu_si256((__m256i *)(in + scanned)); 1535 | lowSig = _mm256_movemask_epi8(low); 1536 | #else 1537 | low1 = _mm_loadu_si128((__m128i *) (in + scanned)); 1538 | lowSig1 = _mm_movemask_epi8(low1); 1539 | low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); 1540 | lowSig2 = _mm_movemask_epi8(low2); 1541 | lowSig = lowSig2 << 16; 1542 | lowSig |= lowSig1; 1543 | #endif 1544 | 1545 | high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); 1546 | highSig = _mm_movemask_epi8(high); 1547 | nextSig = highSig; 1548 | nextSig <<= 32; 1549 | nextSig |= lowSig; 1550 | 1551 | uint64_t remaining = scanned - (consumed + 48); 1552 | sig = (thisSig << remaining) | sig; 1553 | 1554 | uint64_t reload = scanned - 16; 1555 | scanned += 48; 1556 | 1557 | // need to reload when less than 16 scanned bytes remain in sig 1558 | while (consumed < reload) { 1559 | uint64_t ints_read; 1560 | uint64_t bytes = masked_vbyte_read_group_delta(in + consumed, 1561 | out + count, sig, &ints_read, &mprev); 1562 | 
sig >>= bytes; 1563 | 1564 | // seems like this might force the compiler to prioritize shifting sig >>= bytes 1565 | if (sig == 0xFFFFFFFFFFFFFFFF) 1566 | return 0; // fake check to force earliest evaluation 1567 | 1568 | consumed += bytes; 1569 | count += ints_read; 1570 | } 1571 | } while (count + 112 < length); // 112 == 48 + 48 ahead for scanning + up to 16 remaining in sig 1572 | sig = (nextSig << (scanned - consumed - 48)) | sig; 1573 | availablebytes = scanned - consumed; 1574 | } 1575 | while (availablebytes + count < length) { 1576 | if (availablebytes < 16) { 1577 | if (availablebytes + count + 31 < length) { 1578 | #ifdef __AVX2__ 1579 | uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed))); 1580 | sig |= (newsigavx << availablebytes); 1581 | #else 1582 | uint64_t newsig = _mm_movemask_epi8( 1583 | _mm_lddqu_si128( 1584 | (const __m128i *) (in + availablebytes 1585 | + consumed))); 1586 | uint64_t newsig2 = _mm_movemask_epi8( 1587 | _mm_lddqu_si128( 1588 | (const __m128i *) (in + availablebytes + 16 1589 | + consumed))); 1590 | sig |= (newsig << availablebytes) 1591 | | (newsig2 << (availablebytes + 16)); 1592 | #endif 1593 | availablebytes += 32; 1594 | } else if (availablebytes + count + 15 < length) { 1595 | int newsig = _mm_movemask_epi8( 1596 | _mm_lddqu_si128( 1597 | (const __m128i *) (in + availablebytes 1598 | + consumed))); 1599 | sig |= newsig << availablebytes; 1600 | availablebytes += 16; 1601 | } else { 1602 | break; 1603 | } 1604 | } 1605 | uint64_t ints_read; 1606 | uint64_t eaten = masked_vbyte_read_group_delta(in + consumed, out + count, 1607 | sig, &ints_read, &mprev); 1608 | consumed += eaten; 1609 | availablebytes -= eaten; 1610 | sig >>= eaten; 1611 | count += ints_read; 1612 | } 1613 | prev = _mm_extract_epi32(mprev, 3); 1614 | for (; count < length; count++) { 1615 | consumed += read_int_delta(in + consumed, out + count, &prev); 1616 | } 1617 | return consumed; 1618 | 
/*
 * Decode delta-coded varints when the compressed size, rather than the
 * number of integers, is known.
 *
 * in        : encoded bytes
 * out       : destination for the decoded (prefix-summed) integers
 * inputsize : number of input bytes to decode
 * prev      : initial value the first delta is added to
 *
 * Returns the number of integers written to `out`.
 *
 * A 64-bit signature (one bit per input byte: that byte's high bit) drives
 * masked_vbyte_read_group_delta() (defined elsewhere), which is used here as:
 * returns the bytes it consumed and reports the integers it wrote through
 * `ints_read`.  Bytes that cannot be covered by a 16-byte load are decoded by
 * the scalar loop at the end.
 *
 * The scalar loop shifts in unsigned arithmetic: a uint8_t promotes to a
 * signed int, so `c << 28` with c >= 8 would overflow int, which is
 * undefined behaviour.
 * NOTE(review): values spanning more than five bytes (shift >= 32) are still
 * not defended against; input is assumed to be well formed.
 */
size_t masked_vbyte_decode_fromcompressedsize_delta(const uint8_t* in, uint32_t* out,
        size_t inputsize, uint32_t prev) {
    size_t consumed = 0; // number of bytes read
    uint32_t * initout = out;
    __m128i mprev = _mm_set1_epi32(prev);
    uint64_t sig = 0;       // signature of the bytes starting at in + consumed
    int availablebytes = 0; // signature bits currently valid (used by the slow loop)
    if (96 < inputsize) {
        // Pipelined path: the signature of the next 48 bytes is computed one
        // step ahead of the bytes being decoded.
        size_t scanned = 0;

#ifdef __AVX2__
        __m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
        uint32_t lowSig = _mm256_movemask_epi8(low);
#else
        __m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned));
        uint32_t lowSig1 = _mm_movemask_epi8(low1);
        __m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
        uint32_t lowSig2 = _mm_movemask_epi8(low2);
        uint32_t lowSig = lowSig2 << 16;
        lowSig |= lowSig1;
#endif

        // The masks are widened step by step through unsigned types to avoid
        // sign extension on conversion.
        __m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
        uint32_t highSig = _mm_movemask_epi8(high);
        uint64_t nextSig = highSig;
        nextSig <<= 32;
        nextSig |= lowSig;
        scanned += 48;

        do {
            uint64_t thisSig = nextSig;

#ifdef __AVX2__
            low = _mm256_loadu_si256((__m256i *)(in + scanned));
            lowSig = _mm256_movemask_epi8(low);
#else
            low1 = _mm_loadu_si128((__m128i *) (in + scanned));
            lowSig1 = _mm_movemask_epi8(low1);
            low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
            lowSig2 = _mm_movemask_epi8(low2);
            lowSig = lowSig2 << 16;
            lowSig |= lowSig1;
#endif

            high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
            highSig = _mm_movemask_epi8(high);
            nextSig = highSig;
            nextSig <<= 32;
            nextSig |= lowSig;

            // Append the 48 freshly scanned bits above the pending ones.
            uint64_t remaining = scanned - (consumed + 48);
            sig = (thisSig << remaining) | sig;

            uint64_t reload = scanned - 16;
            scanned += 48;

            // need to reload when less than 16 scanned bytes remain in sig
            while (consumed < reload) {
                uint64_t ints_read;
                uint64_t bytes = masked_vbyte_read_group_delta(in + consumed,
                        out, sig, &ints_read, &mprev);
                sig >>= bytes;

                // Dummy test: meant to push the compiler to evaluate
                // sig >>= bytes as early as possible.
                if (sig == 0xFFFFFFFFFFFFFFFF)
                    return 0;

                consumed += bytes;
                out += ints_read;
            }
        } while (scanned + 112 < inputsize); // 112 == 48 + 48 ahead for scanning + up to 16 remaining in sig
        sig = (nextSig << (scanned - consumed - 48)) | sig;
        availablebytes = scanned - consumed;
    }
    // Slow loop: refill the signature with 32 or 16 bytes while such a load
    // still ends inside the input, then decode one group.
    while (1) {
        if (availablebytes < 16) {
            if (availablebytes + consumed + 31 < inputsize) {
#ifdef __AVX2__
                uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
                sig |= (newsigavx << availablebytes);
#else
                uint64_t newsig = _mm_movemask_epi8(
                        _mm_lddqu_si128(
                                (const __m128i *) (in + availablebytes
                                        + consumed)));
                uint64_t newsig2 = _mm_movemask_epi8(
                        _mm_lddqu_si128(
                                (const __m128i *) (in + availablebytes + 16
                                        + consumed)));
                sig |= (newsig << availablebytes)
                        | (newsig2 << (availablebytes + 16));
#endif
                availablebytes += 32;
            } else if (availablebytes + consumed + 15 < inputsize) {
                int newsig = _mm_movemask_epi8(
                        _mm_lddqu_si128(
                                (const __m128i *) (in + availablebytes
                                        + consumed)));
                sig |= newsig << availablebytes;
                availablebytes += 16;
            } else {
                break;
            }
        }
        uint64_t ints_read;
        uint64_t bytes = masked_vbyte_read_group_delta(in + consumed, out,
                sig, &ints_read, &mprev);
        consumed += bytes;
        availablebytes -= bytes;
        sig >>= bytes;
        out += ints_read;
    }
    // Continue from the last running sum produced by the vector code.
    prev = _mm_extract_epi32(mprev, 3);
    // Scalar tail: a byte with its high bit clear ends the current value.
    while (consumed < inputsize) {
        unsigned int shift = 0;
        for (uint32_t v = 0; consumed < inputsize; shift += 7) {
            uint8_t c = in[consumed++];
            if ((c & 128) == 0) {
                uint32_t delta = v + ((uint32_t) c << shift);
                prev += delta;
                *out++ = prev;
                break;
            } else {
                v += (uint32_t) (c & 127) << shift;
            }
        }
    }
    return out - initout;
}
13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 1788 | 4, 5, 6, 7, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 1789 | 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 12, 13, 14, 15, 1790 | 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 1791 | 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15, 1792 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15, 1793 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1794 | }; 1795 | 1796 | static const __m128i *shuffle_mask = (__m128i *) shuffle_mask_bytes1; 1797 | 1798 | /* perform a lower-bound search for |key| in |out|; the resulting uint32 1799 | * is stored in |*presult|.*/ 1800 | #define CHECK_AND_INCREMENT(i, out, key, presult) \ 1801 | do { \ 1802 | __m128i tmpout = _mm_sub_epi32(out, conversion); \ 1803 | uint32_t mmask = _mm_movemask_ps(_mm_castsi128_ps(_mm_cmplt_epi32(tmpout, key4))); \ 1804 | if (mmask != 15) { \ 1805 | __m128i pp = _mm_shuffle_epi8(out, shuffle_mask[mmask ^ 15]); \ 1806 | int offset; \ 1807 | SIMDCOMP_CTZ(offset, mmask ^ 15); \ 1808 | *presult = _mm_cvtsi128_si32(pp); \ 1809 | return (i + offset); \ 1810 | } \ 1811 | i += 4; \ 1812 | } while (0) 1813 | 1814 | 1815 | /* perform a lower-bound search for |key| in |out|; the resulting uint32 1816 | * is stored in |*presult|.*/ 1817 | #define CHECK_AND_INCREMENT_2(i, out, key, presult) \ 1818 | do { \ 1819 | __m128i tmpout = _mm_sub_epi32(out, conversion); \ 1820 | uint32_t mmask = 3 & _mm_movemask_ps(_mm_castsi128_ps(_mm_cmplt_epi32(tmpout, key4))); \ 1821 | if (mmask != 3) { \ 1822 | __m128i pp = _mm_shuffle_epi8(out, shuffle_mask[mmask ^ 3]); \ 1823 | int offset; \ 1824 | SIMDCOMP_CTZ(offset, mmask ^ 3); \ 1825 | *presult = _mm_cvtsi128_si32(pp); \ 1826 | return (i + offset); \ 1827 | } \ 1828 | i += 2; \ 1829 | } while (0) 1830 | 1831 | static int masked_vbyte_search_group_delta(const uint8_t *in, uint64_t *p, 1832 | uint64_t mask, uint64_t *ints_read, __m128i *prev, 1833 | int i, uint32_t key, uint32_t *presult) { 1834 | 
__m128i initial = _mm_lddqu_si128((const __m128i *) (in)); 1835 | __m128i conversion = _mm_set1_epi32(2147483648U); 1836 | __m128i key4 = _mm_set1_epi32(key - 2147483648U); 1837 | 1838 | if (!(mask & 0xFFFF)) { 1839 | __m128i result = _mm_cvtepi8_epi32(initial); 1840 | *prev = PrefixSum(result, *prev); 1841 | CHECK_AND_INCREMENT(i, *prev, key, presult); 1842 | initial = _mm_srli_si128(initial, 4); 1843 | result = _mm_cvtepi8_epi32(initial); 1844 | *prev = PrefixSum(result, *prev); 1845 | CHECK_AND_INCREMENT(i, *prev, key, presult); 1846 | initial = _mm_srli_si128(initial, 4); 1847 | result = _mm_cvtepi8_epi32(initial); 1848 | *prev = PrefixSum(result, *prev); 1849 | CHECK_AND_INCREMENT(i, *prev, key, presult); 1850 | initial = _mm_srli_si128(initial, 4); 1851 | result = _mm_cvtepi8_epi32(initial); 1852 | *prev = PrefixSum(result, *prev); 1853 | CHECK_AND_INCREMENT(i, *prev, key, presult); 1854 | *ints_read = 16; 1855 | *p = 16; 1856 | return (-1); 1857 | } 1858 | 1859 | uint32_t low_12_bits = mask & 0xFFF; 1860 | // combine index and bytes consumed into a single lookup 1861 | index_bytes_consumed combined = combined_lookup[low_12_bits]; 1862 | uint64_t consumed = combined.bytes_consumed; 1863 | uint8_t index = combined.index; 1864 | 1865 | __m128i shuffle_vector = vectors[index]; 1866 | // __m128i shuffle_vector = {0, 0}; // speed check: 20% faster at large, less at small 1867 | 1868 | if (index < 64) { 1869 | *ints_read = 6; 1870 | __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); 1871 | __m128i low_bytes = _mm_and_si128(bytes_to_decode, 1872 | _mm_set1_epi16(0x007F)); 1873 | __m128i high_bytes = _mm_and_si128(bytes_to_decode, 1874 | _mm_set1_epi16(0x7F00)); 1875 | __m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1); 1876 | __m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted); 1877 | __m128i unpacked_result_a = _mm_and_si128(packed_result, 1878 | _mm_set1_epi32(0x0000FFFF)); 1879 | *prev = PrefixSum(unpacked_result_a, 
*prev); 1880 | CHECK_AND_INCREMENT(i, *prev, key, presult); 1881 | __m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16); 1882 | *prev = PrefixSum2ints(unpacked_result_b, *prev); 1883 | //_mm_storel_epi64(&out, *prev); 1884 | CHECK_AND_INCREMENT_2(i, *prev, key, presult); 1885 | *p = consumed; 1886 | return (-1); 1887 | } 1888 | if (index < 145) { 1889 | 1890 | *ints_read = 4; 1891 | 1892 | __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); 1893 | __m128i low_bytes = _mm_and_si128(bytes_to_decode, 1894 | _mm_set1_epi32(0x0000007F)); 1895 | __m128i middle_bytes = _mm_and_si128(bytes_to_decode, 1896 | _mm_set1_epi32(0x00007F00)); 1897 | __m128i high_bytes = _mm_and_si128(bytes_to_decode, 1898 | _mm_set1_epi32(0x007F0000)); 1899 | __m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1); 1900 | __m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2); 1901 | __m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted); 1902 | __m128i result = _mm_or_si128(low_middle, high_bytes_shifted); 1903 | *prev = PrefixSum(result, *prev); 1904 | CHECK_AND_INCREMENT(i, *prev, key, presult); 1905 | *p = consumed; 1906 | return (-1); 1907 | } 1908 | 1909 | *ints_read = 2; 1910 | 1911 | __m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F)); 1912 | __m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector); 1913 | __m128i split_bytes = _mm_mullo_epi16(bytes_to_decode, 1914 | _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16)); 1915 | __m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8); 1916 | __m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes); 1917 | __m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56); 1918 | __m128i result_evens = _mm_or_si128(recombined, low_byte); 1919 | __m128i result = _mm_shuffle_epi8(result_evens, 1920 | _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, 1921 | -1)); 1922 | *prev = PrefixSum2ints(result, *prev); 1923 | //_mm_storel_epi64(&out, *prev); 1924 
/*
 * Lower-bound search in a delta-coded varint stream, decoding on the fly.
 *
 * in      : encoded bytes
 * length  : number of integers encoded in `in`
 * prev    : initial value the first delta is added to
 * key     : value searched for
 * presult : receives the first decoded value that is >= key
 *
 * Returns the index of that value.  When no value reaches the key, `length`
 * is returned and *presult is set to key + 1.
 *
 * Group decoding and comparison are delegated to
 * masked_vbyte_search_group_delta(), which returns a non-negative index on a
 * hit and a negative value otherwise.
 */
int masked_vbyte_search_delta(const uint8_t *in, uint64_t length, uint32_t prev,
        uint32_t key, uint32_t *presult) {
    size_t in_pos = 0;      /* bytes of input decoded so far */
    __m128i running = _mm_set1_epi32(prev);
    uint64_t n_seen = 0;    /* integers decoded so far */
    uint64_t sig_bits = 0;  /* one bit per pending input byte: its high bit */
    int sig_len = 0;        /* signature bits still valid after the fast path */

    if (length > 96) {
        /* Pipelined path: the signature of the next 48 bytes is computed one
         * step ahead of the bytes being searched. */
        size_t scan_pos = 0;

#ifdef __AVX2__
        __m256i lo = _mm256_loadu_si256((__m256i *)(in + scan_pos));
        uint32_t lo_mask = _mm256_movemask_epi8(lo);
#else
        __m128i lo_a = _mm_loadu_si128((__m128i *) (in + scan_pos));
        uint32_t lo_mask_a = _mm_movemask_epi8(lo_a);
        __m128i lo_b = _mm_loadu_si128((__m128i *) (in + scan_pos + 16));
        uint32_t lo_mask_b = _mm_movemask_epi8(lo_b);
        uint32_t lo_mask = (lo_mask_b << 16) | lo_mask_a;
#endif

        /* Masks are widened through unsigned types so that no sign
         * extension can leak into the upper signature bits. */
        __m128i hi = _mm_loadu_si128((__m128i *) (in + scan_pos + 32));
        uint32_t hi_mask = _mm_movemask_epi8(hi);
        uint64_t ahead = ((uint64_t) hi_mask << 32) | lo_mask;
        scan_pos += 48;

        do {
            const uint64_t current = ahead;

#ifdef __AVX2__
            lo = _mm256_loadu_si256((__m256i *)(in + scan_pos));
            lo_mask = _mm256_movemask_epi8(lo);
#else
            lo_a = _mm_loadu_si128((__m128i *) (in + scan_pos));
            lo_mask_a = _mm_movemask_epi8(lo_a);
            lo_b = _mm_loadu_si128((__m128i *) (in + scan_pos + 16));
            lo_mask_b = _mm_movemask_epi8(lo_b);
            lo_mask = (lo_mask_b << 16) | lo_mask_a;
#endif

            hi = _mm_loadu_si128((__m128i *) (in + scan_pos + 32));
            hi_mask = _mm_movemask_epi8(hi);
            ahead = ((uint64_t) hi_mask << 32) | lo_mask;

            /* Append the 48 freshly scanned bits above the pending ones. */
            const uint64_t pending = scan_pos - (in_pos + 48);
            sig_bits |= current << pending;

            const uint64_t refill_at = scan_pos - 16;
            scan_pos += 48;

            /* Search until fewer than 16 scanned bytes remain in sig_bits. */
            while (in_pos < refill_at) {
                uint64_t got = 0;
                uint64_t used = 0;
                int hit = masked_vbyte_search_group_delta(in + in_pos, &used,
                        sig_bits, &got, &running,
                        n_seen, key, presult);
                if (hit >= 0)
                    return (hit);
                sig_bits >>= used;

                /* Dummy test kept from the original: it is meant to push the
                 * compiler to evaluate the shift above as early as possible. */
                if (sig_bits == 0xFFFFFFFFFFFFFFFF)
                    return 0;

                in_pos += used;
                n_seen += got;
            }
        } while (n_seen + 112 < length); /* 112 == 48 + 48 scanned ahead + up to 16 pending */

        sig_bits |= ahead << (scan_pos - in_pos - 48);
        sig_len = scan_pos - in_pos;
    }

    /* Use up the signature left by the fast path; it is never refilled here. */
    while (sig_len + n_seen < length && sig_len >= 16) {
        uint64_t got = 0;
        uint64_t used = 0;
        int hit = masked_vbyte_search_group_delta(in + in_pos, &used,
                sig_bits, &got, &running, n_seen, key, presult);
        if (hit >= 0)
            return (hit);
        in_pos += used;
        sig_len -= used;
        sig_bits >>= used;
        n_seen += got;
    }

    /* Scalar tail: continue from the last running sum, one integer at a time. */
    prev = _mm_extract_epi32(running, 3);
    while (n_seen < length) {
        uint32_t scratch;
        in_pos += read_int_delta(in + in_pos, &scratch, &prev);
        if (key <= prev) {
            *presult = prev;
            return (n_seen);
        }
        n_seen++;
    }

    *presult = key + 1;
    return length;
}
4,5,6,7,0,0,0,0,0,0,0,0,0,0,0,0, 2042 | 8,9,10,11,0,0,0,0,0,0,0,0,0,0,0,0, 2043 | 12,13,14,15,0,0,0,0,0,0,0,0,0,0,0,0, 2044 | }; 2045 | 2046 | static const __m128i *shuffle_mask_extract = (__m128i *) shuffle_mask_bytes2; 2047 | 2048 | static uint32_t branchlessextract (__m128i out, int i) { 2049 | return _mm_cvtsi128_si32(_mm_shuffle_epi8(out,shuffle_mask_extract[i])); 2050 | } 2051 | 2052 | #define CHECK_SELECT(i, out, slot, presult) \ 2053 | i += 4; \ 2054 | if (i > slot) { \ 2055 | *presult = branchlessextract (out, slot - (i - 4)); \ 2056 | return (1); \ 2057 | } 2058 | 2059 | 2060 | #define CHECK_SELECT_2(i, out, slot, presult) \ 2061 | i += 2; \ 2062 | if (i > slot) { \ 2063 | *presult = branchlessextract (out, slot - (i - 2)); \ 2064 | return (1); \ 2065 | } 2066 | 2067 | static int masked_vbyte_select_group_delta(const uint8_t *in, uint64_t *p, 2068 | uint64_t mask, uint64_t *ints_read, __m128i *prev, 2069 | int slot, uint32_t *presult) { 2070 | __m128i initial = _mm_lddqu_si128((const __m128i *) (in)); 2071 | int i = 0; 2072 | 2073 | if (!(mask & 0xFFFF)) { 2074 | __m128i result = _mm_cvtepi8_epi32(initial); 2075 | *prev = PrefixSum(result, *prev); 2076 | CHECK_SELECT(i, *prev, slot, presult); 2077 | initial = _mm_srli_si128(initial, 4); 2078 | result = _mm_cvtepi8_epi32(initial); 2079 | *prev = PrefixSum(result, *prev); 2080 | CHECK_SELECT(i, *prev, slot, presult); 2081 | initial = _mm_srli_si128(initial, 4); 2082 | result = _mm_cvtepi8_epi32(initial); 2083 | *prev = PrefixSum(result, *prev); 2084 | CHECK_SELECT(i, *prev, slot, presult); 2085 | initial = _mm_srli_si128(initial, 4); 2086 | result = _mm_cvtepi8_epi32(initial); 2087 | *prev = PrefixSum(result, *prev); 2088 | CHECK_SELECT(i, *prev, slot, presult); 2089 | *ints_read = 16; 2090 | *p = 16; 2091 | return (0); 2092 | } 2093 | 2094 | uint32_t low_12_bits = mask & 0xFFF; 2095 | // combine index and bytes consumed into a single lookup 2096 | index_bytes_consumed combined = 
combined_lookup[low_12_bits]; 2097 | uint64_t consumed = combined.bytes_consumed; 2098 | uint8_t index = combined.index; 2099 | 2100 | __m128i shuffle_vector = vectors[index]; 2101 | // __m128i shuffle_vector = {0, 0}; // speed check: 20% faster at large, less at small 2102 | 2103 | if (index < 64) { 2104 | *ints_read = 6; 2105 | __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); 2106 | __m128i low_bytes = _mm_and_si128(bytes_to_decode, 2107 | _mm_set1_epi16(0x007F)); 2108 | __m128i high_bytes = _mm_and_si128(bytes_to_decode, 2109 | _mm_set1_epi16(0x7F00)); 2110 | __m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1); 2111 | __m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted); 2112 | __m128i unpacked_result_a = _mm_and_si128(packed_result, 2113 | _mm_set1_epi32(0x0000FFFF)); 2114 | *prev = PrefixSum(unpacked_result_a, *prev); 2115 | CHECK_SELECT(i, *prev, slot, presult); 2116 | __m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16); 2117 | *prev = PrefixSum2ints(unpacked_result_b, *prev); 2118 | //_mm_storel_epi64(&out, *prev); 2119 | CHECK_SELECT_2(i, *prev, slot, presult); 2120 | *p = consumed; 2121 | return (0); 2122 | } 2123 | if (index < 145) { 2124 | 2125 | *ints_read = 4; 2126 | 2127 | __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); 2128 | __m128i low_bytes = _mm_and_si128(bytes_to_decode, 2129 | _mm_set1_epi32(0x0000007F)); 2130 | __m128i middle_bytes = _mm_and_si128(bytes_to_decode, 2131 | _mm_set1_epi32(0x00007F00)); 2132 | __m128i high_bytes = _mm_and_si128(bytes_to_decode, 2133 | _mm_set1_epi32(0x007F0000)); 2134 | __m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1); 2135 | __m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2); 2136 | __m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted); 2137 | __m128i result = _mm_or_si128(low_middle, high_bytes_shifted); 2138 | *prev = PrefixSum(result, *prev); 2139 | CHECK_SELECT(i, *prev, slot, presult); 2140 | *p = 
consumed; 2141 | return (0); 2142 | } 2143 | 2144 | *ints_read = 2; 2145 | 2146 | __m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F)); 2147 | __m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector); 2148 | __m128i split_bytes = _mm_mullo_epi16(bytes_to_decode, 2149 | _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16)); 2150 | __m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8); 2151 | __m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes); 2152 | __m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56); 2153 | __m128i result_evens = _mm_or_si128(recombined, low_byte); 2154 | __m128i result = _mm_shuffle_epi8(result_evens, 2155 | _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, 2156 | -1)); 2157 | *prev = PrefixSum2ints(result, *prev); 2158 | //_mm_storel_epi64(&out, *prev); 2159 | CHECK_SELECT_2(i, *prev, slot, presult); 2160 | *p = consumed; 2161 | return (0); 2162 | } 2163 | 2164 | 2165 | 2166 | uint32_t masked_vbyte_select_delta(const uint8_t *in, uint64_t length, 2167 | uint32_t prev, size_t slot) { 2168 | size_t consumed = 0; // number of bytes read 2169 | __m128i mprev = _mm_set1_epi32(prev); 2170 | uint64_t count = 0; // how many integers we have read so far 2171 | uint64_t sig = 0; 2172 | int availablebytes = 0; 2173 | if (96 < length) { 2174 | size_t scanned = 0; 2175 | 2176 | #ifdef __AVX2__ 2177 | __m256i low = _mm256_loadu_si256((__m256i *)(in + scanned)); 2178 | uint32_t lowSig = _mm256_movemask_epi8(low); 2179 | #else 2180 | __m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned)); 2181 | uint32_t lowSig1 = _mm_movemask_epi8(low1); 2182 | __m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); 2183 | uint32_t lowSig2 = _mm_movemask_epi8(low2); 2184 | uint32_t lowSig = lowSig2 << 16; 2185 | lowSig |= lowSig1; 2186 | #endif 2187 | 2188 | // excess verbosity to avoid problems with sign extension on conversions 2189 | // better to think about what's happening and make it clearer 
2190 | __m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); 2191 | uint32_t highSig = _mm_movemask_epi8(high); 2192 | uint64_t nextSig = highSig; 2193 | nextSig <<= 32; 2194 | nextSig |= lowSig; 2195 | scanned += 48; 2196 | 2197 | do { 2198 | uint64_t thisSig = nextSig; 2199 | 2200 | #ifdef __AVX2__ 2201 | low = _mm256_loadu_si256((__m256i *)(in + scanned)); 2202 | lowSig = _mm256_movemask_epi8(low); 2203 | #else 2204 | low1 = _mm_loadu_si128((__m128i *) (in + scanned)); 2205 | lowSig1 = _mm_movemask_epi8(low1); 2206 | low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); 2207 | lowSig2 = _mm_movemask_epi8(low2); 2208 | lowSig = lowSig2 << 16; 2209 | lowSig |= lowSig1; 2210 | #endif 2211 | 2212 | high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); 2213 | highSig = _mm_movemask_epi8(high); 2214 | nextSig = highSig; 2215 | nextSig <<= 32; 2216 | nextSig |= lowSig; 2217 | 2218 | uint64_t remaining = scanned - (consumed + 48); 2219 | sig = (thisSig << remaining) | sig; 2220 | 2221 | uint64_t reload = scanned - 16; 2222 | scanned += 48; 2223 | 2224 | // need to reload when less than 16 scanned bytes remain in sig 2225 | while (consumed < reload) { 2226 | uint32_t result; 2227 | uint64_t ints_read, bytes; 2228 | if (masked_vbyte_select_group_delta(in + consumed, &bytes, 2229 | sig, &ints_read, &mprev, 2230 | slot - count, &result)) { 2231 | return (result); 2232 | } 2233 | sig >>= bytes; 2234 | 2235 | // seems like this might force the compiler to prioritize shifting sig >>= bytes 2236 | if (sig == 0xFFFFFFFFFFFFFFFF) 2237 | return 0; // fake check to force earliest evaluation 2238 | 2239 | consumed += bytes; 2240 | count += ints_read; 2241 | } 2242 | } while (count + 112 < length); // 112 == 48 + 48 ahead for scanning + up to 16 remaining in sig 2243 | sig = (nextSig << (scanned - consumed - 48)) | sig; 2244 | availablebytes = scanned - consumed; 2245 | } 2246 | while (availablebytes + count < length) { 2247 | if (availablebytes < 16) { 2248 | if 
(availablebytes + count + 31 < length) { 2249 | #ifdef __AVX2__ 2250 | uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed))); 2251 | sig |= (newsigavx << availablebytes); 2252 | #else 2253 | uint64_t newsig = _mm_movemask_epi8( 2254 | _mm_lddqu_si128( 2255 | (const __m128i *) (in + availablebytes 2256 | + consumed))); 2257 | uint64_t newsig2 = _mm_movemask_epi8( 2258 | _mm_lddqu_si128( 2259 | (const __m128i *) (in + availablebytes + 16 2260 | + consumed))); 2261 | sig |= (newsig << availablebytes) 2262 | | (newsig2 << (availablebytes + 16)); 2263 | #endif 2264 | availablebytes += 32; 2265 | } else if (availablebytes + count + 15 < length) { 2266 | int newsig = _mm_movemask_epi8( 2267 | _mm_lddqu_si128( 2268 | (const __m128i *) (in + availablebytes 2269 | + consumed))); 2270 | sig |= newsig << availablebytes; 2271 | availablebytes += 16; 2272 | } else { 2273 | break; 2274 | } 2275 | } 2276 | 2277 | uint32_t result; 2278 | uint64_t ints_read, bytes; 2279 | if (masked_vbyte_select_group_delta(in + consumed, &bytes, 2280 | sig, &ints_read, &mprev, 2281 | slot - count, &result)) { 2282 | return (result); 2283 | } 2284 | consumed += bytes; 2285 | availablebytes -= bytes; 2286 | sig >>= bytes; 2287 | count += ints_read; 2288 | } 2289 | 2290 | prev = _mm_extract_epi32(mprev, 3); 2291 | for (; count < slot + 1; count++) { 2292 | uint32_t out; 2293 | consumed += read_int_delta(in + consumed, &out, &prev); 2294 | } 2295 | 2296 | return prev; 2297 | } 2298 | --------------------------------------------------------------------------------