├── .clang-format ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── bench ├── CMakeLists.txt ├── bp_bench.cpp ├── cuda_bp_bench.cu ├── cuda_vbyte_bench.cu ├── simdbp_bench.cpp ├── streamvbyte_bench.cpp ├── synthetic.hpp └── varintgb_bench.cpp ├── external └── CMakeLists.txt ├── include └── gpu_ic │ ├── cuda_bp.cuh │ ├── cuda_vbyte.cuh │ └── utils │ ├── binary_collection.hpp │ ├── binary_freq_collection.hpp │ ├── bit_istream.hpp │ ├── bit_ostream.hpp │ ├── bit_vector.hpp │ ├── broadword.hpp │ ├── cuda_utils.hpp │ ├── index.cuh │ ├── index.hpp │ ├── intrinsics.hpp │ ├── mappable_vector.hpp │ ├── mapper.hpp │ ├── posting_list.hpp │ ├── progress.hpp │ ├── tables.hpp │ ├── tight_variable_byte.hpp │ └── utils.hpp ├── plot.png ├── src ├── CMakeLists.txt ├── compress_index.cpp ├── cuda_compress_index.cu ├── cuda_perf_decode.cu └── perf_decode.cpp └── test_data ├── queries ├── test_collection.docs └── test_collection.freqs /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | AccessModifierOffset: -1 3 | AlignConsecutiveAssignments: true 4 | AlignConsecutiveDeclarations: true 5 | AlignEscapedNewlinesLeft: true 6 | AlignTrailingComments: false 7 | AllowAllParametersOfDeclarationOnNextLine: true 8 | AllowShortIfStatementsOnASingleLine: false 9 | AllowShortLoopsOnASingleLine: false 10 | AlwaysBreakBeforeMultilineStrings: true 11 | AlwaysBreakTemplateDeclarations: true 12 | BinPackArguments: false 13 | BinPackParameters: false 14 | BreakBeforeBinaryOperators: false 15 | BreakBeforeBraces: Attach 16 | BreakConstructorInitializersBeforeComma: false 17 | ColumnLimit: 100 18 | CommentPragmas: '^@atd' 19 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 20 | ConstructorInitializerIndentWidth: 4 21 | ContinuationIndentWidth: 4 22 | Cpp11BracedListStyle: true 23 | DerivePointerAlignment: false 24 | ExperimentalAutoDetectBinPacking: true 25 | IndentCaseLabels: false 26 | IndentFunctionDeclarationAfterType: false 27 | IndentWidth: 4 28 | MaxEmptyLinesToKeep: 1 29 | NamespaceIndentation: None 30 | ObjCSpaceBeforeProtocolList: false 31 | PenaltyBreakBeforeFirstCallParameter: 10 32 | PenaltyBreakComment: 60 33 | PenaltyBreakFirstLessLess: 20 34 | PenaltyBreakString: 1000 35 | PenaltyExcessCharacter: 1000000 36 | PenaltyReturnTypeOnItsOwnLine: 200 37 | PointerAlignment: Right 38 | SpaceAfterControlStatementKeyword: true 39 | SpaceBeforeAssignmentOperators: true 40 | SpaceInEmptyParentheses: false 41 | SpacesBeforeTrailingComments: 1 42 | SpacesInAngles: false 43 | SpacesInCStyleCastParentheses: false 44 | SpacesInParentheses: false 45 | Standard: Cpp11 46 | TabWidth: 8 47 | UseTab: Never -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | build/ 35 | 36 | .DS_Store -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "external/googletest"] 2 | path = 
external/googletest 3 | url = https://github.com/google/googletest.git 4 | [submodule "external/benchmark"] 5 | path = external/benchmark 6 | url = https://github.com/google/benchmark.git 7 | [submodule "external/FastPFor"] 8 | path = external/FastPFor 9 | url = https://github.com/lemire/FastPFor.git 10 | [submodule "external/cub"] 11 | path = external/cub 12 | url = https://github.com/NVlabs/cub.git 13 | [submodule "external/CLI11"] 14 | path = external/CLI11 15 | url = https://github.com/CLIUtils/CLI11.git 16 | [submodule "external/mio"] 17 | path = external/mio 18 | url = https://github.com/mandreyel/mio.git 19 | [submodule "external/boost-cmake"] 20 | path = external/boost-cmake 21 | url = https://github.com/Orphis/boost-cmake.git 22 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | project(gpu_integers_compression) 3 | 4 | set(CMAKE_CXX_STANDARD 11) 5 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 6 | set(CMAKE_CXX_EXTENSIONS OFF) 7 | option(BUILD_CUDA "Build the CUDA library" ON) 8 | 9 | if (NOT CMAKE_BUILD_TYPE) 10 | message(STATUS "No build type selected, default to Release") 11 | set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) 12 | endif() 13 | MESSAGE( STATUS "CMAKE_BUILD_TYPE: " ${CMAKE_BUILD_TYPE} ) 14 | 15 | 16 | if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") 17 | if (CXX_COMPILER_VERSION VERSION_LESS 4.7) 18 | message(STATUS "GCC version must be at least 4.7!") 19 | endif() 20 | set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wcast-align -Ofast -lm -DNDEBUG -DHAVE_CXX0X -march=native") 21 | set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wcast-align -ggdb -lm -DHAVE_CXX0X -march=native") 22 | elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 23 | if (CXX_COMPILER_VERSION VERSION_LESS 4.2.1) 24 | message(STATUS "Clang version must be at least 4.2.1!" 
) 25 | endif() 26 | set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wcast-align -O3 -DNDEBUG -DHAVE_CXX0X -msse4.1 -march=native") 27 | set (CMAKE_CXX_FLAGS_DEBUG "-Wall -Wcast-align -ggdb -DHAVE_CXX0X -msse4.1 -march=native") 28 | else () 29 | message(FATAL_ERROR "Please, use GCC or Clang compiler!") 30 | endif() 31 | 32 | if (USE_SANITIZERS) 33 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer") 34 | endif () 35 | 36 | set(gtest_disable_pthreads ON) 37 | 38 | 39 | include_directories(include) 40 | add_library(gpu_integers_compression INTERFACE) 41 | target_include_directories(gpu_integers_compression INTERFACE 42 | $ 43 | ) 44 | target_link_libraries(gpu_integers_compression INTERFACE 45 | mio 46 | FastPFor 47 | Boost::boost 48 | ) 49 | 50 | if (BUILD_CUDA) 51 | set(CMAKE_CUDA_STANDARD 11) 52 | set(CMAKE_CUDA_STANDARD_REQUIRED ON) 53 | set(CMAKE_CUDA_EXTENSIONS OFF) 54 | set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --std=c++11") 55 | 56 | find_package(CUDA) 57 | CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS "Auto") 58 | list(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS}) 59 | target_link_libraries(gpu_integers_compression INTERFACE cub) 60 | endif () 61 | MESSAGE( STATUS "BUILD_CUDA: " ${BUILD_CUDA} ) 62 | add_subdirectory(external) 63 | 64 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 65 | 66 | add_subdirectory(src) 67 | 68 | enable_testing() 69 | add_subdirectory(bench) 70 | 71 | 72 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | GPU Integers Compression 2 | ========================== 3 | 4 | This code was used in the experiments of the following paper: 5 | 6 | Antonio Mallia, Michał Siedlaczek, Torsten Suel, Mohamed Zahran. **GPU-Accelerated Decoding of Integer Lists**. In The 28th ACM International Conference on Information and Knowledge Management (CIKM). 
2019 7 | 8 | ## Usage 9 | 10 | ### Build 11 | ``` 12 | git clone git@github.com:amallia/gpu-integers-compression.git 13 | cd gpu-integers-compression 14 | mkdir build 15 | cd build 16 | cmake .. 17 | make -j 18 | ``` 19 | 20 | ### External libraries 21 | - Google Test 22 | - Google Benchmark 23 | - NVlabs CUB 24 | - FastPFor 25 | - CLI11 26 | - Boost 27 | - mio 28 | 29 | ### Benchmark 30 | 31 | Benchmark tasks can be found in the `bench` folder. For example, you can run: 32 | ``` 33 | ./bench/cuda_bp_bench 34 | ./bench/cuda_vbyte_bench 35 | ``` 36 | 37 | ## Codecs 38 | 39 | ### GPU Binary-Packing 40 | ```cpp 41 | #include "gpu_ic/cuda_bp.cuh" 42 | 43 | // Values to encode 44 | std::vector<uint32_t> values = {/* ... */}; 45 | 46 | // Encode 47 | std::vector<uint8_t> encoded_values; 48 | encoded_values.resize(values.size() * 8); 49 | auto compressedsize = cuda_bp::encode(encoded_values.data(), values.data(), values.size()); 50 | encoded_values.resize(compressedsize); 51 | encoded_values.shrink_to_fit(); 52 | 53 | // Decode (d_encoded and d_decoded are device pointers of type uint8_t* and uint32_t*) 54 | std::vector<uint32_t> decoded_values; 55 | decoded_values.resize(values.size()); 56 | CUDA_CHECK_ERROR(cudaMalloc((void **)&d_encoded, encoded_values.size() * sizeof(uint8_t))); 57 | CUDA_CHECK_ERROR(cudaMemcpy(d_encoded, encoded_values.data(), encoded_values.size() * sizeof(uint8_t), cudaMemcpyHostToDevice)); 58 | 59 | CUDA_CHECK_ERROR(cudaMalloc((void **)&d_decoded, values.size() * sizeof(uint32_t))); 60 | cuda_bp::decode(d_decoded, d_encoded, decoded_values.size()); 61 | CUDA_CHECK_ERROR(cudaMemcpy(decoded_values.data(), d_decoded, values.size() * sizeof(uint32_t), cudaMemcpyDeviceToHost)); 62 | ``` 63 | 64 | ### GPU Vbyte 65 | 66 | ```cpp 67 | #include "gpu_ic/cuda_vbyte.cuh" 68 | 69 | // Values to encode 70 | std::vector<uint32_t> values = {/* ... */}; 71 | 72 | // Encode 73 | std::vector<uint8_t> encoded_values; 74 | encoded_values.resize(values.size() * 8); 75 | auto compressedsize = cuda_vbyte::encode(encoded_values.data(), values.data(), values.size()); 76 | encoded_values.resize(compressedsize); 77 | encoded_values.shrink_to_fit(); 78 | 79 | // Decode (d_encoded and d_decoded are device pointers of type uint8_t* and uint32_t*) 80 | std::vector<uint32_t> decoded_values; 81 | decoded_values.resize(values.size()); 82 | CUDA_CHECK_ERROR(cudaMalloc((void **)&d_encoded, encoded_values.size() * sizeof(uint8_t))); 83 | CUDA_CHECK_ERROR(cudaMemcpy(d_encoded, encoded_values.data(), encoded_values.size() * sizeof(uint8_t), cudaMemcpyHostToDevice)); 84 | 85 | CUDA_CHECK_ERROR(cudaMalloc((void **)&d_decoded, values.size() * sizeof(uint32_t))); 86 | cuda_vbyte::decode(d_decoded, d_encoded, decoded_values.size()); 87 | CUDA_CHECK_ERROR(cudaMemcpy(decoded_values.data(), d_decoded, values.size() * sizeof(uint32_t), cudaMemcpyDeviceToHost)); 88 | 89 | ``` 90 | 91 | 92 | ## Benchmarks 93 |
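The plot below summarizes the decoding benchmarks. It can be regenerated by running the benchmark binaries and collecting their output; as a sketch (the flags below are standard Google Benchmark options, not project-specific):
```
./bench/cuda_bp_bench    --benchmark_repetitions=3 --benchmark_format=csv > cuda_bp.csv
./bench/cuda_vbyte_bench --benchmark_repetitions=3 --benchmark_format=csv > cuda_vbyte.csv
```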

94 | ![Benchmark results](plot.png) 95 |
96 | 97 | -------------------------------------------------------------------------------- /bench/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB BENCH_SOURCES *_bench.cpp) 2 | foreach(BENCH_SRC ${BENCH_SOURCES}) 3 | get_filename_component (BENCH_SRC_NAME ${BENCH_SRC} NAME_WE) 4 | add_executable(${BENCH_SRC_NAME} ${BENCH_SRC}) 5 | target_link_libraries(${BENCH_SRC_NAME} 6 | benchmark 7 | gpu_integers_compression 8 | gtest 9 | gmock 10 | ) 11 | target_include_directories(${BENCH_SRC_NAME} BEFORE PRIVATE $ 12 | ) 13 | add_test(${BENCH_SRC_NAME} ${BENCH_SRC_NAME}) 14 | endforeach(BENCH_SRC) 15 | 16 | if(BUILD_CUDA) 17 | CUDA_ADD_EXECUTABLE(cuda_bp_bench cuda_bp_bench.cu) 18 | target_link_libraries(cuda_bp_bench 19 | benchmark 20 | gpu_integers_compression 21 | gtest 22 | gmock 23 | ) 24 | CUDA_ADD_EXECUTABLE(cuda_vbyte_bench cuda_vbyte_bench.cu) 25 | target_link_libraries(cuda_vbyte_bench 26 | benchmark 27 | gpu_integers_compression 28 | gtest 29 | gmock 30 | ) 31 | endif() 32 | -------------------------------------------------------------------------------- /bench/bp_bench.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | #include "benchmark/benchmark.h" 20 | #include "../external/FastPFor/headers/codecfactory.h" 21 | #include "synthetic.hpp" 22 | #include "gpu_ic/utils/utils.hpp" 23 | 24 | template 25 | class ValuesFixture : public ::benchmark::Fixture { 26 | 27 | public: 28 | using ::benchmark::Fixture::SetUp; 29 | using ::benchmark::Fixture::TearDown; 30 | 31 | virtual void SetUp(::benchmark::State& st) { 32 | using namespace FastPForLib; 33 | using namespace gpu_ic; 34 | 35 | IntegerCODEC &codec = *CODECFactory::getFromName("BP32"); 36 | Generator clu(1); 37 | values = clu.generate(st.range(0), 1U << 29); 38 | utils::delta_encode(values.data(), values.size()); 39 | 40 | encoded_values.resize(values.size() * 8); 41 | size_t compressedsize = 0; 42 | codec.encodeArray(values.data(), values.size(), encoded_values.data(), 43 | compressedsize); 44 | encoded_values.resize(compressedsize); 45 | encoded_values.shrink_to_fit(); 46 | 47 | decoded_values.resize(values.size()); 48 | } 49 | 50 | virtual void TearDown(::benchmark::State&) { 51 | ASSERT_EQ(decoded_values.size(), values.size()); 52 | for (size_t i = 0; i < values.size(); ++i) 53 | { 54 | ASSERT_EQ(decoded_values[i], values[i]); 55 | } 56 | values.clear(); 57 | encoded_values.clear(); 58 | decoded_values.clear(); 59 | } 60 | std::vector values; 61 | std::vector encoded_values; 62 | std::vector decoded_values; 63 | }; 64 | 65 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeUniform, gpu_ic::UniformDataGenerator)(benchmark::State& state) { 66 | using namespace FastPForLib; 67 | IntegerCODEC &codec = *CODECFactory::getFromName("BP32"); 68 | 69 | while (state.KeepRunning()) { 70 | size_t recoveredsize = 0; 71 | codec.decodeArray(encoded_values.data(), encoded_values.size(), 72 | decoded_values.data(), recoveredsize); 73 | } 74 | auto bpi = double(32*encoded_values.size())/decoded_values.size(); 75 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 76 | } 77 | BENCHMARK_REGISTER_F(ValuesFixture, decodeUniform)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 78 | 79 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeClustered, gpu_ic::ClusteredDataGenerator)(benchmark::State& state) { 80 | using namespace FastPForLib; 81 | IntegerCODEC &codec = *CODECFactory::getFromName("BP32"); 82 | 83 | while (state.KeepRunning()) { 84 | size_t recoveredsize = 0; 85 | codec.decodeArray(encoded_values.data(), encoded_values.size(), 86 | decoded_values.data(), recoveredsize); 87 | } 88 | auto bpi = double(32*encoded_values.size())/decoded_values.size(); 89 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 90 | } 91 | BENCHMARK_REGISTER_F(ValuesFixture, decodeClustered)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 92 | 93 | BENCHMARK_MAIN(); 94 | 95 | -------------------------------------------------------------------------------- /bench/cuda_bp_bench.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | #include "gmock/gmock.h" 19 | #include "gtest/gtest.h" 20 | #include "benchmark/benchmark.h" 21 | 22 | 23 | #include "synthetic.hpp" 24 | #include "gpu_ic/cuda_bp.cuh" 25 | #include "gpu_ic/utils/utils.hpp" 26 | #include "gpu_ic/utils/cuda_utils.hpp" 27 | 28 | template 29 | class ValuesFixture : public ::benchmark::Fixture { 30 | 31 | public: 32 | using ::benchmark::Fixture::SetUp; 33 | using ::benchmark::Fixture::TearDown; 34 | 35 | virtual void SetUp(::benchmark::State& st) { 36 | using namespace gpu_ic; 37 | 38 | Generator clu(1); 39 | values = clu.generate(st.range(0), 1U << 29); 40 | utils::delta_encode(values.data(), values.size()); 41 | 42 | encoded_values.resize(values.size() * 8); 43 | auto compressedsize = cuda_bp::encode(encoded_values.data(), values.data(), values.size()); 44 | encoded_values.resize(compressedsize); 45 | encoded_values.shrink_to_fit(); 46 | 47 | decoded_values.resize(values.size()); 48 | CUDA_CHECK_ERROR(cudaSetDevice(0)); 49 | warmUpGPU<<<1, 1>>>(); 50 | CUDA_CHECK_ERROR(cudaMalloc((void **)&d_encoded, encoded_values.size() * sizeof(uint8_t))); 51 | CUDA_CHECK_ERROR(cudaMemcpy(d_encoded, encoded_values.data(), encoded_values.size() * sizeof(uint8_t), cudaMemcpyHostToDevice)); 52 | 53 | CUDA_CHECK_ERROR(cudaMalloc((void **)&d_decoded, values.size() * sizeof(uint32_t))); 54 | CUDA_CHECK_ERROR(cudaDeviceSynchronize()); 55 | } 56 | 57 | virtual void TearDown(::benchmark::State&) { 58 | CUDA_CHECK_ERROR(cudaMemcpy(decoded_values.data(), d_decoded, values.size() * sizeof(uint32_t), cudaMemcpyDeviceToHost)); 59 | 60 | ASSERT_EQ(decoded_values.size(), values.size()); 61 | for (size_t i = 0; i < values.size(); ++i) 62 | { 63 | ASSERT_EQ(decoded_values[i], values[i]); 64 | } 65 | cudaFree(d_encoded); 66 | cudaFree(d_decoded); 67 | values.clear(); 68 | encoded_values.clear(); 69 | decoded_values.clear(); 70 | } 71 | std::vector values; 72 | std::vector encoded_values; 73 | std::vector decoded_values; 74 | uint8_t * d_encoded; 75 | uint32_t * d_decoded; 76 | }; 77 | 78 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeUniform128, gpu_ic::UniformDataGenerator, 128)(benchmark::State& state) { 79 | while (state.KeepRunning()) { 80 | cuda_bp::decode<128>(d_decoded, d_encoded, decoded_values.size()); 81 | CUDA_CHECK_ERROR(cudaDeviceSynchronize()); 82 | } 83 | auto bpi = double(8*encoded_values.size())/decoded_values.size(); 84 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 85 | } 86 | BENCHMARK_REGISTER_F(ValuesFixture, decodeUniform128)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 87 | 88 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeUniform256, gpu_ic::UniformDataGenerator, 256)(benchmark::State& state) { 89 | while (state.KeepRunning()) { 90 | cuda_bp::decode<256>(d_decoded, d_encoded, decoded_values.size()); 91 | CUDA_CHECK_ERROR(cudaDeviceSynchronize()); 92 | } 93 | auto bpi = double(8*encoded_values.size())/decoded_values.size(); 94 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 95 | } 96 | 
BENCHMARK_REGISTER_F(ValuesFixture, decodeUniform256)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 97 | 98 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeClustered128, gpu_ic::ClusteredDataGenerator, 128)(benchmark::State& state) { 99 | while (state.KeepRunning()) { 100 | cuda_bp::decode<128>(d_decoded, d_encoded, decoded_values.size()); 101 | CUDA_CHECK_ERROR(cudaDeviceSynchronize()); 102 | } 103 | auto bpi = double(8*encoded_values.size())/decoded_values.size(); 104 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 105 | } 106 | BENCHMARK_REGISTER_F(ValuesFixture, decodeClustered128)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 107 | 108 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeClustered256, gpu_ic::ClusteredDataGenerator, 256)(benchmark::State& state) { 109 | while (state.KeepRunning()) { 110 | cuda_bp::decode<256>(d_decoded, d_encoded, decoded_values.size()); 111 | CUDA_CHECK_ERROR(cudaDeviceSynchronize()); 112 | } 113 | auto bpi = double(8*encoded_values.size())/decoded_values.size(); 114 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 115 | } 116 | BENCHMARK_REGISTER_F(ValuesFixture, decodeClustered256)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 117 | 118 | BENCHMARK_MAIN(); 119 | -------------------------------------------------------------------------------- /bench/cuda_vbyte_bench.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include "gmock/gmock.h" 19 | #include "gtest/gtest.h" 20 | #include "benchmark/benchmark.h" 21 | 22 | #include "gpu_ic/cuda_vbyte.cuh" 23 | #include "gpu_ic/utils/utils.hpp" 24 | #include "gpu_ic/utils/cuda_utils.hpp" 25 | 26 | #include "synthetic.hpp" 27 | 28 | template 29 | class ValuesFixture : public ::benchmark::Fixture { 30 | 31 | public: 32 | using ::benchmark::Fixture::SetUp; 33 | using ::benchmark::Fixture::TearDown; 34 | 35 | virtual void SetUp(::benchmark::State& st) { 36 | using namespace gpu_ic; 37 | 38 | Generator clu(1); 39 | values = clu.generate(st.range(0), 1U << 29); 40 | utils::delta_encode(values.data(), values.size()); 41 | 42 | encoded_values.resize(values.size() * 8); 43 | auto compressedsize = cuda_vbyte::encode(encoded_values.data(), values.data(), values.size()); 44 | encoded_values.resize(compressedsize); 45 | encoded_values.shrink_to_fit(); 46 | 47 | decoded_values.resize(values.size()); 48 | CUDA_CHECK_ERROR(cudaSetDevice(0)); 49 | warmUpGPU<<<1, 1>>>(); 50 | CUDA_CHECK_ERROR(cudaMalloc((void **)&d_encoded, encoded_values.size() * sizeof(uint8_t))); 51 | CUDA_CHECK_ERROR(cudaMemcpy(d_encoded, encoded_values.data(), encoded_values.size() * sizeof(uint8_t), cudaMemcpyHostToDevice)); 52 | 53 | CUDA_CHECK_ERROR(cudaMalloc((void **)&d_decoded, values.size() * sizeof(uint32_t))); 54 | CUDA_CHECK_ERROR(cudaDeviceSynchronize()); 55 | } 56 | 57 | virtual void TearDown(::benchmark::State&) { 58 | CUDA_CHECK_ERROR(cudaMemcpy(decoded_values.data(), d_decoded, values.size() * sizeof(uint32_t), cudaMemcpyDeviceToHost)); 59 | 60 | ASSERT_EQ(decoded_values.size(), values.size()); 61 | for (size_t i = 0; i < values.size(); ++i) 62 | { 63 | ASSERT_EQ(decoded_values[i], values[i]); 64 | } 65 | 66 | cudaFree(d_encoded); 67 | cudaFree(d_decoded); 68 | values.clear(); 69 | encoded_values.clear(); 70 | decoded_values.clear(); 71 | } 72 | std::vector values; 73 | std::vector encoded_values; 74 | std::vector decoded_values; 75 | uint8_t * d_encoded; 76 | uint32_t * d_decoded; 77 | }; 78 | 79 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeUniform128, gpu_ic::UniformDataGenerator, 128)(benchmark::State& state) { 80 | while (state.KeepRunning()) { 81 | cuda_vbyte::decode<128>(d_decoded, d_encoded, decoded_values.size()); 82 | CUDA_CHECK_ERROR(cudaDeviceSynchronize()); 83 | } 84 | auto bpi = double(8*encoded_values.size())/decoded_values.size(); 85 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 86 | } 87 | BENCHMARK_REGISTER_F(ValuesFixture, decodeUniform128)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 88 | 89 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeUniform1024, gpu_ic::UniformDataGenerator, 1024)(benchmark::State& state) { 90 | while (state.KeepRunning()) { 91 | cuda_vbyte::decode<1024>(d_decoded, d_encoded, decoded_values.size()); 92 | CUDA_CHECK_ERROR(cudaDeviceSynchronize()); 93 | } 94 | auto bpi = double(8*encoded_values.size())/decoded_values.size(); 95 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 96 | } 97 | BENCHMARK_REGISTER_F(ValuesFixture, decodeUniform1024)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 98 | 99 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeClustered128, gpu_ic::ClusteredDataGenerator, 128)(benchmark::State& state) { 100 | while (state.KeepRunning()) { 101 | cuda_vbyte::decode<128>(d_decoded, d_encoded, decoded_values.size()); 102 | CUDA_CHECK_ERROR(cudaDeviceSynchronize()); 103 | } 104 | auto bpi = 
double(8*encoded_values.size())/decoded_values.size(); 105 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 106 | } 107 | BENCHMARK_REGISTER_F(ValuesFixture, decodeClustered128)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 108 | 109 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeClustered1024, gpu_ic::ClusteredDataGenerator, 1024)(benchmark::State& state) { 110 | while (state.KeepRunning()) { 111 | cuda_vbyte::decode<1024>(d_decoded, d_encoded, decoded_values.size()); 112 | CUDA_CHECK_ERROR(cudaDeviceSynchronize()); 113 | } 114 | auto bpi = double(8*encoded_values.size())/decoded_values.size(); 115 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 116 | } 117 | BENCHMARK_REGISTER_F(ValuesFixture, decodeClustered1024)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 118 | 119 | BENCHMARK_MAIN(); 120 | -------------------------------------------------------------------------------- /bench/simdbp_bench.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | #include 19 | #include "benchmark/benchmark.h" 20 | #include "../external/FastPFor/headers/codecfactory.h" 21 | #include "synthetic.hpp" 22 | #include "gpu_ic/utils/utils.hpp" 23 | 24 | template 25 | class ValuesFixture : public ::benchmark::Fixture { 26 | 27 | public: 28 | using ::benchmark::Fixture::SetUp; 29 | using ::benchmark::Fixture::TearDown; 30 | 31 | virtual void SetUp(::benchmark::State& st) { 32 | using namespace FastPForLib; 33 | using namespace gpu_ic; 34 | 35 | IntegerCODEC &codec = *CODECFactory::getFromName("simdbinarypacking"); 36 | 37 | Generator clu(1); 38 | auto tmp = clu.generate(st.range(0), 1U << 29); 39 | values = std::vector(tmp.begin(), tmp.end()); 40 | utils::delta_encode(values.data(), values.size()); 41 | 42 | encoded_values.resize(values.size() * 8); 43 | size_t compressedsize = 0; 44 | codec.encodeArray(values.data(), values.size(), encoded_values.data(), 45 | compressedsize); 46 | encoded_values.resize(compressedsize); 47 | encoded_values.shrink_to_fit(); 48 | 49 | decoded_values.resize(values.size()); 50 | } 51 | 52 | virtual void TearDown(::benchmark::State&) { 53 | ASSERT_EQ(decoded_values.size(), values.size()); 54 | for (size_t i = 0; i < values.size(); ++i) 55 | { 56 | ASSERT_EQ(decoded_values[i], values[i]); 57 | } 58 | values.clear(); 59 | encoded_values.clear(); 60 | decoded_values.clear(); 61 | } 62 | std::vector values; 63 | std::vector encoded_values; 64 | std::vector decoded_values; 65 | }; 66 | 67 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeUniform, gpu_ic::UniformDataGenerator)(benchmark::State& state) { 68 | using namespace FastPForLib; 69 | IntegerCODEC &codec = *CODECFactory::getFromName("simdbinarypacking"); 70 | 71 | while (state.KeepRunning()) { 72 | size_t recoveredsize = 0; 73 | 
codec.decodeArray(encoded_values.data(), encoded_values.size(), 74 | decoded_values.data(), recoveredsize); 75 | } 76 | auto bpi = double(32*encoded_values.size())/decoded_values.size(); 77 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 78 | 79 | } 80 | BENCHMARK_REGISTER_F(ValuesFixture, decodeUniform)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 81 | 82 | 83 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeClustered, gpu_ic::ClusteredDataGenerator)(benchmark::State& state) { 84 | using namespace FastPForLib; 85 | IntegerCODEC &codec = *CODECFactory::getFromName("simdbinarypacking"); 86 | 87 | while (state.KeepRunning()) { 88 | size_t recoveredsize = 0; 89 | codec.decodeArray(encoded_values.data(), encoded_values.size(), 90 | decoded_values.data(), recoveredsize); 91 | } 92 | auto bpi = double(32*encoded_values.size())/decoded_values.size(); 93 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 94 | 95 | } 96 | BENCHMARK_REGISTER_F(ValuesFixture, decodeClustered)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 97 | 98 | BENCHMARK_MAIN(); 99 | 100 | -------------------------------------------------------------------------------- /bench/streamvbyte_bench.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | #include "benchmark/benchmark.h" 20 | #include "../external/FastPFor/headers/codecfactory.h" 21 | #include "synthetic.hpp" 22 | #include "gpu_ic/utils/utils.hpp" 23 | 24 | template 25 | class ValuesFixture : public ::benchmark::Fixture { 26 | 27 | public: 28 | using ::benchmark::Fixture::SetUp; 29 | using ::benchmark::Fixture::TearDown; 30 | 31 | virtual void SetUp(::benchmark::State& st) { 32 | using namespace FastPForLib; 33 | IntegerCODEC &codec = *CODECFactory::getFromName("streamvbyte"); 34 | 35 | Generator clu(1); 36 | values = clu.generate(st.range(0), 1U << 29); 37 | utils::delta_encode(values.data(), values.size()); 38 | 39 | encoded_values.resize(values.size() * 8); 40 | size_t compressedsize = 0; 41 | codec.encodeArray(values.data(), values.size(), encoded_values.data(), 42 | compressedsize); 43 | encoded_values.resize(compressedsize); 44 | encoded_values.shrink_to_fit(); 45 | 46 | decoded_values.resize(values.size()); 47 | } 48 | 49 | virtual void TearDown(::benchmark::State&) { 50 | ASSERT_EQ(decoded_values.size(), values.size()); 51 | for (size_t i = 0; i < values.size(); ++i) 52 | { 53 | ASSERT_EQ(decoded_values[i], values[i]); 54 | } 55 | values.clear(); 56 | encoded_values.clear(); 57 | decoded_values.clear(); 58 | } 59 | std::vector values; 60 | std::vector encoded_values; 61 | std::vector decoded_values; 62 | }; 63 | 64 | 65 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeUniform, gpu_ic::UniformDataGenerator)(benchmark::State& state) { 66 | using namespace FastPForLib; 67 | IntegerCODEC &codec = *CODECFactory::getFromName("streamvbyte"); 68 | 69 | while (state.KeepRunning()) { 70 | size_t recoveredsize = 0; 71 | codec.decodeArray(encoded_values.data(), encoded_values.size(), 72 | decoded_values.data(), recoveredsize); 73 | } 74 | auto bpi = double(32*encoded_values.size())/decoded_values.size(); 75 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 76 | 77 | } 78 | BENCHMARK_REGISTER_F(ValuesFixture, decodeUniform)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 79 | 80 | 81 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeClustered, gpu_ic::ClusteredDataGenerator)(benchmark::State& state) { 82 | using namespace FastPForLib; 83 | IntegerCODEC &codec = *CODECFactory::getFromName("streamvbyte"); 84 | 85 | while (state.KeepRunning()) { 86 | size_t recoveredsize = 0; 87 | codec.decodeArray(encoded_values.data(), encoded_values.size(), 88 | decoded_values.data(), recoveredsize); 89 | } 90 | auto bpi = double(32*encoded_values.size())/decoded_values.size(); 91 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 92 | 93 | } 94 | BENCHMARK_REGISTER_F(ValuesFixture, decodeClustered)->RangeMultiplier(2)->Range((1ULL << 15), (1ULL<<25)); 95 | 96 | BENCHMARK_MAIN(); 97 | 98 | -------------------------------------------------------------------------------- /bench/synthetic.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | namespace gpu_ic { 24 | 25 | class UniformDataGenerator { 26 | public: 27 | UniformDataGenerator(uint32_t seed = std::random_device{}()) : rand(seed) {} 28 | 29 | std::vector generate(uint32_t N, uint32_t Max) { 30 | if (Max < N) 31 | throw std::runtime_error("can't generate enough distinct elements in small interval"); 32 | 33 | std::uniform_int_distribution dis(1, Max - 1); 34 | std::vector ans; 35 | if (N == 0) 36 | return ans; // nothing to do 37 | ans.reserve(N); 38 | assert(Max >= 1); 39 | 40 | if (2 * N > Max) { 41 | std::set s; 42 | while (s.size() < Max - N) 43 | s.insert(dis(rand)); 44 | s.insert(Max); 45 | ans.resize(N); 46 | uint32_t i = 0; 47 | size_t c = 0; 48 | for (uint32_t v : s) { 49 | for (; i < v; ++i) 50 | ans[c++] = i; 51 | ++i; 52 | } 53 | assert(c == ans.size()); 54 | } else { 55 | std::set s; 56 | while (s.size() < N) 57 | s.insert(dis(rand)); 58 | ans.assign(s.begin(), s.end()); 59 | assert(N == ans.size()); 60 | } 61 | return ans; 62 | } 63 | std::mt19937 rand; 64 | }; 65 | 66 | class ClusteredDataGenerator { 67 | public: 68 | UniformDataGenerator unidg; 69 | ClusteredDataGenerator(uint32_t seed = std::random_device{}()) : unidg(seed) {} 70 | 71 | template 72 | void fillUniform(iterator begin, iterator end, uint32_t Min, uint32_t Max) { 73 | std::vector v = unidg.generate(static_cast(end - begin), Max - Min); 74 | for (size_t k = 0; k < v.size(); ++k) 75 | *(begin + k) = Min + v[k]; 76 | } 77 | template 78 | void fillClustered(iterator begin, iterator end, uint32_t Min, uint32_t Max) { 79 | const uint32_t N = static_cast(end - begin); 80 | const uint32_t range = Max - Min; 81 | if (range < N) 82 | throw std::runtime_error("can't generate that many in small interval."); 83 | assert(range >= N); 84 | if ((range == N) || (N < 10)) { 85 | fillUniform(begin, end, Min, Max); 86 | return; 87 | } 88 | std::uniform_int_distribution dis(1, range - N); 89 | const uint32_t cut = N / 2 + dis(unidg.rand); 90 | assert(cut >= N / 2); 91 | assert(Max - Min - cut >= N - N / 2); 92 | 93 | std::uniform_real_distribution urd_dis; 94 | const double p = urd_dis(unidg.rand); 95 | assert(p <= 1); 96 | assert(p >= 0); 97 | if (p <= 0.25) { 98 | fillUniform(begin, begin + N / 2, Min, Min + cut); 99 | fillClustered(begin + N / 2, end, Min + cut, Max); 100 | } else if (p <= 0.5) { 101 | fillClustered(begin, begin + N / 2, Min, Min + cut); 102 | fillUniform(begin + N / 2, end, Min + cut, Max); 103 | } else { 104 | fillClustered(begin, begin + N / 2, Min, Min + cut); 105 | fillClustered(begin + N / 2, end, Min + cut, Max); 106 | } 107 | } 108 | 109 | std::vector generate(uint32_t N, uint32_t Max) { 110 | std::vector ans(N); 111 | fillClustered(ans.begin(), ans.end(), 0, Max); 112 | return ans; 113 | } 114 | }; 115 | 116 | } // namespace gpu_ic 117 | -------------------------------------------------------------------------------- /bench/varintgb_bench.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 
2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "../external/FastPFor/headers/codecfactory.h" 18 | #include "benchmark/benchmark.h" 19 | #include "gpu_ic/utils/utils.hpp" 20 | #include 21 | #include 22 | 23 | #include "synthetic.hpp" 24 | 25 | template 26 | class ValuesFixture : public ::benchmark::Fixture { 27 | 28 | public: 29 | using ::benchmark::Fixture::SetUp; 30 | using ::benchmark::Fixture::TearDown; 31 | 32 | virtual void SetUp(::benchmark::State &st) { 33 | using namespace FastPForLib; 34 | using namespace gpu_ic; 35 | 36 | IntegerCODEC &codec = *CODECFactory::getFromName("varintgb"); 37 | 38 | Generator clu(1); 39 | values = clu.generate(st.range(0), 1U << 29); 40 | utils::delta_encode(values.data(), values.size()); 41 | 42 | encoded_values.resize(values.size() * 8); 43 | size_t compressedsize = 0; 44 | codec.encodeArray(values.data(), values.size(), encoded_values.data(), compressedsize); 45 | encoded_values.resize(compressedsize); 46 | encoded_values.shrink_to_fit(); 47 | 48 | decoded_values.resize(values.size()); 49 | } 50 | 51 | virtual void TearDown(::benchmark::State &) { 52 | ASSERT_EQ(decoded_values.size(), values.size()); 53 | for (size_t i = 0; i < values.size(); ++i) { 54 | ASSERT_EQ(decoded_values[i], values[i]); 55 | } 56 | values.clear(); 57 | encoded_values.clear(); 58 | decoded_values.clear(); 59 | } 60 | std::vector values; 61 | std::vector encoded_values; 62 | std::vector decoded_values; 63 | }; 64 | 65 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeUniform, gpu_ic::UniformDataGenerator) 66 | (benchmark::State &state) { 67 | using namespace FastPForLib; 68 | IntegerCODEC &codec = *CODECFactory::getFromName("varintgb"); 69 | 70 | while (state.KeepRunning()) { 71 | size_t recoveredsize = 0; 72 | codec.decodeArray( 73 | encoded_values.data(), encoded_values.size(), decoded_values.data(), recoveredsize); 74 | } 75 | auto bpi = double(32 * encoded_values.size()) / decoded_values.size(); 76 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 77 | } 78 | BENCHMARK_REGISTER_F(ValuesFixture, decodeUniform) 79 | ->RangeMultiplier(2) 80 | ->Range((1ULL << 15), (1ULL << 25)); 81 | 82 | BENCHMARK_TEMPLATE_DEFINE_F(ValuesFixture, decodeClustered, gpu_ic::ClusteredDataGenerator) 83 | (benchmark::State &state) { 84 | using namespace FastPForLib; 85 | IntegerCODEC &codec = *CODECFactory::getFromName("varintgb"); 86 | 87 | while (state.KeepRunning()) { 88 | size_t recoveredsize = 0; 89 | codec.decodeArray( 90 | encoded_values.data(), encoded_values.size(), decoded_values.data(), recoveredsize); 91 | } 92 | auto bpi = double(32 * encoded_values.size()) / decoded_values.size(); 93 | state.counters["bpi"] = benchmark::Counter(bpi, benchmark::Counter::kAvgThreads); 94 | } 95 | BENCHMARK_REGISTER_F(ValuesFixture, decodeClustered) 96 | ->RangeMultiplier(2) 97 | ->Range((1ULL << 15), (1ULL << 25)); 98 | 99 | BENCHMARK_MAIN(); 100 | 
-------------------------------------------------------------------------------- /external/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | EXECUTE_PROCESS(COMMAND git submodule update --init 2 | WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/.. 3 | OUTPUT_QUIET 4 | ) 5 | 6 | # Add FastPFor 7 | add_subdirectory(FastPFor EXCLUDE_FROM_ALL) 8 | 9 | add_subdirectory(googletest EXCLUDE_FROM_ALL) 10 | 11 | set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Suppressing benchmark's tests" FORCE) 12 | add_subdirectory(benchmark EXCLUDE_FROM_ALL) 13 | 14 | # Add CLI11 15 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/CLI11 EXCLUDE_FROM_ALL) 16 | 17 | # Add mio 18 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/mio EXCLUDE_FROM_ALL) 19 | 20 | # Add Boost (via boost-cmake) 21 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/boost-cmake EXCLUDE_FROM_ALL) 22 | 23 | if (BUILD_CUDA) 24 | add_library(cub INTERFACE) 25 | target_include_directories(cub INTERFACE 26 | $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/cub> 27 | ) 28 | endif () 29 | -------------------------------------------------------------------------------- /include/gpu_ic/cuda_bp.cuh: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "utils/bit_istream.hpp" 25 | #include "utils/bit_ostream.hpp" 26 | #include "utils/cuda_utils.hpp" 27 | #include "utils/utils.hpp" 28 | 29 | namespace cuda_bp { 30 | 31 | template 32 | static size_t encode(uint8_t *out, const uint32_t *in, size_t n) { 33 | bit_ostream bw(out); 34 | 35 | auto blocks = std::ceil((double)n / block_size); 36 | std::vector bits(blocks, 0); 37 | for (size_t i = 0; i < n; ++i) { 38 | auto value = in[i]; 39 | size_t bit = utils::bits(value); 40 | auto b = i / block_size; 41 | bits[b] = std::max(bit, bits[b]); 42 | } 43 | bw.write(0, 32); 44 | uint32_t offset = 0; 45 | for (auto b : bits) { 46 | offset += b * block_size/32; 47 | bw.write(offset, 32); 48 | } 49 | for (size_t i = 0; i < n; ++i) { 50 | auto value = in[i]; 51 | auto b = i / block_size; 52 | bw.write(value, bits[b]); 53 | } 54 | return ceil((double)bw.size() / 8); 55 | } 56 | 57 | template 58 | __global__ void kernel_decode(uint32_t * out, 59 | const uint32_t *in, 60 | size_t n, 61 | const uint32_t *offsets) { 62 | size_t index = blockIdx.x * blockDim.x + threadIdx.x; 63 | if (index < n) { 64 | uint8_t bit_size = (offsets[blockIdx.x + 1] - offsets[blockIdx.x])*32/block_size; 65 | uint32_t offset = offsets[blockIdx.x]; 66 | out[index] = extract(in + offset, threadIdx.x * bit_size, bit_size); 67 | } 68 | } 69 | 70 | template 71 | static void decode(uint32_t *d_out, const uint8_t *d_in, size_t n) { 72 | size_t header_len = 4 * (ceil((double)n / block_size) + 1); 73 | const uint8_t *d_payload = d_in + header_len; 74 | kernel_decode<<>>(d_out, 75 | reinterpret_cast(d_payload), 76 | n, 77 | reinterpret_cast(d_in)); 78 | } 79 | 80 | } // namespace cuda_bp 81 | -------------------------------------------------------------------------------- /include/gpu_ic/cuda_vbyte.cuh: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | #include "cub/cub.cuh" 23 | 24 | #include "utils/bit_ostream.hpp" 25 | #include "utils/cuda_utils.hpp" 26 | 27 | namespace cuda_vbyte { 28 | 29 | template 30 | static size_t encode(uint8_t *out, const uint32_t *in, size_t n) { 31 | 32 | bit_ostream bw_offset(out); 33 | 34 | size_t block_num = ceil((double)n / block_size); 35 | size_t offset_len = 4 * block_num + 4; 36 | size_t size = 0; 37 | 38 | bw_offset.write(0, 32); 39 | size_t i; 40 | for (i = 0; i + block_size < n; i += block_size) { 41 | bit_ostream bw_block(out + offset_len); 42 | for (int j = i; j < i + block_size and j < n; ++j) { 43 | const auto value = in[j]; 44 | if (value < (1U << 8)) { 45 | bw_block.write(0, 2); 46 | } else if (value < (1U << 16)) { 47 | bw_block.write(1, 2); 48 | } else if (value < (1U << 24)) { 49 | bw_block.write(2, 2); 50 | } else { 51 | bw_block.write(3, 2); 52 | } 53 | } 54 | for (int j = i; j < i + block_size and j < n; ++j) { 55 | const auto value = in[j]; 56 | if (value < (1U << 8)) { 57 | bw_block.write(value, 8); 58 | } else if (value < (1U << 16)) { 59 | bw_block.write(value, 16); 60 | } else if (value < (1U << 24)) { 61 | bw_block.write(value, 24); 62 | } else { 63 | bw_block.write(value, 32); 64 | } 65 | } 66 | auto padding = 32 - (bw_block.size() % 32); 67 | bw_block.write(0, padding); 68 | size += ceil((double)bw_block.size() / 8); 69 | bw_offset.write(size, 32); 70 | offset_len += ceil((double)(bw_block.size()) / 8); 71 | } 72 | bit_ostream bw_block(out + offset_len); 73 | auto s = i; 74 | size_t bit = 0; 75 | while(s 95 | __global__ void kernel_decode_vbyte(uint32_t * out, 96 | const uint32_t *in, 97 | size_t n, 98 | const uint32_t *offsets) { 99 | 100 | size_t index = blockIdx.x * blockDim.x + threadIdx.x; 101 | uint32_t offset = offsets[blockIdx.x] / 4; 102 | if ((blockIdx.x +1) * block_size < n) { 103 | __shared__ uint32_t min_offsets[block_size + 1]; 104 | min_offsets[0] = 0; 105 | min_offsets[threadIdx.x + 1] = (extract(in + offset, threadIdx.x * 2, 2) + 1) * 8; 106 | __syncthreads(); 107 | 108 | typedef cub::BlockScan BlockScan; 109 | __shared__ typename BlockScan::TempStorage temp_storage; 110 | BlockScan(temp_storage) 111 | .InclusiveSum(min_offsets[threadIdx.x + 1], min_offsets[threadIdx.x + 1]); 112 | __syncthreads(); 113 | uint32_t bit = min_offsets[threadIdx.x + 1] - min_offsets[threadIdx.x]; 114 | uint32_t header_len = 2 * (block_size/32); 115 | out[index] = extract(in + offset + header_len, min_offsets[threadIdx.x], bit); 116 | } else if(index < n){ 117 | uint8_t bit_size = *(in + offset); 118 | out[index] = extract(in + offset+1, threadIdx.x * bit_size, bit_size); 119 | } 120 | } 121 | template 122 | static void decode(uint32_t *d_out, const uint8_t *d_in, size_t n) { 123 | size_t block_num = ceil((double)n / block_size); 124 | size_t offset_len = 4 * block_num + 4; 125 | const uint8_t *d_payload = d_in + offset_len; 126 | kernel_decode_vbyte<<>>( 127 | d_out, 128 | reinterpret_cast(d_payload), 129 | n, 130 | reinterpret_cast(d_in)); 131 | } 132 | 133 | } // namespace cuda_vbyte 134 | -------------------------------------------------------------------------------- /include/gpu_ic/utils/binary_collection.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "mio/mmap.hpp" 11 | 12 | 13 | #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) 14 
| #include 15 | #endif 16 | 17 | namespace gpu_ic { 18 | 19 | template 20 | class base_binary_collection { 21 | public: 22 | using posting_type = uint32_t; 23 | using pointer = typename std::conditional::value, 24 | posting_type const, 25 | posting_type>::type *; 26 | 27 | base_binary_collection(const char *filename) { 28 | std::error_code error; 29 | m_file.map(filename, error); 30 | if ( error ) { 31 | std::cerr << "error mapping file: " << error.message() << ", exiting..." << std::endl; 32 | throw std::runtime_error("Error opening file"); 33 | } 34 | m_data = reinterpret_cast(m_file.data()); 35 | m_data_size = m_file.size() / sizeof(m_data[0]); 36 | 37 | #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) 38 | // Indicates that the application expects to access this address range in a sequential manner 39 | auto ret = posix_madvise((void*)m_data, m_data_size, POSIX_MADV_SEQUENTIAL); 40 | if (ret) std::cerr << "Error calling madvice: " << errno << std::endl; 41 | #endif 42 | } 43 | 44 | class sequence { 45 | public: 46 | sequence(pointer begin, pointer end) : m_begin(begin), m_end(end) {} 47 | sequence() : m_begin(nullptr), m_end(nullptr) {} 48 | 49 | pointer begin() const { return m_begin; } 50 | pointer end() const { return m_end; } 51 | size_t size() const { return m_end - m_begin; } 52 | 53 | posting_type back() const 54 | { 55 | assert(size()); 56 | return *(m_end - 1); 57 | } 58 | 59 | private: 60 | pointer m_begin; 61 | pointer m_end; 62 | }; 63 | 64 | using const_sequence = sequence; 65 | 66 | template 67 | class base_iterator; 68 | 69 | using const_iterator = base_iterator; 70 | using iterator = typename std::conditional::value, 71 | const_iterator, 72 | base_iterator>::type; 73 | 74 | iterator begin() { return iterator(this, 0); } 75 | iterator end() { return iterator(this, m_data_size); } 76 | const_iterator begin() const { return const_iterator(this, 0); } 77 | const_iterator end() const { return const_iterator(this, m_data_size); } 78 | const_iterator cbegin() const { return const_iterator(this, 0); } 79 | const_iterator cend() const { return const_iterator(this, m_data_size); } 80 | 81 | template 82 | class base_iterator : public std::iterator { 83 | public: 84 | base_iterator() : m_collection(nullptr) {} 85 | 86 | S const &operator*() const { return m_cur_seq; } 87 | 88 | S const *operator-> () const { return &m_cur_seq; } 89 | 90 | base_iterator &operator++() { 91 | m_pos = m_next_pos; 92 | read(); 93 | return *this; 94 | } 95 | 96 | bool operator==(base_iterator const &other) const { 97 | assert(m_collection == other.m_collection); 98 | return m_pos == other.m_pos; 99 | } 100 | 101 | bool operator!=(base_iterator const &other) const { return !(*this == other); } 102 | 103 | private: 104 | friend class base_binary_collection; 105 | 106 | base_iterator(base_binary_collection const *coll, size_t pos) 107 | : m_collection(coll), m_pos(pos) { 108 | read(); 109 | } 110 | 111 | void read() 112 | { 113 | assert(m_pos <= m_collection->m_data_size); 114 | if (m_pos == m_collection->m_data_size) return; 115 | 116 | size_t n = 0; 117 | size_t pos = m_pos; 118 | n = m_collection->m_data[pos++]; 119 | // file might be truncated 120 | n = std::min(n, size_t(m_collection->m_data_size - pos)); 121 | auto begin = &m_collection->m_data[pos]; 122 | 123 | m_next_pos = pos + n; 124 | m_cur_seq = S(begin, begin + n); 125 | } 126 | 127 | base_binary_collection const * m_collection; 128 | size_t m_pos, m_next_pos; 129 | S m_cur_seq; 130 | }; 131 | 132 | private: 133 | Source 
m_file; 134 | pointer m_data; 135 | size_t m_data_size; 136 | }; 137 | 138 | using binary_collection = base_binary_collection<>; 139 | using writable_binary_collection = base_binary_collection; 140 | } 141 | -------------------------------------------------------------------------------- /include/gpu_ic/utils/binary_freq_collection.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "binary_collection.hpp" 8 | 9 | namespace gpu_ic { 10 | 11 | class binary_freq_collection { 12 | public: 13 | 14 | binary_freq_collection(const char* basename) 15 | : m_docs((std::string(basename) + ".docs").c_str()) 16 | , m_freqs((std::string(basename) + ".freqs").c_str()) 17 | { 18 | auto firstseq = *m_docs.begin(); 19 | if (firstseq.size() != 1) { 20 | throw std::invalid_argument("First sequence should only contain number of documents"); 21 | } 22 | m_num_docs = *firstseq.begin(); 23 | } 24 | 25 | class iterator; 26 | 27 | iterator begin() const 28 | { 29 | auto docs_it = m_docs.begin(); 30 | return iterator(++docs_it, m_freqs.begin()); 31 | } 32 | 33 | iterator end() const 34 | { 35 | return iterator(m_docs.end(), m_freqs.end()); 36 | } 37 | 38 | size_t size() const 39 | { 40 | return std::distance(begin(), end()); 41 | } 42 | 43 | uint64_t num_docs() const 44 | { 45 | return m_num_docs; 46 | } 47 | 48 | struct sequence { 49 | binary_collection::const_sequence docs; 50 | binary_collection::const_sequence freqs; 51 | }; 52 | 53 | class iterator : public std::iterator { 55 | public: 56 | iterator() 57 | {} 58 | 59 | value_type const& operator*() const 60 | { 61 | return m_cur_seq; 62 | } 63 | 64 | value_type const* operator->() const 65 | { 66 | return &m_cur_seq; 67 | } 68 | 69 | iterator& operator++() 70 | { 71 | m_cur_seq.docs = *++m_docs_it; 72 | m_cur_seq.freqs = *++m_freqs_it; 73 | return *this; 74 | } 75 | 76 | bool operator==(iterator const& other) const 77 | { 78 | return m_docs_it == other.m_docs_it; 79 | } 80 | 81 | bool operator!=(iterator const& other) const 82 | { 83 | return !(*this == other); 84 | } 85 | 86 | private: 87 | friend class binary_freq_collection; 88 | 89 | iterator(binary_collection::const_iterator docs_it, 90 | binary_collection::const_iterator freqs_it) 91 | : m_docs_it(docs_it), m_freqs_it(freqs_it) { 92 | m_cur_seq.docs = *m_docs_it; 93 | m_cur_seq.freqs = *m_freqs_it; 94 | } 95 | 96 | binary_collection::const_iterator m_docs_it; 97 | binary_collection::const_iterator m_freqs_it; 98 | sequence m_cur_seq; 99 | }; 100 | 101 | private: 102 | binary_collection m_docs; 103 | binary_collection m_freqs; 104 | uint64_t m_num_docs; 105 | }; 106 | } 107 | -------------------------------------------------------------------------------- /include/gpu_ic/utils/bit_istream.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | class bit_istream { 20 | public: 21 | bit_istream(uint8_t const *in) 22 | : m_in(reinterpret_cast(in)), m_avail(0), m_buf(0), m_pos(0) {} 23 | 24 | size_t position() const { return m_pos; } 25 | 26 | uint32_t read(uint32_t len) { 27 | if (!len) 28 | return 0; 29 | 30 | if (m_avail < len) { 31 | m_buf |= uint64_t(*m_in++) << m_avail; 32 | m_avail += 32; 33 | } 34 | uint32_t val = m_buf & ((uint64_t(1) << len) - 1); 35 | m_buf >>= len; 36 | m_avail -= len; 37 | m_pos += len; 38 | 39 | return val; 40 | } 41 | 42 | inline uint8_t read_bit() { return read(1); } 43 | 44 | inline uint32_t read_unary() { 45 | uint32_t v = 0; 46 | while (read_bit() == 0) 47 | ++v; 48 | return v; 49 | } 50 | 51 | inline uint32_t read_elias_gamma() { 52 | auto bits = read_unary(); 53 | return read(bits); 54 | } 55 | 56 | inline uint32_t read_elias_delta() { 57 | auto bits = read_elias_gamma(); 58 | return read(bits); 59 | } 60 | 61 | inline uint32_t read_vbyte() { 62 | uint32_t val = 0; 63 | size_t i = 0; 64 | while (read_bit()) { 65 | val |= read(7) << (7 * i++); 66 | } 67 | val |= read(7) << (7 * i); 68 | return val; 69 | } 70 | 71 | private: 72 | uint32_t const *m_in; 73 | uint32_t m_avail; 74 | uint64_t m_buf; 75 | size_t m_pos; 76 | }; -------------------------------------------------------------------------------- /include/gpu_ic/utils/bit_ostream.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include "utils.hpp" 20 | 21 | class bit_ostream { 22 | public: 23 | bit_ostream(uint8_t *buf) : m_buf(reinterpret_cast(buf)), m_size(0) {} 24 | 25 | void write(uint32_t bits, uint32_t len) { 26 | if (!len) 27 | return; 28 | uint32_t pos_in_word = m_size % 32; 29 | 30 | m_size += len; 31 | if (pos_in_word == 0) { 32 | *m_buf = bits; 33 | if (len == 32) { 34 | m_buf += 1; 35 | } 36 | } else { 37 | *m_buf |= bits << pos_in_word; 38 | if (len >= 32 - pos_in_word) { 39 | m_buf += 1; 40 | *m_buf = bits >> (32 - pos_in_word); 41 | } 42 | } 43 | } 44 | 45 | size_t size() const { return m_size; } 46 | 47 | inline void write_bit(bool val) { write(val, 1); } 48 | 49 | inline void write_unary(uint32_t val) { 50 | while (val--) { 51 | write_bit(0); 52 | } 53 | write_bit(1); 54 | } 55 | 56 | inline void write_elias_gamma(uint32_t val) { 57 | write_unary(utils::bits(val)); 58 | write(val, utils::bits(val)); 59 | } 60 | 61 | inline void write_elias_delta(uint32_t val) { 62 | write_elias_gamma(utils::bits(val)); 63 | write(val, utils::bits(val)); 64 | } 65 | 66 | inline void write_vbyte(uint32_t val) { 67 | while (val >= 128) { 68 | write(0x80 | (val & 0x7f), 8); 69 | val >>= 7; 70 | } 71 | write(0, 1); 72 | write(val, 7); 73 | } 74 | 75 | private: 76 | uint32_t *m_buf; 77 | size_t m_size; 78 | }; 79 | -------------------------------------------------------------------------------- /include/gpu_ic/utils/bit_vector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "boost/range.hpp" 6 | 7 | #include "broadword.hpp" 8 | 9 | #include "mappable_vector.hpp" 10 | 11 | namespace gpu_ic { 12 | 13 | namespace detail { 14 | 15 | template 16 | inline IntType1 ceil_div(IntType1 dividend, IntType2 divisor) 17 | { 18 | // XXX(ot): put some static check that IntType1 >= IntType2 19 | IntType1 d = IntType1(divisor); 20 | return IntType1(dividend + d - 1) / d; 21 | } 22 | 23 | inline size_t words_for(uint64_t n) { return ceil_div(n, 64); } 24 | } // namespace detail 25 | 26 | class bit_vector_builder { 27 | public: 28 | using bits_type = std::vector; 29 | 30 | bit_vector_builder(uint64_t size = 0, bool init = 0) : m_size(size) { 31 | m_bits.resize(detail::words_for(size), uint64_t(-init)); 32 | if (size) { 33 | m_cur_word = &m_bits.back(); 34 | // clear padding bits 35 | if (init && size % 64) { 36 | *m_cur_word >>= 64 - (size % 64); 37 | } 38 | } 39 | } 40 | bit_vector_builder(const bit_vector_builder &) = delete; 41 | bit_vector_builder &operator=(const bit_vector_builder &) = delete; 42 | 43 | void reserve(uint64_t size) { m_bits.reserve(detail::words_for(size)); } 44 | 45 | inline void push_back(bool b) { 46 | uint64_t pos_in_word = m_size % 64; 47 | if (pos_in_word == 0) { 48 | m_bits.push_back(0); 49 | m_cur_word = &m_bits.back(); 50 | } 51 | *m_cur_word |= (uint64_t)b << pos_in_word; 52 | ++m_size; 53 | } 54 | 55 | inline void set(uint64_t pos, bool b) { 56 | uint64_t word = pos / 64; 57 | uint64_t pos_in_word = pos % 64; 58 | 59 | m_bits[word] &= ~(uint64_t(1) << pos_in_word); 60 | m_bits[word] |= uint64_t(b) << pos_in_word; 61 | } 62 | 63 | inline void set_bits(uint64_t pos, uint64_t bits, size_t len) { 64 | assert(pos + len <= size()); 65 | // check there are no spurious bits 66 | assert(len == 64 || (bits >> len) == 0); 67 | if (!len) 68 | return; 69 | uint64_t mask = (len == 64) ? 
uint64_t(-1) : ((uint64_t(1) << len) - 1); 70 | uint64_t word = pos / 64; 71 | uint64_t pos_in_word = pos % 64; 72 | 73 | m_bits[word] &= ~(mask << pos_in_word); 74 | m_bits[word] |= bits << pos_in_word; 75 | 76 | uint64_t stored = 64 - pos_in_word; 77 | if (stored < len) { 78 | m_bits[word + 1] &= ~(mask >> stored); 79 | m_bits[word + 1] |= bits >> stored; 80 | } 81 | } 82 | 83 | inline void append_bits(uint64_t bits, size_t len) { 84 | // check there are no spurious bits 85 | assert(len == 64 || (bits >> len) == 0); 86 | if (!len) 87 | return; 88 | uint64_t pos_in_word = m_size % 64; 89 | m_size += len; 90 | if (pos_in_word == 0) { 91 | m_bits.push_back(bits); 92 | } else { 93 | *m_cur_word |= bits << pos_in_word; 94 | if (len > 64 - pos_in_word) { 95 | m_bits.push_back(bits >> (64 - pos_in_word)); 96 | } 97 | } 98 | m_cur_word = &m_bits.back(); 99 | } 100 | 101 | inline void zero_extend(uint64_t n) { 102 | m_size += n; 103 | uint64_t needed = detail::words_for(m_size) - m_bits.size(); 104 | if (needed) { 105 | m_bits.insert(m_bits.end(), needed, 0); 106 | m_cur_word = &m_bits.back(); 107 | } 108 | } 109 | 110 | inline void one_extend(uint64_t n) { 111 | while (n >= 64) { 112 | append_bits(uint64_t(-1), 64); 113 | n -= 64; 114 | } 115 | if (n) { 116 | append_bits(uint64_t(-1) >> (64 - n), n); 117 | } 118 | } 119 | 120 | void append(bit_vector_builder const &rhs) { 121 | if (!rhs.size()) 122 | return; 123 | 124 | uint64_t pos = m_bits.size(); 125 | uint64_t shift = size() % 64; 126 | m_size = size() + rhs.size(); 127 | m_bits.resize(detail::words_for(m_size)); 128 | 129 | if (shift == 0) { // word-aligned, easy case 130 | std::copy(rhs.m_bits.begin(), rhs.m_bits.end(), m_bits.begin() + ptrdiff_t(pos)); 131 | } else { 132 | uint64_t *cur_word = &m_bits.front() + pos - 1; 133 | for (size_t i = 0; i < rhs.m_bits.size() - 1; ++i) { 134 | uint64_t w = rhs.m_bits[i]; 135 | *cur_word |= w << shift; 136 | *++cur_word = w >> (64 - shift); 137 | } 138 | *cur_word |= rhs.m_bits.back() << shift; 139 | if (cur_word < &m_bits.back()) { 140 | *++cur_word = rhs.m_bits.back() >> (64 - shift); 141 | } 142 | } 143 | m_cur_word = &m_bits.back(); 144 | } 145 | 146 | // reverse in place 147 | void reverse() { 148 | uint64_t shift = 64 - (size() % 64); 149 | 150 | uint64_t remainder = 0; 151 | for (size_t i = 0; i < m_bits.size(); ++i) { 152 | uint64_t cur_word; 153 | if (shift != 64) { // this should be hoisted out 154 | cur_word = remainder | (m_bits[i] << shift); 155 | remainder = m_bits[i] >> (64 - shift); 156 | } else { 157 | cur_word = m_bits[i]; 158 | } 159 | m_bits[i] = broadword::reverse_bits(cur_word); 160 | } 161 | assert(remainder == 0); 162 | std::reverse(m_bits.begin(), m_bits.end()); 163 | } 164 | 165 | bits_type &move_bits() { 166 | assert(detail::words_for(m_size) == m_bits.size()); 167 | return m_bits; 168 | } 169 | 170 | uint64_t size() const { return m_size; } 171 | 172 | void swap(bit_vector_builder &other) { 173 | m_bits.swap(other.m_bits); 174 | std::swap(m_size, other.m_size); 175 | std::swap(m_cur_word, other.m_cur_word); 176 | } 177 | 178 | private: 179 | bits_type m_bits; 180 | uint64_t m_size; 181 | uint64_t *m_cur_word; 182 | }; 183 | 184 | class bit_vector { 185 | public: 186 | bit_vector() = default; 187 | 188 | template 189 | bit_vector(Range const &from) { 190 | std::vector bits; 191 | const uint64_t first_mask = uint64_t(1); 192 | uint64_t mask = first_mask; 193 | uint64_t cur_val = 0; 194 | m_size = 0; 195 | for (typename boost::range_const_iterator::type iter = 
boost::begin(from); 196 | iter != boost::end(from); 197 | ++iter) { 198 | if (*iter) { 199 | cur_val |= mask; 200 | } 201 | mask <<= 1; 202 | m_size += 1; 203 | if (!mask) { 204 | bits.push_back(cur_val); 205 | mask = first_mask; 206 | cur_val = 0; 207 | } 208 | } 209 | if (mask != first_mask) { 210 | bits.push_back(cur_val); 211 | } 212 | m_bits.steal(bits); 213 | } 214 | 215 | bit_vector(bit_vector_builder *from) { 216 | m_size = from->size(); 217 | m_bits.steal(from->move_bits()); 218 | } 219 | 220 | template 221 | void map(Visitor &visit) { 222 | visit(m_size, "m_size")(m_bits, "m_bits"); 223 | } 224 | 225 | void swap(bit_vector &other) { 226 | std::swap(other.m_size, m_size); 227 | other.m_bits.swap(m_bits); 228 | } 229 | 230 | inline size_t size() const { return m_size; } 231 | 232 | inline bool operator[](uint64_t pos) const { 233 | assert(pos < m_size); 234 | uint64_t block = pos / 64; 235 | assert(block < m_bits.size()); 236 | uint64_t shift = pos % 64; 237 | return (m_bits[block] >> shift) & 1; 238 | } 239 | 240 | inline uint64_t get_bits(uint64_t pos, uint64_t len) const { 241 | assert(pos + len <= size()); 242 | if (!len) { 243 | return 0; 244 | } 245 | uint64_t block = pos / 64; 246 | uint64_t shift = pos % 64; 247 | uint64_t mask = -(len == 64) | ((1ULL << len) - 1); 248 | if (shift + len <= 64) { 249 | return m_bits[block] >> shift & mask; 250 | } else { 251 | return (m_bits[block] >> shift) | (m_bits[block + 1] << (64 - shift) & mask); 252 | } 253 | } 254 | 255 | // same as get_bits(pos, 64) but it can extend further size(), padding with zeros 256 | inline uint64_t get_word(uint64_t pos) const { 257 | assert(pos < size()); 258 | uint64_t block = pos / 64; 259 | uint64_t shift = pos % 64; 260 | uint64_t word = m_bits[block] >> shift; 261 | if (shift && block + 1 < m_bits.size()) { 262 | word |= m_bits[block + 1] << (64 - shift); 263 | } 264 | return word; 265 | } 266 | 267 | // unsafe and fast version of get_word, it retrieves at least 56 bits 268 | inline uint64_t get_word56(uint64_t pos) const { 269 | // XXX check endianness? 
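// Note on the line below: this performs an unaligned 64-bit load from the byte containing pos
// and shifts out the low pos % 8 bits, so at least 57 valid bits survive, which satisfies the
// ">= 56 bits" contract stated above. It relies on a little-endian word layout (hence the XXX
// above) and may touch up to 7 bytes past the last 64-bit word, which is part of why the
// function is described as unsafe.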
270 | const char *ptr = reinterpret_cast(m_bits.data()); 271 | return *(reinterpret_cast(ptr + pos / 8)) >> (pos % 8); 272 | } 273 | 274 | inline uint64_t predecessor0(uint64_t pos) const { 275 | assert(pos < m_size); 276 | uint64_t block = pos / 64; 277 | uint64_t shift = 64 - pos % 64 - 1; 278 | uint64_t word = ~m_bits[block]; 279 | word = (word << shift) >> shift; 280 | 281 | unsigned long ret; 282 | while (!broadword::msb(word, ret)) { 283 | assert(block); 284 | word = ~m_bits[--block]; 285 | }; 286 | return block * 64 + ret; 287 | } 288 | 289 | inline uint64_t successor0(uint64_t pos) const { 290 | assert(pos < m_size); 291 | uint64_t block = pos / 64; 292 | uint64_t shift = pos % 64; 293 | uint64_t word = (~m_bits[block] >> shift) << shift; 294 | 295 | unsigned long ret; 296 | while (!broadword::lsb(word, ret)) { 297 | ++block; 298 | assert(block < m_bits.size()); 299 | word = ~m_bits[block]; 300 | }; 301 | return block * 64 + ret; 302 | } 303 | 304 | inline uint64_t predecessor1(uint64_t pos) const { 305 | assert(pos < m_size); 306 | uint64_t block = pos / 64; 307 | uint64_t shift = 64 - pos % 64 - 1; 308 | uint64_t word = m_bits[block]; 309 | word = (word << shift) >> shift; 310 | 311 | unsigned long ret; 312 | while (!broadword::msb(word, ret)) { 313 | assert(block); 314 | word = m_bits[--block]; 315 | }; 316 | return block * 64 + ret; 317 | } 318 | 319 | inline uint64_t successor1(uint64_t pos) const { 320 | assert(pos < m_size); 321 | uint64_t block = pos / 64; 322 | uint64_t shift = pos % 64; 323 | uint64_t word = (m_bits[block] >> shift) << shift; 324 | 325 | unsigned long ret; 326 | while (!broadword::lsb(word, ret)) { 327 | ++block; 328 | assert(block < m_bits.size()); 329 | word = m_bits[block]; 330 | }; 331 | return block * 64 + ret; 332 | } 333 | 334 | mapper::mappable_vector const &data() const { return m_bits; } 335 | 336 | struct enumerator { 337 | enumerator() : m_bv(0), m_pos(uint64_t(-1)) {} 338 | 339 | enumerator(bit_vector const &bv, size_t pos) : m_bv(&bv), m_pos(pos), m_buf(0), m_avail(0) { 340 | m_bv->data().prefetch(m_pos / 64); 341 | } 342 | 343 | inline bool next() { 344 | if (!m_avail) 345 | fill_buf(); 346 | bool b = m_buf & 1; 347 | m_buf >>= 1; 348 | m_avail -= 1; 349 | m_pos += 1; 350 | return b; 351 | } 352 | 353 | inline uint64_t take(size_t l) { 354 | if (m_avail < l) 355 | fill_buf(); 356 | uint64_t val; 357 | if (l != 64) { 358 | val = m_buf & ((uint64_t(1) << l) - 1); 359 | m_buf >>= l; 360 | } else { 361 | val = m_buf; 362 | } 363 | m_avail -= l; 364 | m_pos += l; 365 | return val; 366 | } 367 | 368 | inline uint64_t skip_zeros() { 369 | uint64_t zs = 0; 370 | // XXX the loop may be optimized by aligning access 371 | while (!m_buf) { 372 | m_pos += m_avail; 373 | zs += m_avail; 374 | m_avail = 0; 375 | fill_buf(); 376 | } 377 | 378 | uint64_t l = broadword::lsb(m_buf); 379 | m_buf >>= l; 380 | m_buf >>= 1; 381 | m_avail -= l + 1; 382 | m_pos += l + 1; 383 | return zs + l; 384 | } 385 | 386 | inline uint64_t position() const { return m_pos; } 387 | 388 | private: 389 | inline void fill_buf() { 390 | m_buf = m_bv->get_word(m_pos); 391 | m_avail = 64; 392 | } 393 | 394 | bit_vector const *m_bv; 395 | size_t m_pos; 396 | uint64_t m_buf; 397 | size_t m_avail; 398 | }; 399 | 400 | struct unary_enumerator { 401 | unary_enumerator() : m_data(0), m_position(0), m_buf(0) {} 402 | 403 | unary_enumerator(bit_vector const &bv, uint64_t pos) { 404 | m_data = bv.data().data(); 405 | m_position = pos; 406 | m_buf = m_data[pos / 64]; 407 | // clear low bits 408 | 
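// (masking the bits below pos ensures the first call to next() returns the first 1-bit
//  at or after pos, rather than an earlier bit in the same word)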
m_buf &= uint64_t(-1) << (pos % 64); 409 | } 410 | 411 | uint64_t position() const { return m_position; } 412 | 413 | uint64_t next() { 414 | unsigned long pos_in_word; 415 | uint64_t buf = m_buf; 416 | while (!broadword::lsb(buf, pos_in_word)) { 417 | m_position += 64; 418 | buf = m_data[m_position / 64]; 419 | } 420 | 421 | m_buf = buf & (buf - 1); // clear LSB 422 | m_position = (m_position & ~uint64_t(63)) + pos_in_word; 423 | return m_position; 424 | } 425 | 426 | // skip to the k-th one after the current position 427 | void skip(uint64_t k) { 428 | uint64_t skipped = 0; 429 | uint64_t buf = m_buf; 430 | uint64_t w = 0; 431 | while (skipped + (w = broadword::popcount(buf)) <= k) { 432 | skipped += w; 433 | m_position += 64; 434 | buf = m_data[m_position / 64]; 435 | } 436 | assert(buf); 437 | uint64_t pos_in_word = broadword::select_in_word(buf, k - skipped); 438 | m_buf = buf & (uint64_t(-1) << pos_in_word); 439 | m_position = (m_position & ~uint64_t(63)) + pos_in_word; 440 | } 441 | 442 | // return the position of the k-th one after the current position. 443 | uint64_t skip_no_move(uint64_t k) { 444 | uint64_t position = m_position; 445 | uint64_t skipped = 0; 446 | uint64_t buf = m_buf; 447 | uint64_t w = 0; 448 | while (skipped + (w = broadword::popcount(buf)) <= k) { 449 | skipped += w; 450 | position += 64; 451 | buf = m_data[position / 64]; 452 | } 453 | assert(buf); 454 | uint64_t pos_in_word = broadword::select_in_word(buf, k - skipped); 455 | position = (position & ~uint64_t(63)) + pos_in_word; 456 | return position; 457 | } 458 | 459 | // skip to the k-th zero after the current position 460 | void skip0(uint64_t k) { 461 | uint64_t skipped = 0; 462 | uint64_t pos_in_word = m_position % 64; 463 | uint64_t buf = ~m_buf & (uint64_t(-1) << pos_in_word); 464 | uint64_t w = 0; 465 | while (skipped + (w = broadword::popcount(buf)) <= k) { 466 | skipped += w; 467 | m_position += 64; 468 | buf = ~m_data[m_position / 64]; 469 | } 470 | assert(buf); 471 | pos_in_word = broadword::select_in_word(buf, k - skipped); 472 | m_buf = ~buf & (uint64_t(-1) << pos_in_word); 473 | m_position = (m_position & ~uint64_t(63)) + pos_in_word; 474 | } 475 | 476 | private: 477 | uint64_t const *m_data; 478 | uint64_t m_position; 479 | uint64_t m_buf; 480 | }; 481 | 482 | protected: 483 | size_t m_size; 484 | mapper::mappable_vector m_bits; 485 | }; 486 | 487 | } // namespace gpu_ic -------------------------------------------------------------------------------- /include/gpu_ic/utils/broadword.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "intrinsics.hpp" 5 | #include "tables.hpp" 6 | 7 | namespace gpu_ic { namespace broadword { 8 | 9 | static const uint64_t ones_step_4 = 0x1111111111111111ULL; 10 | static const uint64_t ones_step_8 = 0x0101010101010101ULL; 11 | static const uint64_t ones_step_9 = 1ULL << 0 | 1ULL << 9 | 1ULL << 18 | 1ULL << 27 | 1ULL << 36 | 1ULL << 45 | 1ULL << 54; 12 | static const uint64_t msbs_step_8 = 0x80ULL * ones_step_8; 13 | static const uint64_t msbs_step_9 = 0x100ULL * ones_step_9; 14 | static const uint64_t incr_step_8 = 0x80ULL << 56 | 0x40ULL << 48 | 0x20ULL << 40 | 0x10ULL << 32 | 0x8ULL << 24 | 0x4ULL << 16 | 0x2ULL << 8 | 0x1; 15 | static const uint64_t inv_count_step_9 = 1ULL << 54 | 2ULL << 45 | 3ULL << 36 | 4ULL << 27 | 5ULL << 18 | 6ULL << 9 | 7ULL; 16 | 17 | static const uint64_t magic_mask_1 = 0x5555555555555555ULL; 18 | static const uint64_t magic_mask_2 = 
0x3333333333333333ULL; 19 | static const uint64_t magic_mask_3 = 0x0F0F0F0F0F0F0F0FULL; 20 | static const uint64_t magic_mask_4 = 0x00FF00FF00FF00FFULL; 21 | static const uint64_t magic_mask_5 = 0x0000FFFF0000FFFFULL; 22 | static const uint64_t magic_mask_6 = 0x00000000FFFFFFFFULL; 23 | 24 | inline uint64_t leq_step_8(uint64_t x, uint64_t y) 25 | { 26 | return ((((y | msbs_step_8) - (x & ~msbs_step_8)) ^ (x ^ y)) & msbs_step_8) >> 7; 27 | } 28 | 29 | inline uint64_t uleq_step_8(uint64_t x, uint64_t y) 30 | { 31 | return (((((y | msbs_step_8) - (x & ~msbs_step_8)) ^ (x ^ y)) ^ (x & ~y)) & msbs_step_8) >> 7; 32 | } 33 | 34 | inline uint64_t zcompare_step_8(uint64_t x) 35 | { 36 | return ((x | ((x | msbs_step_8) - ones_step_8)) & msbs_step_8) >> 7; 37 | } 38 | 39 | inline uint64_t uleq_step_9(uint64_t x, uint64_t y) 40 | { 41 | return (((((y | msbs_step_9) - (x & ~msbs_step_9)) | (x ^ y)) ^ (x & ~y)) & msbs_step_9 ) >> 8; 42 | } 43 | 44 | inline uint64_t byte_counts(uint64_t x) 45 | { 46 | x = x - ((x & 0xa * ones_step_4) >> 1); 47 | x = (x & 3 * ones_step_4) + ((x >> 2) & 3 * ones_step_4); 48 | x = (x + (x >> 4)) & 0x0f * ones_step_8; 49 | return x; 50 | } 51 | 52 | inline uint64_t bytes_sum(uint64_t x) 53 | { 54 | return x * ones_step_8 >> 56; 55 | } 56 | 57 | inline uint64_t popcount(uint64_t x) 58 | { 59 | #if USE_POPCNT 60 | return intrinsics::popcount(x); 61 | #else 62 | return bytes_sum(byte_counts(x)); 63 | #endif 64 | } 65 | 66 | inline uint64_t reverse_bytes(uint64_t x) 67 | { 68 | return intrinsics::byteswap64(x); 69 | } 70 | 71 | inline uint64_t reverse_bits(uint64_t x) 72 | { 73 | x = ((x >> 1) & magic_mask_1) | ((x & magic_mask_1) << 1); 74 | x = ((x >> 2) & magic_mask_2) | ((x & magic_mask_2) << 2); 75 | x = ((x >> 4) & magic_mask_3) | ((x & magic_mask_3) << 4); 76 | return reverse_bytes(x); 77 | } 78 | 79 | inline uint64_t select_in_word(const uint64_t x, const uint64_t k) 80 | { 81 | assert(k < popcount(x)); 82 | 83 | uint64_t byte_sums = byte_counts(x) * ones_step_8; 84 | 85 | const uint64_t k_step_8 = k * ones_step_8; 86 | const uint64_t geq_k_step_8 = (((k_step_8 | msbs_step_8) - byte_sums) & msbs_step_8); 87 | #if USE_POPCNT 88 | const uint64_t place = intrinsics::popcount(geq_k_step_8) * 8; 89 | #else 90 | const uint64_t place = ((geq_k_step_8 >> 7) * ones_step_8 >> 53) & ~uint64_t(0x7); 91 | #endif 92 | const uint64_t byte_rank = k - (((byte_sums << 8 ) >> place) & uint64_t(0xFF)); 93 | return place + tables::select_in_byte[((x >> place) & 0xFF ) | (byte_rank << 8)]; 94 | } 95 | 96 | inline uint64_t same_msb(uint64_t x, uint64_t y) 97 | { 98 | return (x ^ y) <= (x & y); 99 | } 100 | 101 | namespace detail { 102 | // Adapted from LSB of Chess Programming Wiki 103 | static const uint8_t debruijn64_mapping[64] = { 104 | 63, 0, 58, 1, 59, 47, 53, 2, 105 | 60, 39, 48, 27, 54, 33, 42, 3, 106 | 61, 51, 37, 40, 49, 18, 28, 20, 107 | 55, 30, 34, 11, 43, 14, 22, 4, 108 | 62, 57, 46, 52, 38, 26, 32, 41, 109 | 50, 36, 17, 19, 29, 10, 13, 21, 110 | 56, 45, 25, 31, 35, 16, 9, 12, 111 | 44, 24, 15, 8, 23, 7, 6, 5 112 | }; 113 | static const uint64_t debruijn64 = 0x07EDD5E59A4E28C2ULL; 114 | } 115 | 116 | // return the position of the single bit set in the word x 117 | inline uint8_t bit_position(uint64_t x) 118 | { 119 | assert(popcount(x) == 1); 120 | return detail::debruijn64_mapping 121 | [(x * detail::debruijn64) >> 58]; 122 | } 123 | 124 | inline uint8_t msb(uint64_t x, unsigned long& ret) 125 | { 126 | return intrinsics::bsr64(&ret, x); 127 | } 128 | 129 | inline uint8_t 
msb(uint64_t x) 130 | { 131 | assert(x); 132 | unsigned long ret = -1U; 133 | msb(x, ret); 134 | return (uint8_t)ret; 135 | } 136 | 137 | inline uint8_t lsb(uint64_t x, unsigned long& ret) 138 | { 139 | return intrinsics::bsf64(&ret, x); 140 | } 141 | 142 | inline uint8_t lsb(uint64_t x) 143 | { 144 | assert(x); 145 | unsigned long ret = -1U; 146 | lsb(x, ret); 147 | return (uint8_t)ret; 148 | } 149 | 150 | }} -------------------------------------------------------------------------------- /include/gpu_ic/utils/cuda_utils.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | #define CUDA_CHECK_ERROR(err) __cudaSafeCall(err, __FILE__, __LINE__) 23 | 24 | inline void __cudaSafeCall(cudaError err, const char *file, const int line) { 25 | if (cudaSuccess != err) { 26 | std::ostringstream stringStream; 27 | stringStream << "cudaSafeCall() failed at " << file << ":" << line << " : " 28 | << cudaGetErrorString(err); 29 | throw(std::runtime_error(stringStream.str())); 30 | } 31 | } 32 | 33 | __global__ 34 | void warmUpGPU() 35 | { 36 | // do nothing 37 | } 38 | 39 | __device__ uint32_t extract(const uint32_t *in, size_t offset, size_t bit) { 40 | int firstBit = offset; 41 | int lastBit = firstBit + bit - 1; 42 | uint32_t packed = in[firstBit / 32]; 43 | int firstBitInPacked = firstBit % 32; 44 | uint32_t packedOverflow = in[lastBit / 32]; 45 | bool isOverflowing = lastBit % 32 < firstBitInPacked; 46 | int lastBitInPackedOverflow = !isOverflowing ? 
-1 : lastBit % 32; 47 | uint32_t outFromPacked = 48 | ((packed >> firstBitInPacked) & (0xFFFFFFFF >> (32 - (bit - lastBitInPackedOverflow - 1)))); 49 | uint32_t outFromOverflow = (packedOverflow & (0xFFFFFFFF >> (32 - lastBitInPackedOverflow - 1))) 50 | << (bit - lastBitInPackedOverflow - 1); 51 | return outFromPacked | outFromOverflow; 52 | } 53 | -------------------------------------------------------------------------------- /include/gpu_ic/utils/index.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "mappable_vector.hpp" 4 | #include "bit_vector.hpp" 5 | 6 | namespace gpu_ic { 7 | 8 | // template 9 | class index { 10 | public: 11 | index() 12 | : m_size(0) 13 | {} 14 | 15 | class builder { 16 | public: 17 | builder(uint64_t num_docs) 18 | { 19 | m_num_docs = num_docs; 20 | m_endpoints.push_back(0); 21 | } 22 | 23 | template 24 | void add_posting_list(uint64_t n, DocsIterator docs_begin, Encoder encoder_function, bool compress_freqs) 25 | { 26 | if (!n) throw std::invalid_argument("List must be nonempty"); 27 | tight_variable_byte::encode_single(n, m_lists); 28 | DocsIterator docs_it(docs_begin); 29 | std::vector docs_buf(n); 30 | 31 | uint32_t last_doc(*docs_it++);; 32 | for (size_t i = 1; i < n; ++i) { 33 | uint32_t doc(*docs_it++); 34 | if(not compress_freqs) { 35 | docs_buf[i] = doc - last_doc - 1; 36 | } else { 37 | docs_buf[i] = doc - 1; 38 | } 39 | last_doc = doc; 40 | } 41 | 42 | std::vector encoded_values(n*4+1024); 43 | size_t compressedsize = encoder_function(encoded_values.data(), docs_buf.data(), docs_buf.size()); 44 | encoded_values.resize(compressedsize); 45 | encoded_values.shrink_to_fit(); 46 | m_lists.insert(m_lists.end(), encoded_values.data(), encoded_values.data() + encoded_values.size()); 47 | m_endpoints.push_back(m_lists.size()); 48 | } 49 | 50 | 51 | size_t build(index& sq) 52 | { 53 | sq.m_size = m_endpoints.size() - 1; 54 | sq.m_num_docs = m_num_docs; 55 | sq.m_lists.steal(m_lists); 56 | sq.m_endpoints.steal(m_endpoints); 57 | return sq.m_lists.size(); 58 | } 59 | 60 | private: 61 | size_t m_num_docs; 62 | std::vector m_endpoints; 63 | std::vector m_lists; 64 | }; 65 | 66 | size_t size() const 67 | { 68 | return m_size; 69 | } 70 | 71 | uint64_t num_docs() const 72 | { 73 | return m_num_docs; 74 | } 75 | 76 | 77 | size_t get_data(std::vector &data, size_t i) const 78 | { 79 | assert(i < size()); 80 | uint32_t n; 81 | auto data_begin = tight_variable_byte::decode(m_lists.data() + m_endpoints[i], &n, 1); 82 | data.insert(data.end(), data_begin, m_lists.data() + m_endpoints[i+1] ); 83 | return n; 84 | } 85 | 86 | void warmup(size_t i) const 87 | { 88 | assert(i < size()); 89 | // compact_elias_fano::enumerator endpoints(m_endpoints, 0, 90 | // m_lists.size(), m_size, 91 | // m_params); 92 | 93 | auto begin = m_endpoints[i]; 94 | auto end = m_lists.size(); 95 | if (i + 1 != size()) { 96 | end = m_endpoints[i + 1]; 97 | } 98 | 99 | volatile uint32_t tmp; 100 | for (size_t i = begin; i != end; ++i) { 101 | tmp = m_lists[i]; 102 | } 103 | (void)tmp; 104 | } 105 | 106 | void swap(index& other) 107 | { 108 | std::swap(m_size, other.m_size); 109 | m_endpoints.swap(other.m_endpoints); 110 | m_lists.swap(other.m_lists); 111 | } 112 | 113 | template 114 | void map(Visitor& visit) 115 | { 116 | visit 117 | (m_size, "m_size") 118 | (m_num_docs, "m_num_docs") 119 | (m_endpoints, "m_endpoints") 120 | (m_lists, "m_lists"); 121 | } 122 | 123 | private: 124 | size_t m_size; 125 | size_t m_num_docs; 126 | 
mapper::mappable_vector m_endpoints; 127 | mapper::mappable_vector m_lists; 128 | }; 129 | } -------------------------------------------------------------------------------- /include/gpu_ic/utils/index.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "mappable_vector.hpp" 4 | #include "bit_vector.hpp" 5 | 6 | #include "posting_list.hpp" 7 | 8 | 9 | 10 | 11 | namespace gpu_ic { 12 | 13 | // template 14 | template 15 | class index { 16 | public: 17 | index() 18 | : m_size(0) 19 | {} 20 | 21 | class builder { 22 | public: 23 | builder(uint64_t num_docs) 24 | { 25 | m_num_docs = num_docs; 26 | m_endpoints.push_back(0); 27 | } 28 | 29 | template 30 | void add_posting_list(uint64_t n, DocsIterator docs_begin, Codec codec, bool compress_freqs) 31 | { 32 | if (!n) throw std::invalid_argument("List must be nonempty"); 33 | posting_list::write(m_lists, n, docs_begin, codec, compress_freqs); 34 | m_endpoints.push_back(m_lists.size()); 35 | } 36 | 37 | 38 | size_t build(index& sq) 39 | { 40 | sq.m_size = m_endpoints.size() - 1; 41 | sq.m_num_docs = m_num_docs; 42 | sq.m_lists.steal(m_lists); 43 | sq.m_endpoints.steal(m_endpoints); 44 | return sq.m_lists.size(); 45 | } 46 | 47 | private: 48 | size_t m_num_docs; 49 | std::vector m_endpoints; 50 | std::vector m_lists; 51 | }; 52 | 53 | size_t size() const 54 | { 55 | return m_size; 56 | } 57 | 58 | uint64_t num_docs() const 59 | { 60 | return m_num_docs; 61 | } 62 | 63 | typedef typename posting_list::document_enumerator document_enumerator; 64 | 65 | document_enumerator operator[](size_t i) const 66 | { 67 | assert(i < size()); 68 | auto endpoint = m_endpoints[i]; 69 | auto len = m_endpoints[i+1] - endpoint; 70 | return document_enumerator(m_lists.data() + endpoint, len, m_codec); 71 | } 72 | 73 | size_t get_data(std::vector &data, size_t i) const 74 | { 75 | assert(i < size()); 76 | uint32_t n; 77 | auto data_begin = tight_variable_byte::decode(m_lists.data() + m_endpoints[i], &n, 1); 78 | data.insert(data.end(), data_begin, m_lists.data() + m_endpoints[i+1] ); 79 | return n; 80 | } 81 | 82 | void warmup(size_t i) const 83 | { 84 | assert(i < size()); 85 | // compact_elias_fano::enumerator endpoints(m_endpoints, 0, 86 | // m_lists.size(), m_size, 87 | // m_params); 88 | 89 | auto begin = m_endpoints[i]; 90 | auto end = m_lists.size(); 91 | if (i + 1 != size()) { 92 | end = m_endpoints[i + 1]; 93 | } 94 | 95 | volatile uint32_t tmp; 96 | for (size_t i = begin; i != end; ++i) { 97 | tmp = m_lists[i]; 98 | } 99 | (void)tmp; 100 | } 101 | 102 | void swap(index& other) 103 | { 104 | std::swap(m_size, other.m_size); 105 | m_endpoints.swap(other.m_endpoints); 106 | m_lists.swap(other.m_lists); 107 | } 108 | 109 | template 110 | void map(Visitor& visit) 111 | { 112 | visit 113 | (m_size, "m_size") 114 | (m_num_docs, "m_num_docs") 115 | (m_endpoints, "m_endpoints") 116 | (m_lists, "m_lists"); 117 | } 118 | 119 | private: 120 | size_t m_size; 121 | size_t m_num_docs; 122 | mapper::mappable_vector m_endpoints; 123 | mapper::mappable_vector m_lists; 124 | Codec m_codec; 125 | }; 126 | } -------------------------------------------------------------------------------- /include/gpu_ic/utils/intrinsics.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #if defined(__SSE4_2__) 6 | #define USE_POPCNT 1 7 | #else 8 | #define USE_POPCNT 0 9 | #endif 10 | 11 | #if defined(__GNUC__) || defined(__clang__) 12 | #define 
__INTRIN_INLINE inline __attribute__((__always_inline__)) 13 | #elif defined(_MSC_VER) 14 | #define __INTRIN_INLINE inline __forceinline 15 | #else 16 | #define __INTRIN_INLINE inline 17 | #endif 18 | 19 | namespace gpu_ic { 20 | namespace intrinsics { 21 | 22 | __INTRIN_INLINE uint64_t byteswap64(uint64_t value) { 23 | #if defined(__GNUC__) || defined(__clang__) 24 | return __builtin_bswap64(value); 25 | #elif defined(_MSC_VER) 26 | return _byteswap_uint64(value); 27 | #else 28 | #error Unsupported platform 29 | #endif 30 | } 31 | 32 | __INTRIN_INLINE bool bsf64(unsigned long *const index, const uint64_t mask) { 33 | #if defined(__GNUC__) || defined(__clang__) 34 | if (mask) { 35 | *index = (unsigned long)__builtin_ctzll(mask); 36 | return true; 37 | } else { 38 | return false; 39 | } 40 | #elif defined(_MSC_VER) 41 | return _BitScanForward64(index, mask) != 0; 42 | #else 43 | #error Unsupported platform 44 | #endif 45 | } 46 | 47 | __INTRIN_INLINE bool bsr64(unsigned long *const index, const uint64_t mask) { 48 | #if defined(__GNUC__) || defined(__clang__) 49 | if (mask) { 50 | *index = (unsigned long)(63 - __builtin_clzll(mask)); 51 | return true; 52 | } else { 53 | return false; 54 | } 55 | #elif defined(_MSC_VER) 56 | return _BitScanReverse64(index, mask) != 0; 57 | #else 58 | #error Unsupported platform 59 | #endif 60 | } 61 | 62 | template 63 | __INTRIN_INLINE void prefetch(T const *ptr) { 64 | #if defined(__SSE__) 65 | _mm_prefetch((const char *)ptr, _MM_HINT_T0); 66 | #endif 67 | } 68 | 69 | #if USE_POPCNT 70 | 71 | __INTRIN_INLINE uint64_t popcount(uint64_t x) { return uint64_t(_mm_popcnt_u64(x)); } 72 | 73 | #endif /* USE_POPCNT */ 74 | 75 | } // namespace intrinsics 76 | } // namespace succinct -------------------------------------------------------------------------------- /include/gpu_ic/utils/mappable_vector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "boost/utility.hpp" 7 | #include "boost/range.hpp" 8 | #include "boost/function.hpp" 9 | #include "boost/lambda/bind.hpp" 10 | #include "boost/lambda/construct.hpp" 11 | 12 | #include "intrinsics.hpp" 13 | 14 | namespace gpu_ic { namespace mapper { 15 | 16 | namespace detail { 17 | class freeze_visitor; 18 | class map_visitor; 19 | class sizeof_visitor; 20 | } 21 | 22 | typedef boost::function deleter_t; 23 | 24 | template // T must be a POD 25 | class mappable_vector { 26 | public: 27 | typedef T value_type; 28 | typedef const T* iterator; 29 | typedef const T* const_iterator; 30 | 31 | mappable_vector() 32 | : m_data(0) 33 | , m_size(0) 34 | , m_deleter() 35 | {} 36 | mappable_vector(const mappable_vector &) = delete; 37 | mappable_vector &operator=(const mappable_vector &) = delete; 38 | 39 | template 40 | mappable_vector(Range const& from) 41 | : m_data(0) 42 | , m_size(0) 43 | { 44 | size_t size = boost::size(from); 45 | T* data = new T[size]; 46 | m_deleter = boost::lambda::bind(boost::lambda::delete_array(), data); 47 | 48 | std::copy(boost::begin(from), 49 | boost::end(from), 50 | data); 51 | m_data = data; 52 | m_size = size; 53 | } 54 | 55 | ~mappable_vector() { 56 | if (m_deleter) { 57 | m_deleter(); 58 | } 59 | } 60 | 61 | void swap(mappable_vector& other) { 62 | using std::swap; 63 | swap(m_data, other.m_data); 64 | swap(m_size, other.m_size); 65 | swap(m_deleter, other.m_deleter); 66 | } 67 | 68 | void clear() { 69 | mappable_vector().swap(*this); 70 | } 71 | 72 | void steal(std::vector& vec) { 73 | clear(); 
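// Take ownership of the caller's buffer without copying: swap it into a heap-allocated
// vector, keep that vector alive through m_deleter, and point m_data at its storage.
// The caller's vec is left empty afterwards.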
74 | m_size = vec.size(); 75 | if (m_size) { 76 | std::vector* new_vec = new std::vector; 77 | new_vec->swap(vec); 78 | m_deleter = boost::lambda::bind(boost::lambda::delete_ptr(), new_vec); 79 | m_data = &(*new_vec)[0]; 80 | } 81 | } 82 | 83 | template 84 | void assign(Range const& from) { 85 | clear(); 86 | mappable_vector(from).swap(*this); 87 | } 88 | 89 | uint64_t size() const { 90 | return m_size; 91 | } 92 | 93 | inline const_iterator begin() const { 94 | return m_data; 95 | } 96 | 97 | inline const_iterator end() const { 98 | return m_data + m_size; 99 | } 100 | 101 | inline T const& operator[](uint64_t i) const { 102 | assert(i < m_size); 103 | return m_data[i]; 104 | } 105 | 106 | inline T const* data() const { 107 | return m_data; 108 | } 109 | 110 | inline void prefetch(size_t i) const { 111 | intrinsics::prefetch(m_data + i); 112 | } 113 | 114 | friend class detail::freeze_visitor; 115 | friend class detail::map_visitor; 116 | friend class detail::sizeof_visitor; 117 | 118 | protected: 119 | const T* m_data; 120 | uint64_t m_size; 121 | deleter_t m_deleter; 122 | }; 123 | 124 | }} -------------------------------------------------------------------------------- /include/gpu_ic/utils/mapper.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "mio/mmap.hpp" 7 | 8 | #include "mappable_vector.hpp" 9 | 10 | namespace gpu_ic { 11 | namespace mapper { 12 | 13 | struct map_flags { 14 | enum { warmup = 1 }; 15 | }; 16 | 17 | struct size_node; 18 | typedef std::shared_ptr size_node_ptr; 19 | 20 | struct size_node { 21 | size_node() : size(0) {} 22 | 23 | std::string name; 24 | size_t size; 25 | std::vector children; 26 | 27 | void dump(std::ostream &os = std::cerr, size_t depth = 0) { 28 | os << std::string(depth * 4, ' ') << name << ": " << size << '\n'; 29 | for (size_t i = 0; i < children.size(); ++i) { 30 | children[i]->dump(os, depth + 1); 31 | } 32 | } 33 | }; 34 | 35 | namespace detail { 36 | class freeze_visitor { 37 | public: 38 | freeze_visitor(std::ofstream &fout, uint64_t flags) 39 | : m_fout(fout), m_flags(flags), m_written(0) { 40 | // Save freezing flags 41 | m_fout.write(reinterpret_cast(&m_flags), sizeof(m_flags)); 42 | m_written += sizeof(m_flags); 43 | } 44 | 45 | freeze_visitor(const freeze_visitor &) = delete; 46 | freeze_visitor &operator=(const freeze_visitor &) = delete; 47 | 48 | template 49 | typename std::enable_if::value, freeze_visitor &>::type operator()( 50 | T &val, const char * /* friendly_name */) { 51 | val.map(*this); 52 | return *this; 53 | } 54 | 55 | template 56 | typename std::enable_if::value, freeze_visitor &>::type operator()( 57 | T &val, const char * /* friendly_name */) { 58 | m_fout.write(reinterpret_cast(&val), sizeof(T)); 59 | m_written += sizeof(T); 60 | return *this; 61 | } 62 | 63 | template 64 | freeze_visitor &operator()(mappable_vector &vec, const char * /* friendly_name */) { 65 | (*this)(vec.m_size, "size"); 66 | 67 | size_t n_bytes = static_cast(vec.m_size * sizeof(T)); 68 | m_fout.write(reinterpret_cast(vec.m_data), long(n_bytes)); 69 | m_written += n_bytes; 70 | 71 | return *this; 72 | } 73 | 74 | size_t written() const { return m_written; } 75 | 76 | protected: 77 | std::ofstream &m_fout; 78 | const uint64_t m_flags; 79 | uint64_t m_written; 80 | }; 81 | 82 | class map_visitor { 83 | public: 84 | map_visitor(const char *base_address, uint64_t flags) 85 | : m_base(base_address), m_cur(m_base), m_flags(flags) { 86 | m_freeze_flags = 
*reinterpret_cast(m_cur); 87 | m_cur += sizeof(m_freeze_flags); 88 | } 89 | 90 | map_visitor(const map_visitor &) = delete; 91 | map_visitor &operator=(const map_visitor &) = delete; 92 | 93 | template 94 | typename std::enable_if::value, map_visitor &>::type operator()( 95 | T &val, const char * /* friendly_name */) { 96 | val.map(*this); 97 | return *this; 98 | } 99 | 100 | template 101 | typename std::enable_if::value, map_visitor &>::type operator()( 102 | T &val, const char * /* friendly_name */) { 103 | val = *reinterpret_cast(m_cur); 104 | m_cur += sizeof(T); 105 | return *this; 106 | } 107 | 108 | template 109 | map_visitor &operator()(mappable_vector &vec, const char * /* friendly_name */) { 110 | vec.clear(); 111 | (*this)(vec.m_size, "size"); 112 | 113 | vec.m_data = reinterpret_cast(m_cur); 114 | size_t bytes = vec.m_size * sizeof(T); 115 | 116 | if (m_flags & map_flags::warmup) { 117 | T foo; 118 | volatile T *bar = &foo; 119 | for (size_t i = 0; i < vec.m_size; ++i) { 120 | *bar = vec.m_data[i]; 121 | } 122 | } 123 | 124 | m_cur += bytes; 125 | return *this; 126 | } 127 | 128 | size_t bytes_read() const { return size_t(m_cur - m_base); } 129 | 130 | protected: 131 | const char *m_base; 132 | const char *m_cur; 133 | const uint64_t m_flags; 134 | uint64_t m_freeze_flags; 135 | }; 136 | 137 | class sizeof_visitor { 138 | public: 139 | sizeof_visitor(bool with_tree = false) : m_size(0) { 140 | if (with_tree) { 141 | m_cur_size_node = std::make_shared(); 142 | } 143 | } 144 | 145 | sizeof_visitor(const sizeof_visitor &) = delete; 146 | sizeof_visitor &operator=(const sizeof_visitor &) = delete; 147 | 148 | template 149 | typename std::enable_if::value, sizeof_visitor &>::type operator()( 150 | T &val, const char *friendly_name) { 151 | size_t checkpoint = m_size; 152 | size_node_ptr parent_node; 153 | if (m_cur_size_node) { 154 | parent_node = m_cur_size_node; 155 | m_cur_size_node = make_node(friendly_name); 156 | } 157 | 158 | val.map(*this); 159 | 160 | if (m_cur_size_node) { 161 | m_cur_size_node->size = m_size - checkpoint; 162 | m_cur_size_node = parent_node; 163 | } 164 | return *this; 165 | } 166 | 167 | template 168 | typename std::enable_if::value, sizeof_visitor &>::type operator()( 169 | T & /* val */, const char * /* friendly_name */) { 170 | // don't track PODs in the size tree (they are constant sized) 171 | m_size += sizeof(T); 172 | return *this; 173 | } 174 | 175 | template 176 | sizeof_visitor &operator()(mappable_vector &vec, const char *friendly_name) { 177 | size_t checkpoint = m_size; 178 | (*this)(vec.m_size, "size"); 179 | m_size += static_cast(vec.m_size * sizeof(T)); 180 | 181 | if (m_cur_size_node) { 182 | make_node(friendly_name)->size = m_size - checkpoint; 183 | } 184 | 185 | return *this; 186 | } 187 | 188 | size_t size() const { return m_size; } 189 | 190 | size_node_ptr size_tree() const { 191 | assert(m_cur_size_node); 192 | return m_cur_size_node; 193 | } 194 | 195 | protected: 196 | size_node_ptr make_node(const char *name) { 197 | size_node_ptr node = std::make_shared(); 198 | m_cur_size_node->children.push_back(node); 199 | node->name = name; 200 | return node; 201 | } 202 | 203 | size_t m_size; 204 | size_node_ptr m_cur_size_node; 205 | }; 206 | 207 | } // namespace detail 208 | 209 | template 210 | size_t freeze(T &val, 211 | std::ofstream &fout, 212 | uint64_t flags = 0, 213 | const char *friendly_name = "") { 214 | detail::freeze_visitor freezer(fout, flags); 215 | freezer(val, friendly_name); 216 | return freezer.written(); 217 | } 218 
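// A minimal usage sketch (assumptions: an index type exposing map(Visitor&), e.g. gpu_ic::index
// from index.cuh, already built; "index.bin" is an arbitrary path chosen for illustration):
//
//     gpu_ic::index idx;                         // assumed built via index::builder
//     gpu_ic::mapper::freeze(idx, "index.bin");  // writes the flags word, then the members
//
//     mio::mmap_source m;
//     std::error_code ec;
//     m.map("index.bin", ec);
//     gpu_ic::index loaded;
//     gpu_ic::mapper::map(loaded, m);            // mappable_vectors point into the mapping, no copy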
| 219 | template 220 | size_t freeze(T &val, 221 | const char *filename, 222 | uint64_t flags = 0, 223 | const char *friendly_name = "") { 224 | std::ofstream fout(filename, std::ios::binary); 225 | return freeze(val, fout, flags, friendly_name); 226 | } 227 | 228 | template 229 | size_t map(T &val, 230 | const char *base_address, 231 | uint64_t flags = 0, 232 | const char *friendly_name = "") { 233 | detail::map_visitor mapper(base_address, flags); 234 | mapper(val, friendly_name); 235 | return mapper.bytes_read(); 236 | } 237 | 238 | template 239 | size_t map(T &val, 240 | const mio::mmap_source &m, 241 | uint64_t flags = 0, 242 | const char *friendly_name = "") { 243 | return map(val, m.data(), flags, friendly_name); 244 | } 245 | 246 | template 247 | size_t size_of(T &val) { 248 | detail::sizeof_visitor sizer; 249 | sizer(val, ""); 250 | return sizer.size(); 251 | } 252 | 253 | template 254 | size_node_ptr size_tree_of(T &val, const char *friendly_name = "") { 255 | detail::sizeof_visitor sizer(true); 256 | sizer(val, friendly_name); 257 | assert(sizer.size_tree()->children.size()); 258 | return sizer.size_tree()->children[0]; 259 | } 260 | 261 | } // namespace mapper 262 | } // namespace gpu_ic -------------------------------------------------------------------------------- /include/gpu_ic/utils/posting_list.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tight_variable_byte.hpp" 4 | 5 | namespace gpu_ic { 6 | 7 | struct posting_list { 8 | 9 | template 10 | static void write(std::vector& out, uint32_t n, DocsIterator docs_begin, Codec codec, bool compress_freqs) { 11 | tight_variable_byte::encode_single(n, out); 12 | 13 | DocsIterator docs_it(docs_begin); 14 | std::vector docs_buf(n); 15 | 16 | uint32_t last_doc(*docs_it++);; 17 | for (size_t i = 1; i < n; ++i) { 18 | uint32_t doc(*docs_it++); 19 | if(not compress_freqs) { 20 | docs_buf[i] = doc - last_doc - 1; 21 | } 22 | else { 23 | docs_buf[i] = doc - 1; 24 | } 25 | last_doc = doc; 26 | } 27 | 28 | size_t compressedsize = 0; 29 | std::vector encoded_values(n*4+1024); 30 | codec.encodeArray(docs_buf.data(), n, reinterpret_cast(encoded_values.data()), compressedsize); 31 | out.insert(out.end(), encoded_values.data(), encoded_values.data() + compressedsize*4); 32 | } 33 | 34 | class document_enumerator { 35 | public: 36 | 37 | template 38 | document_enumerator(uint8_t const* data, uint64_t len, Codec codec) 39 | : m_n(0) 40 | , m_base(tight_variable_byte::decode(data, &m_n, 1)) 41 | , m_len(len) 42 | { 43 | m_docs_buf.resize(m_n); 44 | decode_docs_block(codec); 45 | 46 | } 47 | 48 | void next() 49 | { 50 | ++m_pos_in_block; 51 | m_cur_docid = m_docs_buf[m_pos_in_block]; 52 | } 53 | 54 | 55 | uint64_t docid() const 56 | { 57 | return m_cur_docid; 58 | } 59 | 60 | 61 | uint64_t position() const 62 | { 63 | return m_pos_in_block; 64 | } 65 | 66 | uint64_t size() const 67 | { 68 | return m_n; 69 | } 70 | 71 | private: 72 | 73 | template 74 | void decode_docs_block(Codec codec) 75 | { 76 | 77 | size_t n =m_n; 78 | codec.decodeArray(reinterpret_cast(m_base), m_len/4, reinterpret_cast(m_docs_buf.data()), n); 79 | 80 | m_pos_in_block = 0; 81 | m_cur_docid = m_docs_buf[0]; 82 | 83 | } 84 | 85 | uint32_t m_n; 86 | uint8_t const* m_base; 87 | uint64_t m_len; 88 | 89 | uint32_t m_pos_in_block; 90 | uint32_t m_cur_docid; 91 | 92 | 93 | std::vector m_docs_buf; 94 | 95 | }; 96 | 97 | }; 98 | } -------------------------------------------------------------------------------- 
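A minimal usage sketch for posting_list above (illustrative only, not part of the repository): the replay() helper and the Codec template parameter are assumptions; the codec is expected to expose encodeArray/decodeArray in the FastPFor style, exactly as write() and document_enumerator call them. Recall that write() stores positions 1..n-1 as gaps and the enumerator returns the values exactly as the codec decodes them.

#include <cstddef>
#include <cstdint>
#include <vector>
#include "gpu_ic/utils/posting_list.hpp"

template <typename Codec>
std::vector<uint64_t> replay(const std::vector<uint32_t> &docs, Codec codec) {
    // docs is assumed non-empty and strictly increasing
    std::vector<uint8_t> buf;
    // compress_freqs = false: treat the input as docIDs and gap-encode positions 1..n-1
    gpu_ic::posting_list::write(buf, static_cast<uint32_t>(docs.size()), docs.begin(), codec, false);

    gpu_ic::posting_list::document_enumerator e(buf.data(), buf.size(), codec);
    std::vector<uint64_t> values;
    for (size_t i = 0; i < e.size(); ++i) {
        values.push_back(e.docid());     // values as decoded by the codec, no gap-undoing here
        if (i + 1 < e.size()) e.next();  // calling next() past the last element would read out of range
    }
    return values;
}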
/include/gpu_ic/utils/progress.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace gpu_ic { 9 | 10 | class progress { 11 | 12 | public: 13 | progress(const std::string &name, size_t goal) : m_name(name) { 14 | if (goal == 0) { 15 | throw std::runtime_error("goal must be positive"); 16 | } 17 | m_goal = goal; 18 | } 19 | ~progress() { 20 | m_status.notify_one(); 21 | std::unique_lock lock(m_mut); 22 | print_status(); 23 | std::cerr << std::endl; 24 | } 25 | 26 | void update(size_t inc) { 27 | std::unique_lock lock(m_mut); 28 | m_count += inc; 29 | print_status(); 30 | } 31 | 32 | private: 33 | std::string m_name; 34 | size_t m_count = 0; 35 | size_t m_goal = 0; 36 | 37 | std::chrono::time_point m_start = std::chrono::steady_clock::now(); 38 | 39 | std::mutex m_mut; 40 | std::condition_variable m_status; 41 | 42 | void print_status() { 43 | size_t progress = (100 * m_count) / m_goal; 44 | std::chrono::seconds elapsed = std::chrono::duration_cast( 45 | std::chrono::steady_clock::now() - m_start); 46 | std::cerr << '\r' << m_name << ": " << progress << "% ["; 47 | format_interval(std::cerr, elapsed); 48 | std::cerr << "]"; 49 | } 50 | 51 | std::ostream& format_interval(std::ostream& out, std::chrono::seconds time) { 52 | using std::chrono::hours; 53 | using std::chrono::minutes; 54 | using std::chrono::seconds; 55 | hours h = std::chrono::duration_cast(time); 56 | minutes m = std::chrono::duration_cast(time - h); 57 | seconds s = std::chrono::duration_cast(time - h - m); 58 | if (h.count() > 0) { out << h.count() << "h "; } 59 | if (m.count() > 0) { out << m.count() << "m "; } 60 | out << s.count() << "s"; 61 | return out; 62 | } 63 | }; 64 | 65 | } 66 | -------------------------------------------------------------------------------- /include/gpu_ic/utils/tables.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace gpu_ic { namespace tables { 6 | 7 | const uint8_t select_in_byte[2048] = { 8 | 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 9 | 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 10 | 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 11 | 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 12 | 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 13 | 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 14 | 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 8, 8, 8, 1, 8, 2, 2, 1, 8, 3, 3, 1, 3, 2, 2, 1, 8, 15 | 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 8, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 16 | 4, 3, 3, 1, 3, 2, 2, 1, 8, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 17 | 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 8, 7, 7, 1, 7, 2, 18 | 2, 1, 7, 3, 3, 1, 3, 2, 2, 1, 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 19 | 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 7, 6, 6, 1, 6, 2, 2, 1, 
6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 4, 1, 20 | 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 21 | 1, 3, 2, 2, 1, 8, 8, 8, 8, 8, 8, 8, 2, 8, 8, 8, 3, 8, 3, 3, 2, 8, 8, 8, 4, 8, 4, 4, 2, 8, 4, 4, 3, 4, 3, 3, 2, 8, 8, 22 | 8, 5, 8, 5, 5, 2, 8, 5, 5, 3, 5, 3, 3, 2, 8, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 8, 8, 8, 6, 8, 6, 6, 2, 8, 23 | 6, 6, 3, 6, 3, 3, 2, 8, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2, 8, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 24 | 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 8, 8, 8, 7, 8, 7, 7, 2, 8, 7, 7, 3, 7, 3, 3, 2, 8, 7, 7, 4, 7, 4, 4, 25 | 2, 7, 4, 4, 3, 4, 3, 3, 2, 8, 7, 7, 5, 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2, 7, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 26 | 3, 2, 8, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2, 7, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2, 7, 6, 6, 5, 6, 27 | 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 28 | 8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 4, 8, 4, 4, 3, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 5, 8, 5, 5, 3, 8, 8, 8, 29 | 5, 8, 5, 5, 4, 8, 5, 5, 4, 5, 4, 4, 3, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 3, 8, 8, 8, 6, 8, 6, 6, 4, 8, 6, 30 | 6, 4, 6, 4, 4, 3, 8, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6, 5, 6, 5, 5, 3, 8, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 8, 31 | 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 3, 8, 8, 8, 7, 8, 7, 7, 4, 8, 7, 7, 4, 7, 4, 4, 3, 8, 8, 8, 7, 8, 7, 7, 5, 32 | 8, 7, 7, 5, 7, 5, 5, 3, 8, 7, 7, 5, 7, 5, 5, 4, 7, 5, 5, 4, 5, 4, 4, 3, 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 33 | 3, 8, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 6, 4, 4, 3, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3, 7, 6, 6, 5, 6, 5, 34 | 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 35 | 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 5, 8, 5, 5, 4, 8, 8, 8, 8, 36 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 4, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 37 | 6, 8, 6, 6, 5, 8, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6, 5, 6, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 38 | 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 4, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 5, 8, 8, 8, 7, 8, 7, 7, 5, 8, 39 | 7, 7, 5, 7, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 4, 40 | 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 5, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8, 41 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 42 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 43 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 44 | 8, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 45 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 46 | 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 47 | 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 48 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 
8, 8, 8, 8, 8, 8, 8, 49 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 50 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 51 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 52 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 53 | 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 54 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 55 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 56 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 57 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 58 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 59 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 60 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7 61 | }; 62 | 63 | }} -------------------------------------------------------------------------------- /include/gpu_ic/utils/tight_variable_byte.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | class tight_variable_byte { 4 | public: 5 | template 6 | static uint8_t extract7bits(const uint32_t val) { 7 | return static_cast((val >> (7 * i)) & ((1U << 7) - 1)); 8 | } 9 | 10 | template 11 | static uint8_t extract7bitsmaskless(const uint32_t val) { 12 | return static_cast((val >> (7 * i))); 13 | } 14 | 15 | static void encode(const uint32_t *in, const size_t length, uint8_t *out, size_t &nvalue) { 16 | uint8_t *bout = out; 17 | for (size_t k = 0; k < length; ++k) { 18 | const uint32_t val(in[k]); 19 | /** 20 | * Code below could be shorter. Whether it could be faster 21 | * depends on your compiler and machine. 
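 * Each value is emitted as 7-bit groups, least-significant group first, and the high bit is set only on the final byte of a value; decode() below keeps accumulating groups until it sees that stop bit.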
22 | */ 23 | if (val < (1U << 7)) { 24 | *bout = static_cast<uint8_t>(val | (1U << 7)); 25 | ++bout; 26 | } else if (val < (1U << 14)) { 27 | *bout = extract7bits<0>(val); 28 | ++bout; 29 | *bout = extract7bitsmaskless<1>(val) | (1U << 7); 30 | ++bout; 31 | } else if (val < (1U << 21)) { 32 | *bout = extract7bits<0>(val); 33 | ++bout; 34 | *bout = extract7bits<1>(val); 35 | ++bout; 36 | *bout = extract7bitsmaskless<2>(val) | (1U << 7); 37 | ++bout; 38 | } else if (val < (1U << 28)) { 39 | *bout = extract7bits<0>(val); 40 | ++bout; 41 | *bout = extract7bits<1>(val); 42 | ++bout; 43 | *bout = extract7bits<2>(val); 44 | ++bout; 45 | *bout = extract7bitsmaskless<3>(val) | (1U << 7); 46 | ++bout; 47 | } else { 48 | *bout = extract7bits<0>(val); 49 | ++bout; 50 | *bout = extract7bits<1>(val); 51 | ++bout; 52 | *bout = extract7bits<2>(val); 53 | ++bout; 54 | *bout = extract7bits<3>(val); 55 | ++bout; 56 | *bout = extract7bitsmaskless<4>(val) | (1U << 7); 57 | ++bout; 58 | } 59 | } 60 | nvalue = bout - out; 61 | } 62 | 63 | static void encode_single(uint32_t val, std::vector<uint8_t> &out) { 64 | uint8_t buf[5]; 65 | size_t nvalue; 66 | encode(&val, 1, buf, nvalue); 67 | out.insert(out.end(), buf, buf + nvalue); 68 | } 69 | 70 | static uint8_t const *decode(const uint8_t *in, uint32_t *out, size_t n) { 71 | const uint8_t *inbyte = in; 72 | for (size_t i = 0; i < n; ++i) { 73 | unsigned int shift = 0; 74 | for (uint32_t v = 0;; shift += 7) { 75 | uint8_t c = *inbyte++; 76 | v += ((c & 127) << shift); 77 | if ((c & 128)) { 78 | *out++ = v; 79 | break; 80 | } 81 | } 82 | } 83 | return inbyte; 84 | } 85 | }; 86 | -------------------------------------------------------------------------------- /include/gpu_ic/utils/utils.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-present Antonio Mallia 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include <cstddef> 20 | #include <cstdint> 21 | 22 | namespace utils { 23 | 24 | template <typename T> 25 | inline void do_not_optimize_away(T&& datum) { 26 | asm volatile("" : "+r" (datum)); 27 | } 28 | 29 | inline void delta_encode(uint32_t *in, size_t n) { 30 | for (size_t i = n - 1; i > 0; --i) { 31 | in[i] -= in[i - 1]; 32 | } 33 | } 34 | 35 | inline void delta_decode(uint32_t *in, size_t n) { 36 | for (size_t i = 1; i < n; ++i) { 37 | in[i] += in[i - 1]; 38 | } 39 | } 40 | 41 | /* 42 | * Computes the number of bits required to store the given integer value. 43 | */ 44 | inline constexpr uint_fast8_t bits(size_t value) { 45 | return value == 0 ? 
1U : (64 - __builtin_clzll(value)); 46 | } 47 | 48 | } // namespace utils -------------------------------------------------------------------------------- /plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amallia/gpu-integers-compression/a120ed89d4c5eed5761402147525ef480cdcdc93/plot.png -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(compress_index compress_index.cpp) 2 | target_link_libraries(compress_index 3 | gpu_integers_compression 4 | CLI11 5 | ) 6 | 7 | add_executable(perf_decode perf_decode.cpp) 8 | target_link_libraries(perf_decode 9 | gpu_integers_compression 10 | CLI11 11 | ) 12 | 13 | if(BUILD_CUDA) 14 | CUDA_ADD_EXECUTABLE(cuda_compress_index cuda_compress_index.cu) 15 | target_link_libraries(cuda_compress_index 16 | gpu_integers_compression 17 | CLI11 18 | ) 19 | CUDA_ADD_EXECUTABLE(cuda_perf_decode cuda_perf_decode.cu) 20 | target_link_libraries(cuda_perf_decode 21 | gpu_integers_compression 22 | CLI11 23 | ) 24 | endif() 25 | -------------------------------------------------------------------------------- /src/compress_index.cpp: -------------------------------------------------------------------------------- 1 | #include "CLI/CLI.hpp" 2 | #include "../external/FastPFor/headers/codecfactory.h" 3 | #include "gpu_ic/utils/binary_freq_collection.hpp" 4 | #include "gpu_ic/utils/progress.hpp" 5 | #include "gpu_ic/utils/bit_ostream.hpp" 6 | #include "gpu_ic/utils/bit_istream.hpp" 7 | #include "gpu_ic/utils/tight_variable_byte.hpp" 8 | #include "gpu_ic/utils/index.hpp" 9 | #include "gpu_ic/utils/mapper.hpp" 10 | #include "mio/mmap.hpp" 11 | 12 | using namespace gpu_ic; 13 | using namespace FastPForLib; 14 | 15 | template 16 | void verify_index(InputCollection const &input, 17 | const std::string &filename, bool compress_freqs) { 18 | 19 | Codec codec; 20 | gpu_ic::index coll; 21 | mio::mmap_source m; 22 | std::error_code error; 23 | m.map(filename, error); 24 | mapper::map(coll, m); 25 | 26 | { 27 | progress progress("Verify index", input.size()); 28 | 29 | size_t i =0; 30 | for (auto const &plist : input) { 31 | auto docs_it = compress_freqs ? plist.freqs.begin() : plist.docs.begin(); 32 | 33 | std::vector values(plist.docs.size()); 34 | uint32_t last_doc(*docs_it++);; 35 | for (size_t j = 1; j < plist.docs.size(); ++j) { 36 | uint32_t doc(*docs_it++); 37 | if(not compress_freqs){ 38 | values[j] = doc - last_doc - 1; 39 | } 40 | else{ 41 | values[j] = doc - 1; 42 | } 43 | last_doc = doc; 44 | } 45 | 46 | std::vector tmp; 47 | auto n = coll.get_data(tmp, i); 48 | std::vector decode_values(n); 49 | codec.decodeArray(reinterpret_cast(tmp.data()), tmp.size()/4, reinterpret_cast(decode_values.data()), n); 50 | 51 | if(n != plist.docs.size()) 52 | { 53 | std::cerr << "Error: wrong list length. List: " << i << ", size: " << n << ", real_size: " << plist.docs.size() << std::endl; 54 | std::abort(); 55 | } 56 | 57 | for (size_t j = 0; j < n; ++j) { 58 | if(decode_values[j] != values[j]) { 59 | std::cerr << "Error: wrong decoded value. 
List: " << i << ", position: " << j << ", element: " << decode_values[j] << ", real_element: " << values[j] << std::endl; 60 | std::abort(); 61 | } 62 | } 63 | progress.update(1); 64 | i+=1; 65 | } 66 | } 67 | 68 | } 69 | 70 | template 71 | void create_collection(InputCollection const &input, 72 | const std::string &output_filename, 73 | Codec &codec, bool compress_freqs) { 74 | 75 | typename gpu_ic::index::builder builder(input.num_docs()); 76 | size_t postings = 0; 77 | { 78 | progress progress("Create index", input.size()); 79 | 80 | for (auto const &plist : input) { 81 | size_t size = plist.docs.size(); 82 | if(not compress_freqs) { 83 | builder.add_posting_list(size, plist.docs.begin(), codec, compress_freqs); 84 | } 85 | else { 86 | builder.add_posting_list(size, plist.freqs.begin(), codec, compress_freqs); 87 | } 88 | postings += size; 89 | progress.update(1); 90 | } 91 | } 92 | 93 | gpu_ic::index coll; 94 | auto data_len = builder.build(coll); 95 | auto byte= mapper::freeze(coll, output_filename.c_str()); 96 | 97 | 98 | double bits_per_doc = data_len * 8.0 / postings; 99 | std::cout << "Documents: " << postings << ", Total size bytes: " << byte << ", bits/doc: " << bits_per_doc << std::endl; 100 | 101 | verify_index(input, output_filename, compress_freqs); 102 | } 103 | 104 | 105 | int main(int argc, char const *argv[]) 106 | { 107 | std::string type; 108 | std::string input_basename; 109 | std::string output_filename; 110 | bool compress_freqs = false; 111 | 112 | CLI::App app{"compress_index - a tool for compressing an index."}; 113 | app.add_option("-t,--type", type, "Index type")->required(); 114 | app.add_option("-c,--collection", input_basename, "Collection basename")->required(); 115 | app.add_option("-o,--output", output_filename, "Output filename")->required(); 116 | app.add_flag("--freqs", compress_freqs, "Compress freqs instead of docs"); 117 | 118 | CLI11_PARSE(app, argc, argv); 119 | 120 | binary_freq_collection input(input_basename.c_str()); 121 | if (type == "simdbp") { 122 | CompositeCodec codec; 123 | create_collection(input, output_filename, codec, compress_freqs); 124 | } else if (type == "streamvbyte") { 125 | StreamVByte codec; 126 | create_collection(input, output_filename, codec, compress_freqs); 127 | } else if (type == "bp") { 128 | CompositeCodec codec; 129 | create_collection(input, output_filename, codec, compress_freqs); 130 | } else if (type == "varintgb") { 131 | VarIntGB<> codec; 132 | create_collection(input, output_filename, codec, compress_freqs); 133 | } else { 134 | std::cerr << "Unknown type" << std::endl; 135 | } 136 | 137 | return 0; 138 | } 139 | -------------------------------------------------------------------------------- /src/cuda_compress_index.cu: -------------------------------------------------------------------------------- 1 | #include "CLI/CLI.hpp" 2 | #include "gpu_ic/utils/binary_freq_collection.hpp" 3 | #include "gpu_ic/utils/progress.hpp" 4 | #include "gpu_ic/utils/tight_variable_byte.hpp" 5 | #include "gpu_ic/utils/index.cuh" 6 | #include "gpu_ic/utils/mapper.hpp" 7 | #include "mio/mmap.hpp" 8 | #include "gpu_ic/cuda_bp.cuh" 9 | #include "gpu_ic/cuda_vbyte.cuh" 10 | 11 | using namespace gpu_ic; 12 | 13 | template 14 | void verify_index(InputCollection const &input, 15 | const std::string &filename, Decoder decoder_function, bool compress_freqs) { 16 | 17 | // Codec codec; 18 | gpu_ic::index coll; 19 | mio::mmap_source m; 20 | std::error_code error; 21 | m.map(filename, error); 22 | mapper::map(coll, m); 23 | 24 | { 25 | 
progress progress("Verify index", input.size()); 26 | 27 | size_t i =0; 28 | for (auto const &plist : input) { 29 | auto docs_it = compress_freqs ? plist.freqs.begin() : plist.docs.begin(); 30 | 31 | std::vector values(plist.docs.size()); 32 | uint32_t last_doc(*docs_it++);; 33 | for (size_t j = 1; j < plist.docs.size(); ++j) { 34 | uint32_t doc(*docs_it++); 35 | if(not compress_freqs) { 36 | values[j] = doc - last_doc - 1; 37 | } else { 38 | values[j] = doc - 1; 39 | } 40 | last_doc = doc; 41 | } 42 | 43 | std::vector tmp; 44 | auto n = coll.get_data(tmp, i); 45 | std::vector decode_values(n); 46 | 47 | CUDA_CHECK_ERROR(cudaSetDevice(0)); 48 | warmUpGPU<<<1, 1>>>(); 49 | 50 | uint8_t * d_encoded; 51 | CUDA_CHECK_ERROR(cudaMalloc((void **)&d_encoded, tmp.size() * sizeof(uint8_t))); 52 | CUDA_CHECK_ERROR(cudaMemcpy(d_encoded, tmp.data(), tmp.size() * sizeof(uint8_t), cudaMemcpyHostToDevice)); 53 | 54 | uint32_t * d_decoded; 55 | CUDA_CHECK_ERROR(cudaMalloc((void **)&d_decoded, values.size() * sizeof(uint32_t))); 56 | decoder_function(d_decoded, d_encoded, decode_values.size()); 57 | CUDA_CHECK_ERROR(cudaMemcpy(decode_values.data(), d_decoded, values.size() * sizeof(uint32_t), cudaMemcpyDeviceToHost)); 58 | 59 | cudaFree(d_encoded); 60 | cudaFree(d_decoded); 61 | 62 | if(n != plist.docs.size()) 63 | { 64 | std::cerr << "Error: wrong list length. List: " << i << ", size: " << n << ", real_size: " << plist.docs.size() << std::endl; 65 | std::abort(); 66 | } 67 | 68 | for (size_t j = 0; j < n; ++j) { 69 | if(decode_values[j] != values[j]) { 70 | std::cerr << "Error: wrong decoded value. List: " << i << ", position: " << j << ", element: " << decode_values[j] << ", real_element: " << values[j] << std::endl; 71 | std::abort(); 72 | } 73 | } 74 | progress.update(1); 75 | i+=1; 76 | } 77 | } 78 | 79 | } 80 | 81 | template 82 | void create_collection(InputCollection const &input, 83 | const std::string &output_filename, 84 | Encoder &encoder_function, Decoder &decoder_function, bool compress_freqs) { 85 | 86 | typename gpu_ic::index::builder builder(input.num_docs()); 87 | size_t postings = 0; 88 | { 89 | progress progress("Create index", input.size()); 90 | 91 | for (auto const &plist : input) { 92 | size_t size = plist.docs.size(); 93 | if(not compress_freqs) { 94 | builder.add_posting_list(size, plist.docs.begin(), encoder_function, compress_freqs); 95 | } 96 | else { 97 | builder.add_posting_list(size, plist.freqs.begin(), encoder_function, compress_freqs); 98 | } 99 | 100 | postings += size; 101 | progress.update(1); 102 | } 103 | } 104 | 105 | gpu_ic::index coll; 106 | auto data_len = builder.build(coll); 107 | auto byte= mapper::freeze(coll, output_filename.c_str()); 108 | 109 | 110 | double bits_per_doc = data_len * 8.0 / postings; 111 | std::cout << "Documents: " << postings << ", total size bytes: " << byte << ", bits/doc: " << bits_per_doc << std::endl; 112 | 113 | verify_index(input, output_filename, decoder_function, compress_freqs); 114 | } 115 | 116 | 117 | int main(int argc, char** argv) 118 | { 119 | std::string type; 120 | std::string input_basename; 121 | std::string output_filename; 122 | bool compress_freqs = false; 123 | 124 | CLI::App app{"compress_index - a tool for compressing an index."}; 125 | app.add_option("-t,--type", type, "Index type")->required(); 126 | app.add_option("-c,--collection", input_basename, "Collection basename")->required(); 127 | app.add_option("-o,--output", output_filename, "Output filename")->required(); 128 | app.add_flag("--freqs", compress_freqs, 
"Compress freqs instead of docs"); 129 | CLI11_PARSE(app, argc, argv); 130 | 131 | binary_freq_collection input(input_basename.c_str()); 132 | if (type == "cuda_bp") { 133 | create_collection(input, output_filename, cuda_bp::encode<>, cuda_bp::decode<>, compress_freqs); 134 | } else if (type == "cuda_bp64") { 135 | create_collection(input, output_filename, cuda_bp::encode<64>, cuda_bp::decode<64>, compress_freqs); 136 | } else if (type == "cuda_bp128") { 137 | create_collection(input, output_filename, cuda_bp::encode<128>, cuda_bp::decode<128>, compress_freqs); 138 | } else if (type == "cuda_bp256") { 139 | create_collection(input, output_filename, cuda_bp::encode<256>, cuda_bp::decode<256>, compress_freqs); 140 | } else if (type == "cuda_bp512") { 141 | create_collection(input, output_filename, cuda_bp::encode<512>, cuda_bp::decode<512>, compress_freqs); 142 | } else if (type == "cuda_bp1024") { 143 | create_collection(input, output_filename, cuda_bp::encode<1024>, cuda_bp::decode<1024>, compress_freqs); 144 | } else if (type == "cuda_vbyte") { 145 | create_collection(input, output_filename, cuda_vbyte::encode<>, cuda_vbyte::decode<>, compress_freqs); 146 | } else if (type == "cuda_vbyte1024") { 147 | create_collection(input, output_filename, cuda_vbyte::encode<1024>, cuda_vbyte::decode<1024>, compress_freqs); 148 | } else { 149 | std::cerr << "Unknown type" << std::endl; 150 | } 151 | 152 | return 0; 153 | } 154 | -------------------------------------------------------------------------------- /src/cuda_perf_decode.cu: -------------------------------------------------------------------------------- 1 | #include "CLI/CLI.hpp" 2 | #include "gpu_ic/cuda_bp.cuh" 3 | #include "gpu_ic/cuda_vbyte.cuh" 4 | 5 | #include "gpu_ic/utils/binary_freq_collection.hpp" 6 | #include "gpu_ic/utils/progress.hpp" 7 | #include "gpu_ic/utils/tight_variable_byte.hpp" 8 | #include "gpu_ic/utils/index.cuh" 9 | #include "gpu_ic/utils/mapper.hpp" 10 | #include "gpu_ic/utils/utils.hpp" 11 | #include "mio/mmap.hpp" 12 | #include 13 | #include 14 | 15 | using namespace gpu_ic; 16 | using clock_type = std::chrono::high_resolution_clock; 17 | 18 | template 19 | void perftest(const std::string &filename, Decoder &decoder_function, const std::vector &terms) 20 | { 21 | gpu_ic::index coll; 22 | mio::mmap_source m; 23 | std::error_code error; 24 | m.map(filename, error); 25 | mapper::map(coll, m); 26 | 27 | std::vector>> long_lists; 28 | long_lists.reserve(terms.size()); 29 | for(auto&& t :terms) { 30 | std::vector tmp; 31 | auto n = coll.get_data(tmp, t); 32 | long_lists.push_back(std::make_pair(n, tmp)); 33 | } 34 | CUDA_CHECK_ERROR(cudaSetDevice(0)); 35 | warmUpGPU<<<1, 1>>>(); 36 | std::cout << "Scanning " << long_lists.size() << " posting lists" << std::endl; 37 | std::chrono::duration elapsed(0); 38 | size_t postings = 0; 39 | for (auto i: long_lists) { 40 | uint8_t * d_encoded; 41 | CUDA_CHECK_ERROR(cudaMalloc((void **)&d_encoded, i.second.size() * sizeof(uint8_t))); 42 | CUDA_CHECK_ERROR(cudaMemcpy(d_encoded, i.second.data(), i.second.size() * sizeof(uint8_t), cudaMemcpyHostToDevice)); 43 | 44 | std::vector decode_values(i.first); 45 | uint32_t * d_decoded; 46 | CUDA_CHECK_ERROR(cudaMalloc((void **)&d_decoded, decode_values.size() * sizeof(uint32_t))); 47 | CUDA_CHECK_ERROR(cudaDeviceSynchronize()); 48 | 49 | auto start = clock_type::now(); 50 | decoder_function(d_decoded, d_encoded, decode_values.size()); 51 | cudaDeviceSynchronize(); 52 | auto end = clock_type::now(); 53 | 54 | 
CUDA_CHECK_ERROR(cudaMemcpy(decode_values.data(), d_decoded, decode_values.size() * sizeof(uint32_t), cudaMemcpyDeviceToHost)); 55 | 56 | CUDA_CHECK_ERROR(cudaFree(d_encoded)); 57 | CUDA_CHECK_ERROR(cudaFree(d_decoded)); 58 | 59 | elapsed += end - start; 60 | 61 | for (size_t j = 0; j < i.first; ++j) { 62 | // std::cerr << decode_values[j] << std::endl; 63 | utils::do_not_optimize_away(decode_values[j]); 64 | } 65 | postings += decode_values.size(); 66 | } 67 | 68 | double next_ns = elapsed.count() / postings * 1000000000; 69 | double b_int_s = postings / elapsed.count() / 1000000; 70 | std::cout << "Performed " << postings << " next()" 71 | << " in " << elapsed.count() << " [sec], " 72 | << std::fixed << std::setprecision(2) 73 | << next_ns << " [ns] x posting, " 74 | << b_int_s << " M ints/sec" 75 | << std::endl; 76 | 77 | } 78 | 79 | bool read_query(std::vector &ret, std::istream &is = std::cin) { 80 | ret.clear(); 81 | std::string line; 82 | if (!std::getline(is, line)) 83 | return false; 84 | std::istringstream iline(line); 85 | uint32_t term_id; 86 | while (iline >> term_id) { 87 | ret.push_back(term_id); 88 | } 89 | return true; 90 | } 91 | 92 | int main(int argc, char const *argv[]) 93 | { 94 | std::string type; 95 | std::string index_basename; 96 | std::string query_basename; 97 | 98 | CLI::App app{"compress_index - a tool for compressing an index."}; 99 | app.add_option("-t,--type", type, "Index type")->required(); 100 | app.add_option("-i,--index", index_basename, "Index basename")->required(); 101 | app.add_option("-q,--query", query_basename, "Query basename")->required(); 102 | CLI11_PARSE(app, argc, argv); 103 | 104 | std::vector terms; 105 | std::filebuf fb; 106 | size_t queries_num = 0; 107 | if (fb.open(query_basename, std::ios::in)) { 108 | std::istream is(&fb); 109 | std::vector q; 110 | while (read_query(q, is)) { 111 | queries_num+=1; 112 | terms.insert(terms.end(), q.begin(), q.end()); 113 | } 114 | } 115 | if (type == "cuda_bp") { 116 | perftest(index_basename, cuda_bp::decode<>, terms); 117 | } else if (type == "cuda_bp64") { 118 | perftest(index_basename, cuda_bp::decode<64>, terms); 119 | } else if (type == "cuda_bp128") { 120 | perftest(index_basename, cuda_bp::decode<128>, terms); 121 | } else if (type == "cuda_bp256") { 122 | perftest(index_basename, cuda_bp::decode<256>, terms); 123 | } else if (type == "cuda_bp512") { 124 | perftest(index_basename, cuda_bp::decode<512>, terms); 125 | } else if (type == "cuda_bp1024") { 126 | perftest(index_basename, cuda_bp::decode<1024>, terms); 127 | } else if (type == "cuda_vbyte") { 128 | perftest(index_basename, cuda_vbyte::decode<>, terms); 129 | } else if (type == "cuda_vbyte1024") { 130 | perftest(index_basename, cuda_vbyte::decode<1024>, terms); 131 | } else { 132 | std::cerr << "Unknown type" << std::endl; 133 | } 134 | 135 | // std::cout << "Queries: " << queries_num << ", terms: " << terms.size() << std::endl; 136 | 137 | 138 | return 0; 139 | } 140 | -------------------------------------------------------------------------------- /src/perf_decode.cpp: -------------------------------------------------------------------------------- 1 | #include "CLI/CLI.hpp" 2 | #include "../external/FastPFor/headers/codecfactory.h" 3 | #include "gpu_ic/utils/binary_freq_collection.hpp" 4 | #include "gpu_ic/utils/progress.hpp" 5 | #include "gpu_ic/utils/bit_ostream.hpp" 6 | #include "gpu_ic/utils/bit_istream.hpp" 7 | #include "gpu_ic/utils/tight_variable_byte.hpp" 8 | #include "gpu_ic/utils/index.hpp" 9 | #include 
"gpu_ic/utils/mapper.hpp" 10 | #include "gpu_ic/utils/utils.hpp" 11 | #include "mio/mmap.hpp" 12 | #include 13 | #include 14 | 15 | using namespace gpu_ic; 16 | using namespace FastPForLib; 17 | using clock_type = std::chrono::high_resolution_clock; 18 | 19 | template 20 | void perftest(const std::string &filename, const std::vector &terms) 21 | { 22 | Codec codec; 23 | gpu_ic::index coll; 24 | mio::mmap_source m; 25 | std::error_code error; 26 | m.map(filename, error); 27 | mapper::map(coll, m); 28 | 29 | std::vector>> long_lists; 30 | long_lists.reserve(terms.size()); 31 | for(auto&& t : terms) { 32 | std::vector tmp; 33 | auto n = coll.get_data(tmp, t); 34 | long_lists.push_back(std::make_pair(n, tmp)); 35 | } 36 | 37 | std::cout << "Scanning " << long_lists.size() << " posting list" << std::endl; 38 | std::chrono::duration elapsed(0); 39 | size_t postings = 0; 40 | for (auto i: long_lists) { 41 | std::vector decode_values(i.first); 42 | auto start = clock_type::now(); 43 | size_t n = 0; 44 | codec.decodeArray(reinterpret_cast(i.second.data()), i.second.size()/4, reinterpret_cast(decode_values.data()), n); 45 | auto end = clock_type::now(); 46 | elapsed += end - start; 47 | if(n != i.first) { 48 | std::cerr << "Error: number of decoded values " << n << ", actual number of values" << i.first << std::endl; 49 | } 50 | for (size_t i = 0; i < n; ++i) { 51 | utils::do_not_optimize_away(decode_values[i]); 52 | } 53 | postings += decode_values.size(); 54 | } 55 | 56 | double next_ns = elapsed.count() / postings * 1000000000; 57 | double b_int_s = postings / elapsed.count() / 1000000; 58 | std::cout << "Performed " << postings << " next()" 59 | << " in " << elapsed.count() << " [sec], " 60 | << std::fixed << std::setprecision(2) 61 | << next_ns << " [ns] x posting, " 62 | << b_int_s << " M ints/sec" 63 | << std::endl; 64 | 65 | } 66 | 67 | bool read_query(std::vector &ret, std::istream &is = std::cin) { 68 | ret.clear(); 69 | std::string line; 70 | if (!std::getline(is, line)) 71 | return false; 72 | std::istringstream iline(line); 73 | uint32_t term_id; 74 | while (iline >> term_id) { 75 | ret.push_back(term_id); 76 | } 77 | return true; 78 | } 79 | 80 | int main(int argc, char const *argv[]) 81 | { 82 | std::string type; 83 | std::string index_basename; 84 | std::string query_basename; 85 | 86 | CLI::App app{"compress_index - a tool for compressing an index."}; 87 | app.add_option("-t,--type", type, "Index type")->required(); 88 | app.add_option("-i,--index", index_basename, "Index basename")->required(); 89 | app.add_option("-q,--query", query_basename, "Query basename")->required(); 90 | CLI11_PARSE(app, argc, argv); 91 | 92 | std::vector terms; 93 | std::filebuf fb; 94 | size_t queries_num = 0; 95 | if (fb.open(query_basename, std::ios::in)) { 96 | std::istream is(&fb); 97 | std::vector q; 98 | while (read_query(q, is)) { 99 | queries_num+=1; 100 | terms.insert(terms.end(), q.begin(), q.end()); 101 | } 102 | } 103 | 104 | if (type == "simdbp") { 105 | CompositeCodec codec; 106 | perftest(index_basename, terms); 107 | } else if (type == "streamvbyte") { 108 | StreamVByte codec; 109 | perftest(index_basename, terms); 110 | } else if (type == "bp") { 111 | CompositeCodec codec; 112 | perftest(index_basename, terms); 113 | } else if (type == "varintgb") { 114 | VarIntGB<> codec; 115 | perftest(index_basename, terms); 116 | } else { 117 | std::cerr << "Unknown type" << std::endl; 118 | } 119 | 120 | 121 | 122 | // std::cout << "Queries: " << queries_num << ", terms: " << terms.size() << 
std::endl; 123 | 124 | 125 | return 0; 126 | } 127 | -------------------------------------------------------------------------------- /test_data/queries: -------------------------------------------------------------------------------- 1 | 101587 61936 2 | 40429 86328 3 | 13975 94987 102912 75488 86157 4 | 80811 110278 90269 96541 5 | 33726 6 | 78401 68238 7 | 59451 82510 8 | 110622 102912 53265 66945 43418 101818 99022 54523 54209 9 | 67842 54513 67848 10 | 55900 91909 11 | 51079 89883 12 | 38616 96982 13 | 97986 43403 14 | 106967 75552 59184 15 | 86328 82481 95555 80147 16 | 101785 47930 17 | 44232 103219 18 | 90882 72383 19 | 48145 68857 20 | 73102 55872 68283 21 | 43460 110362 22 | 46586 23 | 47320 33596 24 | 101682 72197 25 | 62885 43748 26 | 110278 44879 27 | 62574 93388 40150 68583 28 | 102046 74112 29 | 65953 111200 30 | 101365 17496 110642 53842 31 | 82777 83431 41152 44915 32 | 60341 49248 34323 95878 67486 75119 33 | 102133 112621 65989 34 | 60740 78250 62198 35 | 60392 75877 86281 36 | 67574 37 | 33856 88404 38 | 40975 39 | 97369 110949 40 | 110717 76695 110770 74156 102912 54599 42353 111450 41 | 73411 82481 72583 79520 46235 42 | 40013 42353 42958 106267 43 | 51571 51834 82481 91489 44 | 46410 47753 45 | 81496 33252 59377 46 | 80219 72531 82632 47 | 61559 110479 71821 48 | 46352 86758 75773 49 | 105328 44427 5924 86157 50 | 82607 103402 98558 51 | 59519 47436 39332 52 | 105329 61936 53 | 74447 49248 49285 54 | 67262 62044 105677 67262 62044 105677 96886 55 | 113086 52033 56 | 69774 67486 50806 57 | 43974 96023 91015 58 | 62557 59 | 86738 96807 40429 59978 57905 60 | 99001 78599 61 | 65446 91071 50240 93962 111030 62 | 55612 111457 63 | 82620 79303 111530 102324 97353 68820 34390 112715 66631 71126 69016 64 | 97366 85132 65 | 92457 91889 66 | 111200 67486 84677 67 | 86157 93388 82481 61684 41505 70086 8468 48343 68 | 111450 93388 91851 67486 94022 38961 69 | 102133 97188 47852 70 | 60392 44792 47436 39332 71 | 109782 78596 68754 42738 72 | 71780 82481 102046 91015 65989 73 | 102503 62083 74 | 34247 44390 75 | 42771 63843 76 | 93479 77 | 110622 68820 102133 47977 82481 102133 43302 78 | 45777 102533 61690 79 | 58684 42983 80 | 102133 39983 82481 77197 34202 76695 81 | 91753 40749 82 | 47487 57873 62029 83 | 33229 44941 84 | 69805 31550 42004 85 | 79610 98398 86 | 30300 32436 71869 59978 93645 94610 106016 87 | 90013 88 | 86938 74830 44915 89 | 69359 100705 58774 78596 102889 90 | 58625 48720 82481 78623 91 | 78500 49248 80811 90144 56796 92 | 33708 42738 93 | 93788 70008 93879 102339 96015 94 | 68581 58195 59978 53338 34202 88081 95 | 69805 96470 93944 96 | 102133 72383 48169 67558 97 | 110622 68820 53187 98 | 100035 68289 45194 79365 99 | 56694 100 | 92489 84496 47977 101 | 47675 44915 102 | 74156 53113 103 | 48792 82481 44782 44145 82481 104965 88209 104 | 50265 53793 95978 105 | 54599 34202 76462 106 | 52857 107 | 57681 47478 108 | 66014 102912 48819 58131 68462 98077 59953 109 | 60120 34194 110 | 64274 69016 83392 74156 69016 111 | 80432 102046 80422 112 | 51590 94716 79520 113 | 60317 75609 79072 114 | 76897 73806 115 | 101585 95555 116 | 99306 68335 68551 117 | 91214 95878 59978 96921 53338 93388 67486 77217 118 | 102133 57803 93670 44596 119 | 69571 74156 65063 120 | 57729 47034 101846 45930 121 | 70609 65356 122 | 71712 89029 105677 40967 99737 123 | 97478 32942 90144 56796 124 | 99856 42059 111730 125 | 101268 80644 126 | 75039 101681 127 | 81398 55245 84949 104433 128 | 68820 102133 40683 80689 51060 31550 46819 40683 129 | 93959 97650 33229 95458 130 | 61690 62697 131 
| 110278 44713 132 | 93788 69773 49248 49285 94399 133 | 52165 78514 89883 134 | 47089 82481 75567 135 | 58663 58634 69640 136 | 53889 105983 96013 105677 67486 59951 137 | 68645 58676 95458 103402 44145 138 | 59451 97116 139 | 90435 56089 88388 47753 140 | 66631 72410 141 | 65814 63815 75496 142 | 104388 44850 33229 47302 143 | 102555 87083 95997 91738 144 | 78567 65741 59978 93645 145 | 84890 98474 56035 146 | 66945 112832 74156 97319 95496 102176 60392 147 | 106967 33286 148 | 38616 76506 86773 149 | 98388 63026 150 | 69800 76231 151 | 60392 82179 82481 71537 152 | 41996 110299 153 | 42585 78960 46337 154 | 104317 72842 83942 57392 155 | 49314 47382 156 | 49251 59940 67558 157 | 102133 41029 47521 64342 158 | 44297 71101 159 | 96035 63790 99413 160 | 80377 83553 71627 161 | 33385 103552 51209 162 | 32556 34202 57081 163 | 104322 103029 164 | 79660 103590 98779 87320 165 | 62029 30298 4807 96598 26877 6386 46406 47487 166 | 87869 111161 80913 68238 167 | 98289 85861 98077 168 | 88154 110278 68583 60392 169 | 97600 96472 96062 170 | 80377 112825 47089 89876 88225 171 | 86000 101610 67910 172 | 54191 58195 173 | 106830 82481 104506 76023 58520 174 | 100072 175 | 64131 51040 92214 101985 176 | 86537 60870 177 | 88435 110278 32606 178 | 81950 47436 39332 179 | 100437 87304 100018 180 | 72377 87092 181 | 42250 182 | 44241 59978 93645 67486 59451 183 | 104801 98449 184 | 65447 82481 95754 92013 80811 185 | 31550 109770 82984 102133 78408 78623 186 | 54550 67486 67203 8802 187 | 110770 84205 43628 75415 38658 61157 100705 188 | 74433 49248 101444 38817 189 | 69571 61327 190 | 56809 41152 191 | 41911 103874 192 | 50855 82984 45058 47750 193 | 72231 54729 194 | 41173 195 | 105871 62567 62697 59978 54935 196 | 49806 65959 197 | 46444 47487 198 | 60845 91919 199 | 32007 200 | 102095 112839 201 | 43302 44961 73912 202 | 110349 45930 203 | 57771 64563 110245 96541 204 | 97919 48164 102749 205 | 94508 59978 63248 206 | 91705 73102 50393 207 | 60392 98612 101985 47427 67203 208 | 110786 86769 39667 109901 103219 2671 209 | 63254 47673 97604 210 | 82697 75944 103402 211 | 97593 34202 62207 47753 59978 46369 212 | 71089 34175 213 | 33300 76282 214 | 85795 33745 215 | 65959 80377 112825 216 | 48754 89457 50481 97213 217 | 45286 218 | 77016 65807 219 | 93959 89635 220 | 4542 84803 221 | 65543 222 | 105922 80724 60551 86294 105677 103960 223 | 69628 42585 33229 73293 224 | 106928 47521 67701 110389 225 | 96585 51814 226 | 109945 94508 82481 88549 227 | 59995 89564 49516 55913 228 | 98449 87992 69227 40277 85111 229 | 49254 65741 73764 230 | 46248 12608 60458 231 | 102133 51198 113242 232 | 45612 76695 233 | 43422 64630 234 | 86157 71692 75182 235 | 68689 62558 236 | 85853 60484 91015 67486 80377 69613 237 | 66333 96160 238 | 111542 39667 65741 239 | 65741 57563 73126 240 | 90595 97823 53778 45773 93388 79216 241 | 8859 242 | 66309 97919 243 | 84734 94508 60458 44782 244 | 34281 67486 33941 245 | 80377 69613 53287 246 | 46556 86987 247 | 69032 55929 52484 248 | 95458 60836 65741 88572 249 | 80913 46579 72575 41346 32477 250 | 70461 251 | 63837 93388 49024 78067 252 | 42353 51339 253 | 93176 254 | 97489 84852 255 | 84672 89564 256 | 67558 257 | 27281 258 | 47647 80143 78250 259 | 97863 32177 260 | 112916 61891 82620 261 | 39717 40712 64889 262 | 39749 80410 263 | 111497 49248 264 | 105871 51834 82481 54935 83069 85130 265 | 61508 109936 102679 266 | 111457 85054 267 | 33883 268 | 72160 95997 269 | 87439 82519 270 | 39363 100394 84617 271 | 57929 105065 68394 272 | 34267 273 | 75721 98492 42738 82481 49134 274 
| 77175 38658 275 | 98955 97248 96035 276 | 48062 99737 93880 47223 277 | 65741 86670 278 | 85319 71012 279 | 79365 104515 40277 66631 71573 54383 93388 79365 54383 100705 280 | 85853 64590 281 | 67567 282 | 94508 113242 44381 32606 283 | 59451 98492 53114 82481 49053 284 | 110952 80377 69613 82179 58076 9365 285 | 103393 84803 67486 47979 97986 286 | 32942 33944 87059 96541 287 | 111530 102912 77013 97353 288 | 44941 79216 45804 289 | 64169 51039 290 | 46607 100605 59978 79216 291 | 98705 45882 34202 111635 64585 292 | 32747 31550 85853 293 | 103368 63248 294 | 52853 112626 295 | 86783 72044 59439 296 | 49066 95458 297 | 44596 98492 48757 101985 298 | 43653 65886 96216 93536 299 | 63864 9072 103744 300 | 69032 84983 95868 99381 301 | 90640 102142 106822 80377 112825 302 | 47521 76492 303 | 80377 69613 80410 304 | 59951 99019 305 | 99373 67486 78960 306 | 110684 111455 79303 69453 53612 73754 307 | 43987 86092 308 | 34522 84496 49472 309 | 70624 310 | 102339 34202 74890 39919 48343 311 | 83993 48669 91087 312 | 51400 49583 313 | 106403 47089 99045 314 | 65959 55753 71627 75361 315 | 85065 89402 47930 316 | 42490 317 | 91013 102912 44347 60870 318 | 60392 88156 53847 319 | 76756 41520 104515 93388 31550 320 | 112799 41183 68820 96935 102181 102133 321 | 98819 49251 79216 322 | 95793 96987 323 | 80811 74112 324 | 40157 76848 43843 79303 101688 325 | 68271 101635 326 | 87885 64601 327 | 45967 104367 83015 60120 79315 328 | 9174 59995 57368 329 | 69553 106830 330 | 63974 331 | 84852 51834 78464 106255 332 | 54264 96107 333 | 104405 96293 48186 59978 110677 83392 81520 102265 334 | 99045 110663 102265 95217 78960 67486 69297 77095 335 | 47977 67266 336 | 44782 80410 84689 337 | 74964 64286 39332 338 | 79610 64619 9174 46410 47753 339 | 52853 65959 340 | 61566 70878 341 | 112601 79303 92489 342 | 94679 41646 32241 343 | 41650 83906 78567 344 | 55987 74044 63248 345 | 44878 92539 93143 346 | 56498 77200 347 | 39750 97650 348 | 87869 111450 349 | 60392 85801 94916 350 | 60830 351 | 60392 57206 82481 88464 111542 90847 352 | 93959 78586 353 | 69805 13974 6756 354 | 71860 355 | 86110 45512 356 | 56640 49248 94508 53047 357 | 32804 34202 75808 358 | 86610 19275 359 | 89012 360 | 40240 74112 361 | 66014 102912 68183 31550 45860 57755 362 | 97578 111530 59978 60484 363 | 61293 82481 87731 364 | 51656 97353 365 | 60612 366 | 90144 56796 67486 63326 82713 367 | 77811 60870 368 | 93925 76278 369 | 94904 85497 370 | 102168 371 | 95831 88277 372 | 86113 96015 373 | 80811 65959 374 | 75799 65907 86157 375 | 42395 84494 376 | 8682 102476 377 | 64756 70537 378 | 68910 379 | 97356 48943 380 | 94679 381 | 89613 63041 67486 42857 66839 382 | 105922 82549 88153 87992 383 | 41650 105871 41152 384 | 69291 54520 63814 385 | 91754 74719 110639 386 | 71730 49248 58828 387 | 48928 43179 63334 388 | 89621 98558 32804 87398 83459 389 | 59451 71899 101813 47753 390 | 73414 102912 50745 93388 78250 44381 91787 391 | 69227 78542 392 | 102133 104677 78266 80410 84689 393 | 89283 65959 110507 103834 32807 394 | 112590 46758 53831 48169 395 | 99008 89437 60535 78623 396 | 80377 112825 95668 80643 47521 397 | 92739 398 | 102339 91795 82984 103402 399 | 66204 49248 49285 400 | 105922 102133 61091 52558 13828 91223 42958 401 | 93190 50806 402 | 103189 101119 85189 403 | 99378 63218 59978 31550 58181 87083 63176 404 | 71101 405 | 59451 91355 83446 406 | 85003 82481 48062 96921 86799 59978 85314 407 | 70911 408 | 82834 111457 409 | 40467 46414 410 | 53778 76438 411 | 82070 69904 80410 412 | 66246 81952 413 | 66014 102912 48510 31550 
97640 95173 414 | 86157 82481 62521 63041 102133 110949 96530 415 | 103446 416 | 64855 60146 417 | 73885 44611 39332 418 | 34393 49569 91087 419 | 92007 59954 420 | 68789 95458 421 | 55364 75285 72096 33432 422 | 48731 33252 423 | 71102 58520 53718 86328 424 | 61647 34202 47415 56096 67486 102133 54523 74719 425 | 46630 106255 94508 426 | 82620 79303 75285 66631 74156 65357 97142 427 | 34281 67486 33941 89437 428 | 52672 104474 70970 429 | 54577 62065 430 | 53573 431 | 52309 44879 432 | 103552 67486 43388 433 | 91754 33634 434 | 59978 75428 82915 80081 435 | 88154 75471 102912 47977 82481 33353 436 | 61625 103347 437 | 44893 67486 102133 32674 438 | 73783 66358 439 | 81507 75476 440 | 10647 42254 66853 441 | 110622 66945 112850 53338 61784 42284 44381 31550 102533 442 | 48669 88081 67695 443 | 48343 67486 41597 67702 444 | 52079 59451 65664 68070 41158 445 | 90953 109804 446 | 89575 60535 102133 90144 111591 447 | 32556 84936 448 | 96392 46410 449 | 102133 50714 106909 47753 78623 450 | 55245 53484 49285 451 | 110291 452 | 94680 44882 44056 57457 66113 103219 453 | 98492 40732 82481 102046 454 | 60392 102116 112832 45169 455 | 46535 86587 39212 81926 456 | 111389 457 | 41389 49248 71537 61559 458 | 110230 48030 75739 74830 459 | 69876 51553 106251 90144 98492 39842 460 | 90144 39012 461 | 51039 43703 462 | 85497 98558 91767 463 | 97425 51021 87059 464 | 47089 82481 83588 33353 465 | 65959 34202 78553 59978 93645 67486 97478 102046 466 | 57916 49248 85515 88846 90374 467 | 88374 53793 468 | 65938 79568 58828 469 | 80926 85619 470 | 85975 102535 471 | 106830 61241 40467 472 | 83918 40702 91015 88438 473 | 80443 474 | 44189 44824 475 | 46556 48087 88438 476 | 33972 80525 477 | 65768 478 | 46328 34202 102133 46758 57613 78623 479 | 61684 45612 480 | 65357 481 | 46999 96987 90144 56796 33003 482 | 96267 32199 483 | 49569 45169 40150 88323 484 | 107372 40601 23611 485 | 46406 59377 486 | 56006 39992 67486 487 | 65356 86281 488 | 43616 95458 489 | 68565 94045 40702 490 | 86066 68381 33262 491 | 106170 65745 492 | 53576 55403 493 | 79075 494 | 85577 43189 495 | 46414 93766 496 | 96392 61241 497 | 101688 46344 498 | 45147 58429 96216 96676 499 | 39485 49251 500 | 43537 501 | -------------------------------------------------------------------------------- /test_data/test_collection.docs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amallia/gpu-integers-compression/a120ed89d4c5eed5761402147525ef480cdcdc93/test_data/test_collection.docs -------------------------------------------------------------------------------- /test_data/test_collection.freqs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amallia/gpu-integers-compression/a120ed89d4c5eed5761402147525ef480cdcdc93/test_data/test_collection.freqs --------------------------------------------------------------------------------
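The snippet below is a small round-trip sketch for the host-side utilities listed above (tight_variable_byte together with utils::delta_encode/delta_decode). It is not part of the repository sources; it assumes the project's include/ directory is on the compiler's include path (so that the "gpu_ic/utils/..." paths resolve) and that the headers carry their original template arguments (e.g. std::vector<uint8_t> in encode_single), which were stripped in parts of this dump. The docid values are made-up test input.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

#include "gpu_ic/utils/tight_variable_byte.hpp"
#include "gpu_ic/utils/utils.hpp"

int main() {
    // A small, strictly increasing docid list.
    std::vector<uint32_t> docs = {3, 7, 130, 16511, 16512};
    const std::vector<uint32_t> original = docs;

    // Turn the docids into d-gaps in place (the first element is kept as-is).
    utils::delta_encode(docs.data(), docs.size());

    // Variable-byte encode each gap; the high bit marks the last byte of a value.
    std::vector<uint8_t> buf;
    for (uint32_t gap : docs) {
        tight_variable_byte::encode_single(gap, buf);
    }

    // Decode the byte stream and undo the delta encoding.
    std::vector<uint32_t> decoded(docs.size());
    tight_variable_byte::decode(buf.data(), decoded.data(), decoded.size());
    utils::delta_decode(decoded.data(), decoded.size());

    // The round trip must reproduce the original docids exactly.
    for (std::size_t i = 0; i < original.size(); ++i) {
        assert(decoded[i] == original[i]);
    }
    std::cout << "Round-tripped " << original.size() << " docids in " << buf.size() << " bytes\n";
    return 0;
}

Note that the index-building and verification code in compress_index.cpp and cuda_compress_index.cu forms its gaps as doc - last_doc - 1, i.e. it also subtracts one; the sketch above uses the plain differences produced by utils::delta_encode.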