├── .clang-format ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── external └── CMakeLists.txt ├── include ├── hft │ ├── README.md │ ├── common.hpp │ ├── darray.hpp │ ├── fenwick.hpp │ └── fenwick │ │ ├── bitf.hpp │ │ ├── bitl.hpp │ │ ├── bytef.hpp │ │ ├── bytel.hpp │ │ ├── fenwick_tree.hpp │ │ ├── fixedf.hpp │ │ ├── fixedl.hpp │ │ ├── hybrid.hpp │ │ ├── typef.hpp │ │ └── typel.hpp ├── mranisz │ ├── README.md │ └── shared │ │ ├── rank.hpp │ │ └── select.hpp ├── mutable_bitmap.hpp ├── poppy │ ├── README.md │ ├── bitmap.h │ ├── popcount.h │ └── shared.h ├── rank_select_algorithms │ ├── common.hpp │ ├── rank.hpp │ └── select.hpp ├── segment_tree │ ├── avx2 │ │ ├── node128.hpp │ │ ├── node32.hpp │ │ ├── node64.hpp │ │ ├── search_common.hpp │ │ ├── segment_tree.hpp │ │ ├── segment_tree_macros.hpp │ │ └── tables.hpp │ └── avx512 │ │ ├── node128.hpp │ │ ├── node256.hpp │ │ ├── node512.hpp │ │ ├── print_table.py │ │ ├── segment_tree.hpp │ │ ├── segment_tree_macros.hpp │ │ └── tables.hpp ├── types.hpp └── util.hpp ├── src ├── CMakeLists.txt ├── collect_results.sh ├── collect_results_mutable_bitmap.py ├── perf_compressed_bitmap.cpp ├── perf_hft.cpp ├── perf_immutable_bitmap.cpp ├── perf_mutable_bitmap.cpp ├── perf_popcnt256.cpp ├── perf_popcnt512.cpp ├── perf_popcnt_for_avx_select256.cpp ├── perf_popcnt_for_avx_select512.cpp ├── perf_prefix_sum.cpp ├── perf_prefix_sum256.cpp ├── perf_prefix_sum512.cpp ├── perf_rank256.cpp ├── perf_rank512.cpp ├── perf_rank64.cpp ├── perf_search256.cpp ├── perf_search512.cpp ├── perf_select256.cpp ├── perf_select512.cpp └── perf_select64.cpp └── test ├── test_common.hpp ├── test_mutable_bitmap.cpp ├── test_mutable_bitmap.hpp ├── test_node.hpp ├── test_nodes.cpp ├── test_rank256.cpp ├── test_rank512.cpp ├── test_rank_select_algorithms.hpp ├── test_segment_tree.cpp ├── test_select256.cpp ├── test_select512.cpp └── test_tree.hpp /.clang-format: 
-------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Google 4 | AccessModifierOffset: -4 5 | AlignAfterOpenBracket: Align 6 | AlignConsecutiveAssignments: false 7 | AlignConsecutiveDeclarations: false 8 | AlignEscapedNewlines: Left 9 | AlignOperands: true 10 | AlignTrailingComments: true 11 | AllowAllParametersOfDeclarationOnNextLine: true 12 | AllowShortBlocksOnASingleLine: true 13 | AllowShortCaseLabelsOnASingleLine: false 14 | AllowShortFunctionsOnASingleLine: Empty 15 | AllowShortIfStatementsOnASingleLine: true 16 | AllowShortLoopsOnASingleLine: true 17 | AlwaysBreakAfterDefinitionReturnType: None 18 | AlwaysBreakAfterReturnType: None 19 | AlwaysBreakBeforeMultilineStrings: true 20 | AlwaysBreakTemplateDeclarations: Yes 21 | BinPackArguments: true 22 | BinPackParameters: true 23 | BraceWrapping: 24 | AfterClass: false 25 | AfterControlStatement: false 26 | AfterEnum: false 27 | AfterFunction: false 28 | AfterNamespace: false 29 | AfterObjCDeclaration: false 30 | AfterStruct: false 31 | AfterUnion: false 32 | AfterExternBlock: false 33 | BeforeCatch: false 34 | BeforeElse: false 35 | IndentBraces: false 36 | SplitEmptyFunction: true 37 | SplitEmptyRecord: true 38 | SplitEmptyNamespace: true 39 | BreakBeforeBinaryOperators: None 40 | BreakBeforeBraces: Attach 41 | BreakBeforeInheritanceComma: false 42 | BreakInheritanceList: BeforeComma 43 | BreakBeforeTernaryOperators: true 44 | BreakConstructorInitializersBeforeComma: true 45 | BreakConstructorInitializers: BeforeComma 46 | BreakAfterJavaFieldAnnotations: false 47 | BreakStringLiterals: true 48 | ColumnLimit: 80 49 | CommentPragmas: '^ IWYU pragma:' 50 | CompactNamespaces: false 51 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 52 | ConstructorInitializerIndentWidth: 4 53 | ContinuationIndentWidth: 4 54 | Cpp11BracedListStyle: true 55 | DerivePointerAlignment: false 56 | DisableFormat: false 57 | ExperimentalAutoDetectBinPacking: 
false 58 | FixNamespaceComments: true 59 | ForEachMacros: 60 | - foreach 61 | - Q_FOREACH 62 | - BOOST_FOREACH 63 | IncludeBlocks: Preserve 64 | IncludeCategories: 65 | - Regex: '^' 66 | Priority: 2 67 | - Regex: '^<.*\.h>' 68 | Priority: 1 69 | - Regex: '^<.*' 70 | Priority: 2 71 | - Regex: '.*' 72 | Priority: 3 73 | IncludeIsMainRegex: '([-_](test|unittest))?$' 74 | IndentCaseLabels: true 75 | IndentPPDirectives: None 76 | IndentWidth: 4 77 | IndentWrappedFunctionNames: false 78 | JavaScriptQuotes: Leave 79 | JavaScriptWrapImports: true 80 | KeepEmptyLinesAtTheStartOfBlocks: false 81 | MacroBlockBegin: '' 82 | MacroBlockEnd: '' 83 | MaxEmptyLinesToKeep: 1 84 | NamespaceIndentation: None 85 | ObjCBinPackProtocolList: Never 86 | ObjCBlockIndentWidth: 2 87 | ObjCSpaceAfterProperty: false 88 | ObjCSpaceBeforeProtocolList: true 89 | PenaltyBreakAssignment: 2 90 | PenaltyBreakBeforeFirstCallParameter: 1 91 | PenaltyBreakComment: 300 92 | PenaltyBreakFirstLessLess: 120 93 | PenaltyBreakString: 1000 94 | PenaltyBreakTemplateDeclaration: 10 95 | PenaltyExcessCharacter: 1000000 96 | PenaltyReturnTypeOnItsOwnLine: 200 97 | PointerAlignment: Left 98 | RawStringFormats: 99 | - Language: Cpp 100 | Delimiters: 101 | - cc 102 | - CC 103 | - cpp 104 | - Cpp 105 | - CPP 106 | - 'c++' 107 | - 'C++' 108 | CanonicalDelimiter: '' 109 | BasedOnStyle: google 110 | - Language: TextProto 111 | Delimiters: 112 | - pb 113 | - PB 114 | - proto 115 | - PROTO 116 | EnclosingFunctions: 117 | - EqualsProto 118 | - EquivToProto 119 | - PARSE_PARTIAL_TEXT_PROTO 120 | - PARSE_TEST_PROTO 121 | - PARSE_TEXT_PROTO 122 | - ParseTextOrDie 123 | - ParseTextProtoOrDie 124 | CanonicalDelimiter: '' 125 | BasedOnStyle: google 126 | ReflowComments: true 127 | SortIncludes: false 128 | SortUsingDeclarations: false 129 | SpaceAfterCStyleCast: false 130 | SpaceAfterTemplateKeyword: true 131 | SpaceBeforeAssignmentOperators: true 132 | SpaceBeforeCpp11BracedList: false 133 | SpaceBeforeCtorInitializerColon: true 
134 | SpaceBeforeInheritanceColon: true 135 | SpaceBeforeParens: ControlStatements 136 | SpaceBeforeRangeBasedForLoopColon: true 137 | SpaceInEmptyParentheses: false 138 | SpacesBeforeTrailingComments: 2 139 | SpacesInAngles: false 140 | SpacesInContainerLiterals: true 141 | SpacesInCStyleCastParentheses: false 142 | SpacesInParentheses: false 143 | SpacesInSquareBrackets: false 144 | Standard: Auto 145 | StatementMacros: 146 | - Q_UNUSED 147 | - QT_REQUIRE_VERSION 148 | TabWidth: 8 149 | UseTab: Never 150 | ... 151 | 152 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | build* 3 | old -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "external/essentials"] 2 | path = external/essentials 3 | url = https://github.com/jermp/essentials.git 4 | [submodule "external/doctest"] 5 | path = external/doctest 6 | url = https://github.com/onqtam/doctest.git 7 | [submodule "external/cmd_line_parser"] 8 | path = external/cmd_line_parser 9 | url = https://github.com/jermp/cmd_line_parser.git 10 | [submodule "external/succinct"] 11 | path = external/succinct 12 | url = https://github.com/ot/succinct.git 13 | [submodule "external/sdsl-lite"] 14 | path = external/sdsl-lite 15 | url = https://github.com/simongog/sdsl-lite.git 16 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(MRS) # Mutable Rank/Select 3 | 4 | if (NOT CMAKE_BUILD_TYPE) 5 | set(CMAKE_BUILD_TYPE "Release") 6 | endif() 7 | 8 | MESSAGE(STATUS "CMAKE_BUILD_TYPE: " ${CMAKE_BUILD_TYPE}) 9 | 10 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) 11 | 12 | 
if (DISABLE_AVX) 13 | MESSAGE(STATUS "SIMD AVX disabled") 14 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDISABLE_AVX") 15 | endif() 16 | 17 | if (AVX512) 18 | MESSAGE(STATUS "SIMD AVX512 enabled") 19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi2 -mavx512f -mavx512bw -mavx512vl -mavx512vpopcntdq -DAVX512") 20 | endif() 21 | 22 | if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 23 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") 24 | endif () 25 | 26 | if (UNIX) 27 | 28 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") 29 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") 30 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") 31 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") 32 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-missing-braces") 33 | 34 | if (USE_SANITIZERS) 35 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer") 36 | endif() 37 | 38 | endif() 39 | 40 | MESSAGE(STATUS "Compiling with ${CMAKE_CXX_FLAGS}") 41 | 42 | # For succinct 43 | find_package(Boost 1.42.0 COMPONENTS unit_test_framework iostreams system filesystem REQUIRED) 44 | include_directories(${Boost_INCLUDE_DIRS}) 45 | link_directories(${Boost_LIBRARY_DIRS}) 46 | 47 | include_directories(${MRS_SOURCE_DIR}/include) 48 | 49 | add_subdirectory(external) 50 | add_subdirectory(src) 51 | 52 | enable_testing() 53 | file(GLOB TEST_SOURCES test/test_*.cpp) 54 | foreach(TEST_SRC ${TEST_SOURCES}) 55 | get_filename_component (TEST_SRC_NAME ${TEST_SRC} NAME_WE) # without extension 56 | add_executable(${TEST_SRC_NAME} ${TEST_SRC}) 57 | add_test(${TEST_SRC_NAME} ${TEST_SRC_NAME}) 58 | endforeach(TEST_SRC) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright 2020 Giulio Ermanno Pibiri and Shunsuke Kanda 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | 
of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included 13 | in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 | OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Rank/Select Queries over Mutable Bitmaps 2 | ======================================== 3 | 4 | A C++ library providing rank/select queries over mutable bitmaps. 5 | 6 | Given a *mutable* bitmap B[0..u) where n bits are set, the *rank/select problem* asks for a data structure built from B that supports: 7 | 8 | - rank(i) is the number of bits set in B[0..i], for 0 ≤ i < u. 9 | - select(i) is the position of the i-th bit set, for 0 ≤ i < n. 10 | - flip(i) toggles B[i], for 0 ≤ i < u. 11 | - access(i) = B[i], for 0 ≤ i < u. 12 | 13 | The input bitmap is partitioned into blocks and a tree index is built over them. 14 | The tree index implemented in the library is an optimized b-ary Segment-Tree with 15 | SIMD AVX2/AVX-512 instructions. 
16 | You can test a block size of 256 or 512 bits, and various rank/select algorithms for the blocks such as broadword techniques, CPU intrinsics, and SIMD instructions. 17 | 18 | 19 | For a description and analysis of all these data structures, see the paper [Rank/Select Queries over Mutable Bitmaps](https://www.sciencedirect.com/science/article/pii/S0306437921000235), by Giulio Ermanno Pibiri and Shunsuke Kanda, Information Systems (INFOSYS), 2021. 20 | 21 | Please cite this paper if you use the library. 22 | 23 | Compiling the code 24 | ------------------ 25 | 26 | The code is tested on Linux with `gcc` 7.4 and 9.2.1; on Mac 10.14 with `clang` 10.0.0 and 11.0.0. 27 | To build the code, [`CMake`](https://cmake.org/) is required. 28 | 29 | Clone the repository with 30 | 31 | git clone --recursive https://github.com/jermp/mutable_rank_select.git 32 | 33 | If you have cloned the repository without `--recursive`, you will need to perform the following commands before 34 | compiling: 35 | 36 | git submodule init 37 | git submodule update 38 | 39 | To compile the code for a release environment (see file `CMakeLists.txt` for the used compilation flags), it is sufficient to do the following: 40 | 41 | mkdir build 42 | cd build 43 | cmake .. 44 | make -j 45 | 46 | By default, SIMD AVX instructions are enabled (flag `-DDISABLE_AVX=Off`). If you want to 47 | disable them (although your compiler has proper support), you can compile with 48 | 49 | cmake .. -DDISABLE_AVX=On 50 | make -j 51 | 52 | The library also exploits the new AVX512 instruction set. If you have proper support, 53 | you can enable those instructions with 54 | 55 | cmake .. -DAVX512=On 56 | make -j 57 | 58 | For the best performance, we recommend compiling with: 59 | 60 | cmake .. -DCMAKE_BUILD_TYPE=Release -DUSE_SANITIZERS=Off -DDISABLE_AVX=Off -DAVX512=On 61 | make -j 62 | 63 | For a testing environment, use the following instead: 64 | 65 | mkdir debug_build 66 | cd debug_build 67 | cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DUSE_SANITIZERS=On 68 | make -j 69 | 70 | Benchmarks 71 | --------- 72 | 73 | To benchmark the running time of rank, select, and flip for the desired data structure, use the program `src/perf_mutable_bitmap`. Running the program without arguments will show what arguments are required. (See also the file `src/perf_mutable_bitmap.cpp` for a list of available data structure types.) 74 | 75 | Below we show some examples. 76 | 77 | - The command 78 | 79 | ``` 80 | ./perf_mutable_bitmap avx2_256_a rank 0.3 81 | ``` 82 | 83 | will benchmark the speed of rank queries for the b-ary Segment-Tree data structure with AVX2 instructions for the bitmap of density 30%, where the block size is 256. The bitmap is tested by varying the size from 2^9 to 2^32. The suffix `_a` indicates the type of rank algorithms for a small bitmap (see `include/types.hpp` for the details). 84 | 85 | - The command 86 | 87 | ``` 88 | ./perf_mutable_bitmap avx512_512_b select 0.5 89 | ``` 90 | 91 | will benchmark the speed of select queries for the b-ary Segment-Tree data structure with AVX-512 instructions for the bitmap of density 50%, where the block size is 512. 92 | 93 | - The command 94 | 95 | ``` 96 | ./perf_mutable_bitmap avx512_256_c flip 0.8 97 | ``` 98 | 99 | will benchmark the speed of flip queries for the b-ary Segment-Tree data structure with AVX-512 instructions for the bitmap of density 80%, where the block size is 256. 100 | 101 | Unit tests 102 | ----------- 103 | 104 | The unit tests are written using [doctest](https://github.com/onqtam/doctest). 
105 | 106 | After compilation, it is advised 107 | to run the unit tests with: 108 | 109 | make test 110 | -------------------------------------------------------------------------------- /external/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(sdsl-lite EXCLUDE_FROM_ALL) 2 | add_subdirectory(succinct EXCLUDE_FROM_ALL) 3 | -------------------------------------------------------------------------------- /include/hft/README.md: -------------------------------------------------------------------------------- 1 | Hybrid Compact Fenwick Trees 2 | ---- 3 | 4 | Library implemented by Marchini and Vigna, available [here](https://github.com/pacman616/hybrid-fenwick-tree). 5 | 6 | The data structures are described in the paper *[Compact Fenwick trees for dynamic ranking and selection](https://arxiv.org/pdf/1904.12370.pdf)* by the same authors. -------------------------------------------------------------------------------- /include/hft/darray.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __DARRAY_HPP__ 2 | #define __DARRAY_HPP__ 3 | 4 | #include "common.hpp" 5 | #include 6 | #include 7 | #include 8 | 9 | namespace hft { 10 | 11 | /** 12 | * class DArray - Dynamically-allocated fixed-sized array with hugepages support 13 | * 14 | * This class is a wrapper of mmap. It supports hugepages [1], transparent 15 | * hugepages [2]. It guarantees to return page-aligned pointers. 
It has four 16 | * behaviors you can choose by defining their name: 17 | * - HFT_FORCEHUGE: every request allocs hugepages; 18 | * - HFT_FORCENOHUGE: every request allocs standard 4k pages (non-transparent) 19 | * - HFT_HUGE: every request of at least 2MB allocs hugepages while smaller use 20 | * 4k pages; 21 | * 22 | * By default (without defining any of the above) every request of at least 2MB 23 | * allocs 4k pages, but they are transparently defragmented into hugepages by 24 | * khugepaged thanks to a call to madvise. 25 | * 26 | * [1] https://www.kernel.org/doc/html/latest/admin-guide/mm/hugetlbpage.html 27 | * [2] https://www.kernel.org/doc/html/latest/admin-guide/mm/transhuge.html 28 | * NOTE(review): the code visible in this class only tests HFT_FORCE_HUGETLBPAGE (in page_aligned), and the MAP_HUGETLB / madvise variants in the constructor are commented out, so the macro names and the default behavior described above should be confirmed. 29 | */ 30 | template 31 | class DArray { 32 | public: 33 | static constexpr int PROT = PROT_READ | PROT_WRITE; 34 | static constexpr int FLAGS = MAP_PRIVATE | MAP_ANONYMOUS; 35 | 36 | private: 37 | size_t Size = 0; 38 | T* Buffer = nullptr; 39 | 40 | public: 41 | DArray() = default; 42 | 43 | /* Maps page_aligned(length) bytes of private anonymous memory; Buffer stays null when that byte count is 0. */ explicit DArray(size_t length) : Size(length) { 44 | const size_t space = page_aligned(Size); 45 | if (space) { 46 | // #ifdef HFT_FORCE_HUGETLBPAGE 47 | // void *mem = mmap(nullptr, space, PROT, FLAGS | MAP_HUGETLB, 48 | // -1, 0); assert(mem != MAP_FAILED && "mmap failed"); 49 | // #elif HFT_DISABLE_TRANSHUGE 50 | // void *mem = mmap(nullptr, space, PROT, FLAGS, -1, 0); 51 | // assert(mem != MAP_FAILED && "mmap failed"); 52 | // #else 53 | void* mem = mmap(nullptr, space, PROT, FLAGS, -1, 0); 54 | assert(mem != MAP_FAILED && "mmap failed"); 55 | 56 | // int adv = madvise(mem, space, MADV_HUGEPAGE); 57 | // assert(adv == 0 && "madvise failed"); 58 | // #endif 59 | 60 | Buffer = static_cast(mem); 61 | } 62 | } 63 | 64 | /* Move constructor: takes oth's Size and Buffer and leaves oth with 0 / nullptr. */ DArray(DArray&& oth) 65 | : Size(std::exchange(oth.Size, 0)) 66 | , Buffer(std::exchange(oth.Buffer, nullptr)) {} 67 | 68 | /* Move assignment: swaps Size and Buffer with oth. */ DArray& operator=(DArray&& oth) { 69 | swap(*this, oth); 70 | return *this; 71 | } 72 | 73 | /* Unmaps the buffer if one is held. */ ~DArray() { 74 | if (Buffer != nullptr) { 75 | int 
result = munmap(Buffer, page_aligned(Size)); 76 | assert(result == 0 && "mmunmap failed"); 77 | } 78 | } 79 | 80 | friend void swap(DArray& first, DArray& second) noexcept { 81 | std::swap(first.Size, second.Size); 82 | std::swap(first.Buffer, second.Buffer); 83 | } 84 | 85 | inline T* get() const { 86 | return Buffer; 87 | } 88 | 89 | inline T& operator[](size_t i) const { 90 | return Buffer[i]; 91 | }; 92 | 93 | inline size_t size() const { 94 | return Size; 95 | } 96 | 97 | /* Size in bits of this object plus the mapped region. */ size_t bitCount() const { 98 | return sizeof(DArray) * 8 + page_aligned(Size) * 8; 99 | } 100 | 101 | private: 102 | /* Byte count of size elements rounded up to a multiple of 4 KiB (2 MiB with HFT_FORCE_HUGETLBPAGE); yields 0 for size 0 because the unsigned subtraction wraps around. */ static size_t page_aligned(size_t size) { 103 | #ifdef HFT_FORCE_HUGETLBPAGE 104 | return ((2 * 1024 * 1024 - 1) | (size * sizeof(T) - 1)) + 1; 105 | #else 106 | return ((4 * 1024 - 1) | (size * sizeof(T) - 1)) + 1; 107 | #endif 108 | } 109 | 110 | /* Writes Size as a uint64_t passed through hton, then every element passed through hton. */ friend std::ostream& operator<<(std::ostream& os, const DArray& darray) { 111 | const uint64_t nsize = hton(static_cast(darray.Size)); 112 | os.write((char*)&nsize, sizeof(uint64_t)); 113 | 114 | for (size_t i = 0; i < darray.Size; ++i) { 115 | const T value = hton(darray[i]); 116 | os.write((char*)&value, sizeof(T)); 117 | } 118 | 119 | return os; 120 | } 121 | 122 | /* Reads a uint64_t size, replaces darray with a new DArray of ntoh(size) elements, then reads each element and passes it through ntoh. */ friend std::istream& operator>>(std::istream& is, DArray& darray) { 123 | uint64_t nsize; 124 | is.read((char*)&nsize, sizeof(uint64_t)); 125 | 126 | darray = DArray(ntoh(nsize)); 127 | 128 | for (size_t i = 0; i < darray.size(); ++i) { 129 | is.read((char*)&darray[i], sizeof(T)); 130 | darray[i] = ntoh(darray[i]); 131 | } 132 | 133 | return is; 134 | } 135 | }; // class DArray 136 | 137 | } // namespace hft 138 | 139 | #endif // __DARRAY_HPP__ 140 | -------------------------------------------------------------------------------- /include/hft/fenwick.hpp: -------------------------------------------------------------------------------- 1 | #include "fenwick/fenwick_tree.hpp" 2 | 3 | #include "fenwick/fixedf.hpp" 4 | #include "fenwick/fixedl.hpp" 5 | 6 | #include 
"fenwick/typef.hpp" 7 | #include "fenwick/typel.hpp" 8 | 9 | #include "fenwick/bytef.hpp" 10 | #include "fenwick/bytel.hpp" 11 | 12 | #include "fenwick/bitf.hpp" 13 | #include "fenwick/bitl.hpp" 14 | 15 | #include "fenwick/hybrid.hpp" 16 | -------------------------------------------------------------------------------- /include/hft/fenwick/bitf.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __FENWICK_BITF_HPP__ 2 | #define __FENWICK_BITF_HPP__ 3 | #include "fenwick_tree.hpp" 4 | #include 5 | 6 | namespace hft::fenwick { 7 | 8 | /** 9 | * class BitF - bit compression and classical node layout. 10 | * @sequence: sequence of integers. 11 | * @size: number of elements. 12 | * @BOUND: maximum value that @sequence can store. 13 | * 14 | */ 15 | template class BitF : public FenwickTree { 16 | public: 17 | static constexpr size_t BOUNDSIZE = ceil_log2_plus1(BOUND); 18 | static constexpr size_t STARTING_OFFSET = 1; 19 | static constexpr size_t END_PADDING = 56; 20 | static_assert(BOUNDSIZE >= 1 && BOUNDSIZE <= 55, "Some nodes will span on multiple words"); 21 | 22 | protected: 23 | size_t Size; 24 | DArray Tree; 25 | 26 | public: 27 | /* Builds the tree: first adds each sequence element into its own node, then for each power-of-two stride m adds the node at idx - m/2 into the node at idx. */ BitF(uint64_t sequence[], size_t size) 28 | : Size(size), Tree((first_bit_after(size) + END_PADDING + 7) >> 3) { 29 | for (size_t idx = 1; idx <= size; idx++) 30 | addToPartialFrequency(idx, sequence[idx - 1]); 31 | 32 | for (size_t m = 2; m <= size; m <<= 1) 33 | for (size_t idx = m; idx <= size; idx += m) 34 | addToPartialFrequency(idx, getPartialFrequency(idx - m / 2)); 35 | } 36 | 37 | /* Sums the partial frequencies at idx, clear_rho(idx), ... until idx reaches 0. */ virtual uint64_t prefix(size_t idx) const { 38 | uint64_t sum = 0; 39 | 40 | while (idx != 0) { 41 | sum += getPartialFrequency(idx); 42 | idx = clear_rho(idx); 43 | } 44 | 45 | return sum; 46 | } 47 | 48 | /* Adds inc to the node at idx and to every node reached by repeatedly adding mask_rho(idx), while idx <= Size. */ virtual void add(size_t idx, int64_t inc) { 49 | while (idx <= Size) { 50 | addToPartialFrequency(idx, inc); 51 | idx += mask_rho(idx); 52 | } 53 | } 54 | 55 | using FenwickTree::find; 56 | virtual size_t 
find(uint64_t *val) const { 57 | /* Walks m from mask_lambda(Size) downwards by halving; moves to node + m whenever that node exists and its partial frequency is <= *val, subtracting it from *val. */ size_t node = 0; 58 | 59 | for (size_t m = mask_lambda(Size); m != 0; m >>= 1) { 60 | if (node + m > Size) 61 | continue; 62 | 63 | const uint64_t value = getPartialFrequency(node + m); 64 | 65 | if (*val >= value) { 66 | node += m; 67 | *val -= value; 68 | } 69 | } 70 | 71 | return node; 72 | } 73 | 74 | using FenwickTree::compFind; 75 | /* Same descent as find, but compares *val against (BOUND << height) minus the stored partial frequency. */ virtual size_t compFind(uint64_t *val) const { 76 | size_t node = 0; 77 | 78 | for (size_t m = mask_lambda(Size); m != 0; m >>= 1) { 79 | if (node + m > Size) 80 | continue; 81 | 82 | const int height = rho(node + m); 83 | const uint64_t value = (BOUND << height) - getPartialFrequency(node + m); 84 | 85 | if (*val >= value) { 86 | node += m; 87 | *val -= value; 88 | } 89 | } 90 | 91 | return node; 92 | } 93 | 94 | virtual size_t size() const { return Size; } 95 | 96 | virtual size_t bitCount() const { 97 | return sizeof(BitF) * 8 + Tree.bitCount() - sizeof(Tree); 98 | } 99 | 100 | private: 101 | // TODO: try the last micro-improvement (email 05/05/19 09:51) 102 | /* Bit offset added to a node position: STARTING_OFFSET plus 64 for every 2^14 indices. */ inline static size_t holes(size_t idx) { return STARTING_OFFSET + (idx >> 14) * 64; } 103 | 104 | /* Bit position computed from idx with the same formula the accessors below apply to the decremented index. */ inline static size_t first_bit_after(size_t idx) { 105 | return (BOUNDSIZE + 1) * idx - popcount(idx) + holes(idx); 106 | } 107 | 108 | /* Copies the 8 bytes that contain the node and masks out its BOUNDSIZE + rho(idx) bits (rho is taken before idx is decremented). */ inline uint64_t getPartialFrequency(size_t idx) const { 109 | const uint64_t mask = (UINT64_C(1) << (BOUNDSIZE + rho(idx))) - 1; 110 | idx--; 111 | const uint64_t prod = (BOUNDSIZE + 1) * idx; 112 | const uint64_t pos = prod - popcount(idx) + holes(idx); 113 | 114 | uint64_t t; 115 | if ((prod + (BOUNDSIZE + 1)) % 64 == 0) { 116 | memcpy(&t, (uint64_t *)&Tree[0] + pos / 64, 8); 117 | return t >> (pos % 64) & mask; 118 | } else { 119 | memcpy(&t, &Tree[0] + pos / 8, 8); 120 | return t >> (pos % 8) & mask; 121 | } 122 | } 123 | 124 | /* Read-modify-write of the 8 bytes containing the node: value is shifted to the node's bit offset and added in place. */ inline void addToPartialFrequency(size_t idx, uint64_t value) { 125 | idx--; 126 | const uint64_t prod = (BOUNDSIZE + 1) * idx; 127 | const uint64_t pos = prod - popcount(idx) + 
holes(idx); 128 | 129 | uint64_t t; 130 | if ((prod + (BOUNDSIZE + 1)) % 64 == 0) { 131 | uint64_t *const p = (uint64_t *)&Tree[0] + pos / 64; 132 | memcpy(&t, p, 8); 133 | t += value << (pos % 64); 134 | memcpy(p, &t, 8); 135 | } else { 136 | uint8_t *const p = &Tree[0] + pos / 8; 137 | memcpy(&t, p, 8); 138 | t += value << (pos % 8); 139 | memcpy(p, &t, 8); 140 | } 141 | } 142 | 143 | /* Writes Size as a uint64_t passed through hton, followed by the Tree array. */ friend std::ostream &operator<<(std::ostream &os, const BitF &ft) { 144 | const uint64_t nsize = hton((uint64_t)ft.Size); 145 | os.write((char *)&nsize, sizeof(uint64_t)); 146 | 147 | return os << ft.Tree; 148 | } 149 | 150 | /* Reads Size (converted with ntoh), then the Tree array. */ friend std::istream &operator>>(std::istream &is, BitF &ft) { 151 | uint64_t nsize; 152 | is.read((char *)(&nsize), sizeof(uint64_t)); 153 | ft.Size = ntoh(nsize); 154 | 155 | return is >> ft.Tree; 156 | } 157 | }; 158 | 159 | } // namespace hft::fenwick 160 | 161 | #endif // __FENWICK_BITF_HPP__ 162 | -------------------------------------------------------------------------------- /include/hft/fenwick/bitl.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __FENWICK_BITL_HPP__ 2 | #define __FENWICK_BITL_HPP__ 3 | 4 | #include "fenwick_tree.hpp" 5 | 6 | namespace hft::fenwick { 7 | 8 | /** 9 | * class BitL - bit compression and level ordered node layout. 10 | * @sequence: sequence of integers. 11 | * @size: number of elements. 12 | * @BOUND: maximum value that @sequence can store. 13 | * 14 | */ 15 | template class BitL : public FenwickTree { 16 | public: 17 | static constexpr size_t BOUNDSIZE = ceil_log2_plus1(BOUND); 18 | static_assert(BOUNDSIZE >= 1 && BOUNDSIZE <= 64, "Leaves can't be stored in a 64-bit word"); 19 | 20 | protected: 21 | size_t Size, Levels; 22 | unique_ptr Level; 23 | DArray Tree; 24 | 25 | public: 26 | BitL(uint64_t sequence[], size_t size) 27 | : Size(size), Levels(size != 0 ? 
lambda(size) + 2 : 1), Level(make_unique(Levels)) { 28 | /* Level[i] is the bit offset at which level i starts; every node of level i - 1 occupies BOUNDSIZE - 1 + i bits. */ Level[0] = 0; 29 | for (size_t i = 1; i < Levels; i++) 30 | Level[i] = ((size + (1ULL << (i - 1))) / (1ULL << i)) * (BOUNDSIZE - 1 + i) + Level[i - 1]; 31 | 32 | Tree = DArray((Level[Levels - 1] >> 3) + 8); // +8 for safety 33 | 34 | /* For each level l, every node of that level gets its own sequence element plus the values already stored at lower levels j < l along the shifted index. */ for (size_t l = 0; l < Levels - 1; l++) { 35 | for (size_t node = 1ULL << l; node <= size; node += 1ULL << (l + 1)) { 36 | size_t sequence_idx = node - 1; 37 | uint64_t value = sequence[sequence_idx]; 38 | 39 | for (size_t j = 0; j < l; j++) { 40 | sequence_idx >>= 1; 41 | const size_t lowpos = Level[j] + (BOUNDSIZE + j) * sequence_idx; 42 | value += bitread(&Tree[lowpos / 8], lowpos % 8, BOUNDSIZE + j); 43 | } 44 | 45 | const size_t highpos = Level[l] + (BOUNDSIZE + l) * (node >> (l + 1)); 46 | bitwrite_inc(&Tree[highpos / 8], highpos % 8, BOUNDSIZE + l, value); 47 | } 48 | } 49 | } 50 | 51 | /* Sums node values at idx, clear_rho(idx), ... until idx is 0; a node of height h = rho(idx) sits in level h at slot idx >> (1 + h) and is BOUNDSIZE + h bits wide. */ virtual uint64_t prefix(size_t idx) const { 52 | uint64_t sum = 0; 53 | 54 | while (idx != 0) { 55 | const int height = rho(idx); 56 | const size_t pos = Level[height] + (idx >> (1 + height)) * (BOUNDSIZE + height); 57 | sum += bitread(&Tree[pos / 8], pos % 8, BOUNDSIZE + height); 58 | 59 | idx = clear_rho(idx); 60 | } 61 | 62 | return sum; 63 | } 64 | 65 | /* Adds inc to the node at idx and to every node reached by repeatedly adding mask_rho(idx), while idx <= Size. */ virtual void add(size_t idx, int64_t inc) { 66 | while (idx <= Size) { 67 | const int height = rho(idx); 68 | const size_t pos = Level[height] + (idx >> (1 + height)) * (BOUNDSIZE + height); 69 | bitwrite_inc(&Tree[pos / 8], pos % 8, BOUNDSIZE + height, inc); 70 | 71 | idx += mask_rho(idx); 72 | } 73 | } 74 | 75 | using FenwickTree::find; 76 | virtual size_t find(uint64_t *val) const { 77 | size_t node = 0, idx = 0; 78 | 79 | /* height counts down from Levels - 2 to 0; the loop ends when the unsigned decrement wraps to SIZE_MAX. Slots at or past the start of the next level are skipped. */ for (size_t height = Levels - 2; height != SIZE_MAX; height--) { 80 | const size_t pos = Level[height] + idx * (BOUNDSIZE + height); 81 | 82 | idx <<= 1; 83 | 84 | if (pos >= Level[height + 1]) 85 | continue; 86 | 87 | const uint64_t value = bitread(&Tree[pos / 8], pos % 8, BOUNDSIZE + height); 88 | 89 | if (*val >= 
value) { 90 | idx++; 91 | *val -= value; 92 | node += 1ULL << height; 93 | } 94 | } 95 | 96 | /* The result is clamped to Size. */ return min(node, Size); 97 | } 98 | 99 | using FenwickTree::compFind; 100 | /* Same descent as find, but compares *val against (BOUND << height) minus the stored value. */ virtual size_t compFind(uint64_t *val) const { 101 | size_t node = 0, idx = 0; 102 | 103 | for (size_t height = Levels - 2; height != SIZE_MAX; height--) { 104 | const size_t pos = Level[height] + idx * (BOUNDSIZE + height); 105 | 106 | idx <<= 1; 107 | 108 | if (pos >= Level[height + 1]) 109 | continue; 110 | 111 | const uint64_t value = 112 | (BOUND << height) - bitread(&Tree[pos / 8], pos % 8, BOUNDSIZE + height); 113 | 114 | if (*val >= value) { 115 | idx++; 116 | *val -= value; 117 | node += 1ULL << height; 118 | } 119 | } 120 | 121 | return min(node, Size); 122 | } 123 | 124 | virtual size_t size() const { return Size; } 125 | 126 | virtual size_t bitCount() const { 127 | return sizeof(BitL) * 8 + Tree.bitCount() - sizeof(Tree) + 128 | Levels * sizeof(size_t) * 8; 129 | } 130 | 131 | private: 132 | /* Writes Size, Levels and each Level entry as uint64_t values passed through hton, then the Tree array. */ friend std::ostream &operator<<(std::ostream &os, const BitL &ft) { 133 | const uint64_t nsize = hton((uint64_t)ft.Size); 134 | os.write((char *)&nsize, sizeof(uint64_t)); 135 | 136 | const uint64_t nlevels = hton((uint64_t)ft.Levels); 137 | os.write((char *)&nlevels, sizeof(uint64_t)); 138 | 139 | for (size_t i = 0; i < ft.Levels; ++i) { 140 | const uint64_t nlevel = hton((uint64_t)ft.Level[i]); 141 | os.write((char *)&nlevel, sizeof(uint64_t)); 142 | } 143 | 144 | return os << ft.Tree; 145 | } 146 | 147 | /* Reads the fields back in the same order, converting each with ntoh and reallocating Level before filling it. */ friend std::istream &operator>>(std::istream &is, BitL &ft) { 148 | uint64_t nsize; 149 | is.read((char *)(&nsize), sizeof(uint64_t)); 150 | ft.Size = ntoh(nsize); 151 | 152 | uint64_t nlevels; 153 | is.read((char *)&nlevels, sizeof(uint64_t)); 154 | ft.Levels = ntoh(nlevels); 155 | 156 | ft.Level = make_unique(ft.Levels); 157 | for (size_t i = 0; i < ft.Levels; ++i) { 158 | uint64_t nlevel; 159 | is.read((char *)&nlevel, sizeof(uint64_t)); 160 | ft.Level[i] = ntoh(nlevel); 161 | } 162 | 163 | 
return is >> ft.Tree; 164 | } 165 | }; 166 | 167 | } // namespace hft::fenwick 168 | 169 | #endif // __FENWICK_BITL_HPP__ 170 | -------------------------------------------------------------------------------- /include/hft/fenwick/bytef.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __FENWICK_BYTEF_HPP__ 2 | #define __FENWICK_BYTEF_HPP__ 3 | 4 | #include "fenwick_tree.hpp" 5 | 6 | namespace hft::fenwick { 7 | 8 | /** 9 | * class ByteF - byte compression and classical node layout. 10 | * @sequence: sequence of integers. 11 | * @size: number of elements. 12 | * @BOUND: maximum value that @sequence can store. 13 | * 14 | */ 15 | template class ByteF : public FenwickTree { 16 | public: 17 | static constexpr size_t BOUNDSIZE = ceil_log2_plus1(BOUND); 18 | static_assert(BOUNDSIZE >= 1 && BOUNDSIZE <= 64, "Leaves can't be stored in a 64-bit word"); 19 | 20 | protected: 21 | size_t Size; 22 | DArray Tree; 23 | 24 | public: 25 | ByteF(uint64_t sequence[], size_t size) : Size(size), Tree(pos(size + 1) + 8) { 26 | for (size_t i = 1; i <= size; i++) // first store every element in its own node 27 | bytewrite(&Tree[pos(i)], bytesize(i), sequence[i - 1]); 28 | 29 | for (size_t m = 2; m <= size; m <<= 1) { // then, for m = 2, 4, 8, ..., add node idx - m/2 into node idx 30 | for (size_t idx = m; idx <= size; idx += m) { 31 | const uint64_t left = byteread(&Tree[pos(idx)], bytesize(idx)); 32 | const uint64_t right = byteread(&Tree[pos(idx - m / 2)], bytesize(idx - m / 2)); 33 | bytewrite(&Tree[pos(idx)], bytesize(idx), left + right); 34 | } 35 | } 36 | } 37 | 38 | virtual uint64_t prefix(size_t idx) const { 39 | uint64_t sum = 0; 40 | 41 | while (idx != 0) { 42 | sum += byteread(&Tree[pos(idx)], bytesize(idx)); 43 | idx = clear_rho(idx); 44 | } 45 | 46 | return sum; 47 | } 48 | 49 | virtual void add(size_t idx, int64_t inc) { 50 | while (idx <= Size) { 51 | bytewrite_inc(&Tree[pos(idx)], inc); 52 | idx += mask_rho(idx); 53 | } 54 | } 55 | 56 | using FenwickTree::find; 57 | virtual size_t find(uint64_t *val) const { 58 | size_t node = 0; 59 
| 60 | for (size_t m = mask_lambda(Size); m != 0; m >>= 1) { 61 | if (node + m > Size) 62 | continue; 63 | 64 | const uint64_t value = byteread(&Tree[pos(node + m)], bytesize(node + m)); 65 | 66 | if (*val >= value) { 67 | node += m; 68 | *val -= value; 69 | } 70 | } 71 | 72 | return node; 73 | } 74 | 75 | using FenwickTree::compFind; 76 | virtual size_t compFind(uint64_t *val) const { 77 | size_t node = 0; 78 | 79 | for (size_t m = mask_lambda(Size); m != 0; m >>= 1) { 80 | if (node + m > Size) 81 | continue; 82 | 83 | const uint64_t value = 84 | (BOUND << rho(node + m)) - byteread(&Tree[pos(node + m)], bytesize(node + m)); // stored value complemented against BOUND << rho(node + m) 85 | 86 | if (*val >= value) { 87 | node += m; 88 | *val -= value; 89 | } 90 | } 91 | 92 | return node; 93 | } 94 | 95 | virtual size_t size() const { return Size; } 96 | 97 | virtual size_t bitCount() const { 98 | return sizeof(ByteF) * 8 + Tree.bitCount() - sizeof(Tree) * 8; // every term in bits; presumably removes the Tree object counted by both sizeof(ByteF) and Tree.bitCount() -- TODO confirm against DArray::bitCount() 99 | } 100 | 101 | private: 102 | static inline size_t bytesize(size_t idx) { return ((rho(idx) + BOUNDSIZE - 1) >> 3) + 1; } // byte length passed to byteread()/bytewrite() for node idx 103 | 104 | static inline size_t holes(size_t idx) { 105 | // Exhaustive benchmarking shows it is better to use no holes on (relatively) small trees, 106 | // but we expect holes to be handy again in (very) big trees 107 | if (BOUNDSIZE >= 32) 108 | return 0; 109 | 110 | #ifdef HFT_DISABLE_TRANSHUGE 111 | return (idx >> (18 + (64 - BOUNDSIZE) % 8)); 112 | #else 113 | return (idx >> (28 + (64 - BOUNDSIZE) % 8)); 114 | #endif 115 | } 116 | 117 | static inline size_t pos(size_t idx) { 118 | idx--; 119 | constexpr size_t NEXTBYTE = ((BOUNDSIZE - 1) | (8 - 1)) + 1; 120 | 121 | constexpr size_t SMALL = ((BOUNDSIZE - 1) >> 3) + 1; 122 | constexpr size_t MEDIUM = NEXTBYTE - BOUNDSIZE + 1; 123 | constexpr size_t LARGE = MEDIUM + 8; 124 | 125 | constexpr size_t MULTIPLIER = 8 - SMALL - 1; 126 | 127 | return idx * SMALL + (idx >> MEDIUM) + (idx >> LARGE) * MULTIPLIER + holes(idx); 128 | } 129 | 130 | friend std::ostream &operator<<(std::ostream &os, const ByteF 
&ft) { 131 | uint64_t nsize = hton((uint64_t)ft.Size); 132 | os.write((char *)&nsize, sizeof(uint64_t)); 133 | 134 | return os << ft.Tree; 135 | } 136 | 137 | friend std::istream &operator>>(std::istream &is, ByteF &ft) { 138 | uint64_t nsize; 139 | is.read((char *)(&nsize), sizeof(uint64_t)); 140 | ft.Size = ntoh(nsize); 141 | 142 | return is >> ft.Tree; 143 | } 144 | }; 145 | 146 | } // namespace hft::fenwick 147 | 148 | #endif // __FENWICK_BYTEF_HPP__ 149 | -------------------------------------------------------------------------------- /include/hft/fenwick/bytel.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __FENWICK_BYTEL_HPP__ 2 | #define __FENWICK_BYTEL_HPP__ 3 | 4 | #include "fenwick_tree.hpp" 5 | 6 | namespace hft::fenwick { 7 | 8 | /** 9 | * class ByteL - byte compression and level ordered node layout. 10 | * @sequence: sequence of integers. 11 | * @size: number of elements. 12 | * @BOUND: maximum value that @sequence can store. 13 | * 14 | */ 15 | template class ByteL : public FenwickTree { 16 | public: 17 | static constexpr size_t BOUNDSIZE = ceil_log2_plus1(BOUND); 18 | static_assert(BOUNDSIZE >= 1 && BOUNDSIZE <= 64, "Leaves can't be stored in a 64-bit word"); 19 | 20 | protected: 21 | size_t Size, Levels; 22 | unique_ptr Level; 23 | DArray Tree; 24 | 25 | public: 26 | ByteL(uint64_t sequence[], size_t size) 27 | : Size(size), Levels(size != 0 ? 
lambda(size) + 2 : 1), Level(make_unique(Levels)) { 28 | Level[0] = 0; 29 | for (size_t i = 1; i < Levels; i++) // Level[i] is the offset in Tree where level i begins 30 | Level[i] = ((size + (1ULL << (i - 1))) / (1ULL << i)) * heightsize(i - 1) + Level[i - 1]; 31 | 32 | Tree = DArray(Level[Levels - 1] + 8); // +8 for safety 33 | 34 | for (size_t l = 0; l < Levels - 1; l++) { 35 | for (size_t node = 1ULL << l; node <= Size; node += 1ULL << (l + 1)) { 36 | size_t sequence_idx = node - 1; 37 | uint64_t value = sequence[sequence_idx]; 38 | for (size_t j = 0; j < l; j++) { 39 | sequence_idx >>= 1; 40 | const size_t lowpos = Level[j] + heightsize(j) * sequence_idx; 41 | const uint64_t low = byteread(&Tree[lowpos], heightsize(j)); 42 | 43 | value += low; 44 | } 45 | 46 | const size_t highpos = Level[l] + heightsize(l) * (node >> (l + 1)); 47 | bytewrite(&Tree[highpos], heightsize(l), value); 48 | } 49 | } 50 | } 51 | 52 | virtual uint64_t prefix(size_t idx) const { 53 | uint64_t sum = 0; 54 | 55 | while (idx != 0) { 56 | const int height = rho(idx); 57 | const size_t isize = heightsize(height); 58 | const size_t pos = Level[height] + (idx >> (1 + height)) * isize; 59 | 60 | sum += byteread(&Tree[pos], isize); 61 | idx = clear_rho(idx); 62 | } 63 | 64 | return sum; 65 | } 66 | 67 | virtual void add(size_t idx, int64_t inc) { 68 | while (idx <= Size) { 69 | const int height = rho(idx); 70 | const size_t isize = heightsize(height); 71 | const size_t pos = Level[height] + (idx >> (1 + height)) * isize; 72 | 73 | bytewrite_inc(&Tree[pos], inc); 74 | idx += mask_rho(idx); 75 | } 76 | } 77 | 78 | using FenwickTree::find; 79 | virtual size_t find(uint64_t *val) const { 80 | size_t node = 0, idx = 0; 81 | 82 | for (size_t height = Levels - 2; height != SIZE_MAX; height--) { // counts down to level 0, then wraps to SIZE_MAX (same loop as compFind()) 83 | const size_t isize = heightsize(height); 84 | const size_t pos = Level[height] + idx * heightsize(height); 85 | 86 | idx <<= 1; 87 | 88 | if (pos >= Level[height + 1]) 89 | continue; 90 | 91 | const uint64_t value = byteread(&Tree[pos], isize); 92 | 93 | if (*val >= 
value) { 94 | idx++; 95 | *val -= value; 96 | node += 1ULL << height; 97 | } 98 | } 99 | 100 | return min(node, Size); 101 | } 102 | 103 | using FenwickTree::compFind; 104 | virtual size_t compFind(uint64_t *val) const { 105 | size_t node = 0, idx = 0; 106 | 107 | for (size_t height = Levels - 2; height != SIZE_MAX; height--) { 108 | const size_t isize = heightsize(height); 109 | const size_t pos = Level[height] + idx * heightsize(height); 110 | 111 | idx <<= 1; 112 | 113 | if (pos >= Level[height + 1]) 114 | continue; 115 | 116 | const uint64_t value = (BOUND << height) - byteread(&Tree[pos], isize); 117 | 118 | if (*val >= value) { 119 | idx++; 120 | *val -= value; 121 | node += 1ULL << height; 122 | } 123 | } 124 | 125 | return min(node, Size); 126 | } 127 | 128 | virtual size_t size() const { return Size; } 129 | 130 | virtual size_t bitCount() const { 131 | return sizeof(ByteL) * 8 + Tree.bitCount() - sizeof(Tree) * 8 + 132 | Levels * sizeof(size_t) * 8; // every term in bits; presumably removes the Tree object counted twice -- TODO confirm against DArray::bitCount() 133 | } 134 | 135 | private: 136 | static inline size_t heightsize(size_t height) { return ((height + BOUNDSIZE - 1) >> 3) + 1; } 137 | 138 | friend std::ostream &operator<<(std::ostream &os, const ByteL &ft) { 139 | const uint64_t nsize = hton((uint64_t)ft.Size); 140 | os.write((char *)&nsize, sizeof(uint64_t)); 141 | 142 | const uint64_t nlevels = hton((uint64_t)ft.Levels); 143 | os.write((char *)&nlevels, sizeof(uint64_t)); 144 | 145 | for (size_t i = 0; i < ft.Levels; ++i) { 146 | const uint64_t nlevel = hton((uint64_t)ft.Level[i]); 147 | os.write((char *)&nlevel, sizeof(uint64_t)); 148 | } 149 | 150 | return os << ft.Tree; 151 | } 152 | 153 | friend std::istream &operator>>(std::istream &is, ByteL &ft) { 154 | uint64_t nsize; 155 | is.read((char *)(&nsize), sizeof(uint64_t)); 156 | ft.Size = ntoh(nsize); 157 | 158 | uint64_t nlevels; 159 | is.read((char *)&nlevels, sizeof(uint64_t)); 160 | ft.Levels = ntoh(nlevels); 161 | 162 | ft.Level = make_unique(ft.Levels); 163 | for (size_t i = 0; i < ft.Levels; ++i) { 
164 | uint64_t nlevel; 165 | is.read((char *)&nlevel, sizeof(uint64_t)); 166 | ft.Level[i] = ntoh(nlevel); 167 | } 168 | 169 | return is >> ft.Tree; 170 | } 171 | }; 172 | 173 | } // namespace hft::fenwick 174 | 175 | #endif // __FENWICK_BYTEL_HPP__ 176 | -------------------------------------------------------------------------------- /include/hft/fenwick/fenwick_tree.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __FENWICK_FENWICKTREE_HPP__ 2 | #define __FENWICK_FENWICKTREE_HPP__ 3 | 4 | #include "../common.hpp" 5 | #include "../darray.hpp" 6 | 7 | namespace hft::fenwick { 8 | 9 | /** 10 | * FenwickTree - Fenwick Tree data structure interface. 11 | * @sequence: An integer vector. 12 | * @size: The length of the sequence. 13 | * @BOUND: maximum value that @sequence can store. 14 | * 15 | * The indices of this data structure start from 1 and end at @size. 16 | * 17 | */ 18 | class FenwickTree { 19 | public: 20 | virtual ~FenwickTree() = default; 21 | 22 | /** 23 | * prefix() - Compute the prefix sum. 24 | * @idx: Length of the prefix sum. 25 | * 26 | * Sum the elements in the range (0 .. @idx], returns zero when @idx 27 | * is zero. 28 | * 29 | */ 30 | virtual uint64_t prefix(size_t idx) const = 0; 31 | 32 | /** 33 | * add() - Increment an element of the sequence (not the tree). 34 | * @idx: Index (starting from 1) of the element. 35 | * @inc: Value to add. 36 | * 37 | * You are allowed to use negative values for the increment, but keep in mind you should respect 38 | * the structure boundaries. 39 | * 40 | */ 41 | virtual void add(size_t idx, int64_t inc) = 0; 42 | 43 | /** 44 | * find() - Search the index of the closest (less than or equal to) prefix. 45 | * @val: Prefix to search. 46 | * 47 | * When @val is passed by pointer, the pointed-to value is replaced with the distance between the found 48 | * and the searched prefix (i.e. @val minus the found prefix). 
49 | * 50 | * This method returns zero if such an element doesn't exist (i.e. there is no non-empty prefix whose 51 | * sum is less than or equal to @val). 52 | * 53 | */ 54 | virtual size_t find(uint64_t *val) const = 0; 55 | size_t find(uint64_t val) const { return find(&val); } 56 | 57 | /** 58 | * compFind() - Complement find. 59 | * @val: Prefix to search. 60 | * 61 | * This method searches the index whose prefix is the closest to MAXVAL-@val. MAXVAL is the 62 | * maximum possible value for such a prefix (@sequence is therefore bounded). 63 | * 64 | * The same considerations made for FenwickTree::find() hold. 65 | * 66 | */ 67 | virtual size_t compFind(uint64_t *val) const = 0; 68 | size_t compFind(uint64_t val) const { return compFind(&val); } 69 | 70 | /** 71 | * size() - Returns the length of the sequence. 72 | * 73 | */ 74 | virtual size_t size() const = 0; 75 | 76 | /** 77 | * bitCount() - Estimation of the size (in bits) of this structure. 78 | * 79 | */ 80 | virtual size_t bitCount() const = 0; 81 | 82 | /** 83 | * Each FenwickTree is serializable and deserializable with: 84 | * - friend std::ostream &operator<<(std::ostream &os, const FenwickTree &ft); 85 | * - friend std::istream &operator>>(std::istream &is, FenwickTree &ft); 86 | * 87 | * The data is stored and loaded with the network (big-endian) byte order to guarantee 88 | * compatibility on different architectures. 89 | * 90 | * The serialized data follows the compression and node ordering of the specific Fenwick tree 91 | * without any compatibility layer (e.g. if you serialize a FixedF, you cannot deserialize the 92 | * very same data with a ByteL). 
93 | * 94 | */ 95 | }; 96 | 97 | } // namespace hft::fenwick 98 | 99 | #endif // __FENWICK_FENWICKTREE_HPP__ 100 | -------------------------------------------------------------------------------- /include/hft/fenwick/fixedf.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __FENWICK_FIXED_HPP__ 2 | #define __FENWICK_FIXED_HPP__ 3 | 4 | #include "fenwick_tree.hpp" 5 | 6 | namespace hft::fenwick { 7 | 8 | /** 9 | * class FixedF - no compression and classical node layout. 10 | * @sequence: sequence of integers. 11 | * @size: number of elements. 12 | * @BOUND: maximum value that @sequence can store. 13 | * 14 | */ 15 | template class FixedF : public FenwickTree { 16 | public: 17 | static constexpr size_t BOUNDSIZE = ceil_log2_plus1(BOUND); 18 | static_assert(BOUNDSIZE >= 1 && BOUNDSIZE <= 64, "Leaves can't be stored in a 64-bit word"); 19 | 20 | protected: 21 | size_t Size; 22 | DArray Tree; 23 | 24 | public: 25 | FixedF(uint64_t sequence[], size_t size) : Size(size), Tree(pos(size) + 1) { 26 | for (size_t j = 1; j <= size; j++) // first store every element in its own node 27 | Tree[pos(j)] = sequence[j - 1]; 28 | 29 | for (size_t m = 2; m <= size; m <<= 1) { // then, for m = 2, 4, 8, ..., add node idx - m/2 into node idx 30 | for (size_t idx = m; idx <= size; idx += m) 31 | Tree[pos(idx)] += Tree[pos(idx - m / 2)]; 32 | } 33 | } 34 | 35 | virtual uint64_t prefix(size_t idx) const { 36 | uint64_t sum = 0; 37 | 38 | while (idx != 0) { 39 | sum += Tree[pos(idx)]; 40 | idx = clear_rho(idx); 41 | } 42 | 43 | return sum; 44 | } 45 | 46 | virtual void add(size_t idx, int64_t inc) { 47 | while (idx <= Size) { 48 | Tree[pos(idx)] += inc; 49 | idx += mask_rho(idx); 50 | } 51 | } 52 | 53 | using FenwickTree::find; 54 | virtual size_t find(uint64_t *val) const { 55 | size_t node = 0; 56 | 57 | for (size_t m = mask_lambda(Size); m != 0; m >>= 1) { 58 | if (node + m > Size) 59 | continue; 60 | 61 | uint64_t value = Tree[pos(node + m)]; 62 | 63 | if (*val >= value) { 64 | node += m; 65 | *val -= value; 66 | } 67 | } 68 | 69 | return node; 70 
| } 71 | 72 | using FenwickTree::compFind; 73 | virtual size_t compFind(uint64_t *val) const { 74 | size_t node = 0; 75 | 76 | for (size_t m = mask_lambda(Size); m != 0; m >>= 1) { 77 | if (node + m > Size) 78 | continue; 79 | 80 | uint64_t value = (BOUND << rho(node + m)) - Tree[pos(node + m)]; // stored value complemented against BOUND << rho(node + m) 81 | 82 | if (*val >= value) { 83 | node += m; 84 | *val -= value; 85 | } 86 | } 87 | 88 | return node; 89 | } 90 | 91 | virtual size_t size() const { return Size; } 92 | 93 | virtual size_t bitCount() const { 94 | return sizeof(FixedF) * 8 + Tree.bitCount() - sizeof(Tree) * 8; // every term in bits; presumably removes the Tree object counted by both sizeof(FixedF) and Tree.bitCount() -- TODO confirm against DArray::bitCount() 95 | } 96 | 97 | private: 98 | static inline size_t holes(size_t idx) { return idx >> 14; } // one extra slot skipped every 2^14 indices 99 | 100 | static inline size_t pos(size_t idx) { return idx + holes(idx); } 101 | 102 | friend std::ostream &operator<<(std::ostream &os, const FixedF &ft) { 103 | uint64_t nsize = hton((uint64_t)ft.Size); 104 | os.write((char *)&nsize, sizeof(uint64_t)); 105 | 106 | return os << ft.Tree; 107 | } 108 | 109 | friend std::istream &operator>>(std::istream &is, FixedF &ft) { 110 | uint64_t nsize; 111 | is.read((char *)(&nsize), sizeof(uint64_t)); 112 | 113 | ft.Size = ntoh(nsize); 114 | return is >> ft.Tree; 115 | } 116 | }; 117 | 118 | } // namespace hft::fenwick 119 | 120 | #endif // __FENWICK_FIXED_HPP__ 121 | -------------------------------------------------------------------------------- /include/hft/fenwick/fixedl.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __FENWICK_FIXEDL_HPP__ 2 | #define __FENWICK_FIXEDL_HPP__ 3 | 4 | #include "fenwick_tree.hpp" 5 | 6 | namespace hft::fenwick { 7 | 8 | /** 9 | * class FixedL - no compression and level-ordered node layout. 10 | * @sequence: sequence of integers. 11 | * @size: number of elements. 12 | * @BOUND: maximum value that @sequence can store. 
13 | * 14 | */ 15 | template class FixedL : public FenwickTree { 16 | public: 17 | static constexpr size_t BOUNDSIZE = ceil_log2_plus1(BOUND); 18 | static_assert(BOUNDSIZE >= 1 && BOUNDSIZE <= 64, "Leaves can't be stored in a 64-bit word"); 19 | 20 | protected: 21 | size_t Size, Levels; 22 | unique_ptr Level; 23 | DArray Tree; 24 | 25 | public: 26 | FixedL(uint64_t sequence[], size_t size) 27 | : Size(size), Levels(size != 0 ? lambda(size) + 2 : 1), Level(make_unique(Levels)) { 28 | Level[0] = 0; 29 | for (size_t i = 1; i < Levels; i++) // Level[i] is the index in Tree where level i begins 30 | Level[i] = ((size + (1ULL << (i - 1))) / (1ULL << i)) + Level[i - 1]; 31 | 32 | Tree = DArray(Level[Levels - 1]); 33 | 34 | for (size_t l = 0; l < Levels - 1; l++) { 35 | for (size_t node = 1ULL << l; node <= size; node += 1ULL << (l + 1)) { 36 | size_t sequence_idx = node - 1; 37 | uint64_t value = sequence[sequence_idx]; 38 | for (size_t j = 0; j < l; j++) { 39 | sequence_idx >>= 1; 40 | value += Tree[Level[j] + sequence_idx]; 41 | } 42 | 43 | Tree[Level[l] + (node >> (l + 1))] = value; 44 | } 45 | } 46 | } 47 | 48 | virtual uint64_t prefix(size_t idx) const { 49 | uint64_t sum = 0; 50 | 51 | while (idx != 0) { 52 | const int height = rho(idx); 53 | size_t level_idx = idx >> (1 + height); 54 | sum += Tree[Level[height] + level_idx]; 55 | 56 | idx = clear_rho(idx); 57 | } 58 | 59 | return sum; 60 | } 61 | 62 | virtual void add(size_t idx, int64_t inc) { 63 | while (idx <= Size) { 64 | const int height = rho(idx); 65 | size_t level_idx = idx >> (1 + height); 66 | Tree[Level[height] + level_idx] += inc; 67 | 68 | idx += mask_rho(idx); 69 | } 70 | } 71 | 72 | using FenwickTree::find; 73 | virtual size_t find(uint64_t *val) const { 74 | size_t node = 0, idx = 0; 75 | 76 | for (size_t height = Levels - 2; height != SIZE_MAX; height--) { 77 | const size_t pos = Level[height] + idx; 78 | 79 | idx <<= 1; 80 | 81 | if (pos >= Level[height + 1]) 82 | continue; 83 | 84 | uint64_t value = Tree[pos]; 85 | if (*val >= value) { 86 | idx++; 
87 | *val -= value; 88 | node += 1ULL << height; 89 | } 90 | } 91 | 92 | return min(node, Size); 93 | } 94 | 95 | using FenwickTree::compFind; 96 | virtual size_t compFind(uint64_t *val) const { 97 | size_t node = 0, idx = 0; 98 | 99 | for (size_t height = Levels - 2; height != SIZE_MAX; height--) { 100 | const size_t pos = Level[height] + idx; 101 | 102 | idx <<= 1; 103 | 104 | if (pos >= Level[height + 1]) 105 | continue; 106 | 107 | uint64_t value = (BOUND << height) - Tree[pos]; 108 | if (*val >= value) { 109 | idx++; 110 | *val -= value; 111 | node += 1ULL << height; 112 | } 113 | } 114 | 115 | return min(node, Size); 116 | } 117 | 118 | virtual size_t size() const { return Size; } 119 | 120 | virtual size_t bitCount() const { 121 | return sizeof(FixedL) * 8 + Tree.bitCount() - sizeof(Tree) * 8 + 122 | Levels * sizeof(size_t) * 8; // every term in bits; presumably removes the Tree object counted twice -- TODO confirm against DArray::bitCount() 123 | } 124 | 125 | private: 126 | friend std::ostream &operator<<(std::ostream &os, const FixedL &ft) { 127 | const uint64_t nsize = hton((uint64_t)ft.Size); 128 | os.write((char *)&nsize, sizeof(uint64_t)); 129 | 130 | const uint64_t nlevels = hton((uint64_t)ft.Levels); 131 | os.write((char *)&nlevels, sizeof(uint64_t)); 132 | 133 | for (size_t i = 0; i < ft.Levels; ++i) { 134 | const uint64_t nlevel = hton((uint64_t)ft.Level[i]); 135 | os.write((char *)&nlevel, sizeof(uint64_t)); 136 | } 137 | 138 | return os << ft.Tree; 139 | } 140 | 141 | friend std::istream &operator>>(std::istream &is, FixedL &ft) { 142 | uint64_t nsize; 143 | is.read((char *)(&nsize), sizeof(uint64_t)); 144 | ft.Size = ntoh(nsize); 145 | 146 | uint64_t nlevels; 147 | is.read((char *)&nlevels, sizeof(uint64_t)); 148 | ft.Levels = ntoh(nlevels); 149 | 150 | ft.Level = make_unique(ft.Levels); 151 | for (size_t i = 0; i < ft.Levels; ++i) { 152 | uint64_t nlevel; 153 | is.read((char *)&nlevel, sizeof(uint64_t)); 154 | ft.Level[i] = ntoh(nlevel); 155 | } 156 | 157 | return is >> ft.Tree; 158 | } 159 | }; 160 | 161 | } // namespace hft::fenwick 162 | 163 | #endif // 
__FENWICK_FIXEDL_HPP__ 164 | -------------------------------------------------------------------------------- /include/hft/fenwick/hybrid.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __FENWICK_HYBRID_HPP__ 2 | #define __FENWICK_HYBRID_HPP__ 3 | 4 | #include "fenwick_tree.hpp" 5 | #include 6 | 7 | namespace hft::fenwick { 8 | 9 | template