├── .gitignore ├── vcpkg.json ├── cmake ├── BTreeConfig.cmake.in └── modules │ └── FindVersionHeader.cmake ├── version.h.in ├── test ├── fc_catch2.h ├── rwtest.cpp ├── perftest_no_simd.cpp ├── perftest_string.cpp ├── perftest.cpp ├── test_statistics.cpp ├── test_statistics.h └── unittest.cpp ├── include └── fc │ ├── details.h │ ├── mmfile.h │ ├── disk_btree.h │ ├── mmfile_nix.h │ ├── disk_fixed_alloc.h │ ├── comp.h │ ├── mmfile_win.h │ └── btree.h ├── .clang-format ├── .github └── workflows │ └── action-cpp.yml ├── CMakeLists.txt ├── LICENSE.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /cmake-build-*/ 2 | /.idea/ 3 | /.vscode/ 4 | /vcpkg/ 5 | CMakeLists.txt.user 6 | vcpkg 7 | VERSION.txt 8 | -------------------------------------------------------------------------------- /vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "btree", 3 | "version-string": "1.0.0", 4 | "license": "Apache-2.0", 5 | "dependencies": [ 6 | "catch2" 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /cmake/BTreeConfig.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") 4 | check_required_components("@PROJECT_NAME@") -------------------------------------------------------------------------------- /version.h.in: -------------------------------------------------------------------------------- 1 | #ifndef VERSION_H_IN 2 | #define VERSION_H_IN 3 | 4 | #cmakedefine MAJOR "@MAJOR@" 5 | #cmakedefine MINOR "@MINOR@" 6 | #cmakedefine PATCH "@PATCH@" 7 | #cmakedefine COMMITTER_FULLSHA "@COMMITTER_FULLSHA@" 8 | #cmakedefine COMMITTER_SHORTSHA "@COMMITTER_SHORTSHA@" 9 | #cmakedefine COMMITTER_DATE "@COMMITTER_DATE@" 10 | 11 | #endif // VERSION_H_IN 12 | 
-------------------------------------------------------------------------------- /test/fc_catch2.h: -------------------------------------------------------------------------------- 1 | #ifndef FC_CATCH2_H 2 | #define FC_CATCH2_H 3 | 4 | #ifndef CATCH_CONFIG_ENABLE_BENCHMARKING 5 | #define CATCH_CONFIG_ENABLE_BENCHMARKING 6 | #endif // CATCH_CONFIG_ENABLE_BENCHMARKING 7 | 8 | #ifdef CATCH2_OLD 9 | #include 10 | #else 11 | #include 12 | #include 13 | #endif 14 | 15 | #endif // FC_CATCH2_H 16 | -------------------------------------------------------------------------------- /include/fc/details.h: -------------------------------------------------------------------------------- 1 | #ifndef FC_DETAILS_H 2 | #define FC_DETAILS_H 3 | 4 | #include 5 | #include 6 | 7 | namespace frozenca { 8 | 9 | template 10 | concept Containable = std::is_same_v, T>; 11 | 12 | template 13 | concept DiskAllocable = 14 | std::is_same_v, T> && 15 | std::is_trivially_copyable_v && (sizeof(T) % alignof(T) == 0); 16 | 17 | using attr_t = std::int32_t; 18 | 19 | } // namespace frozenca 20 | #endif // FC_DETAILS_H 21 | -------------------------------------------------------------------------------- /test/rwtest.cpp: -------------------------------------------------------------------------------- 1 | #define _CONTROL_IN_TEST 2 | 3 | #include "fc_catch2.h" 4 | #include 5 | #include 6 | 7 | #include "fc/btree.h" 8 | 9 | TEST_CASE("rw-test") { 10 | namespace fc = frozenca; 11 | fc::BTreeSet btree_out; 12 | 13 | constexpr int n = 100; 14 | 15 | for (int i = 0; i < n; ++i) { 16 | REQUIRE_NOTHROW(btree_out.insert(i)); 17 | } 18 | { 19 | std::ofstream ofs{"btree.bin", std::ios_base::out | std::ios_base::binary | 20 | std::ios_base::trunc}; 21 | ofs << btree_out; 22 | } 23 | 24 | fc::BTreeSet btree_in; 25 | { 26 | std::ifstream ifs{"btree.bin", std::ios_base::in | std::ios_base::binary}; 27 | ifs >> btree_in; 28 | } 29 | 30 | for (int i = 0; i < n; ++i) { 31 | REQUIRE(btree_in.contains(i)); 32 | } 33 | } 
34 | -------------------------------------------------------------------------------- /cmake/modules/FindVersionHeader.cmake: -------------------------------------------------------------------------------- 1 | find_package(Git) 2 | 3 | if (NOT DEFINED COMMITTER_FULLSHA) 4 | execute_process( 5 | COMMAND ${GIT_EXECUTABLE} log -n 1 --pretty=format:%H 6 | OUTPUT_VARIABLE COMMITTER_FULLSHA 7 | OUTPUT_STRIP_TRAILING_WHITESPACE 8 | WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} 9 | ) 10 | endif () 11 | 12 | if (NOT DEFINED COMMITTER_SHORTSHA) 13 | execute_process( 14 | COMMAND ${GIT_EXECUTABLE} log -n 1 --pretty=format:%h 15 | OUTPUT_VARIABLE COMMITTER_SHORTSHA 16 | OUTPUT_STRIP_TRAILING_WHITESPACE 17 | WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} 18 | ) 19 | endif () 20 | 21 | if (NOT DEFINED COMMITTER_DATE) 22 | execute_process( 23 | COMMAND ${GIT_EXECUTABLE} log -n 1 --pretty=format:%ci 24 | OUTPUT_VARIABLE COMMITTER_DATE 25 | OUTPUT_STRIP_TRAILING_WHITESPACE 26 | WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} 27 | ) 28 | endif () 29 | configure_file(${CMAKE_SOURCE_DIR}/version.h.in ${CMAKE_BINARY_DIR}/version.h @ONLY) 30 | -------------------------------------------------------------------------------- /test/perftest_no_simd.cpp: -------------------------------------------------------------------------------- 1 | #if defined(FC_USE_SIMD) 2 | #undef FC_USE_SIMD 3 | #define FC_USE_SIMD 0 4 | #endif // FC_USE_SIMD 5 | 6 | #include "fc_catch2.h" 7 | #include "fc/disk_btree.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "test_statistics.h" 13 | 14 | TEST_CASE("perftest-no-simd") { 15 | namespace fc = frozenca; 16 | std::unordered_map result; 17 | 18 | auto generate_values = []() { 19 | std::vector::value_type> v(1'000'000); 20 | std::iota(v.begin(), v.end(), 0); 21 | return v; 22 | }; 23 | 24 | BENCHMARK("frozenca::BTreeSet test (don't use SIMD)") { 25 | fc::BTreeSet btree; 26 | result.emplace("BTreeSet(no-simd)", tree_perf_test(btree, generate_values())); 27 | }; 28 | 29 
| for (const auto &[key, value] : result) { 30 | INFO("----------------"); 31 | INFO(key); 32 | std::stringstream ss; 33 | value.print_stats(ss); 34 | INFO(ss.str()); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /test/perftest_string.cpp: -------------------------------------------------------------------------------- 1 | #if defined(FC_USE_SIMD) 2 | #undef FC_USE_SIMD 3 | #define FC_USE_SIMD 0 4 | #endif // FC_USE_SIMD 5 | 6 | #include "fc_catch2.h" 7 | #include "fc/btree.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "test_statistics.h" 13 | 14 | 15 | TEST_CASE("perftest-string") { 16 | namespace fc = frozenca; 17 | constexpr int max_n = 1'000'000; 18 | constexpr int max_length = 50; 19 | 20 | std::unordered_map result; 21 | auto str_vec = generate_random_strings(max_n, max_length, false); 22 | 23 | BENCHMARK("Balanced tree test - warmap") { 24 | fc::BTreeSet btree; 25 | tree_perf_test(btree, str_vec); 26 | }; 27 | 28 | BENCHMARK("frozenca::BTreeSet string (fanout 64)") { 29 | fc::BTreeSet btree; 30 | result.emplace("BTreeSet(64)", tree_perf_test(btree, str_vec)); 31 | }; 32 | BENCHMARK("frozenca::BTreeSet string (fanout 128)") { 33 | fc::BTreeSet btree; 34 | result.emplace("BTreeSet(128)", tree_perf_test(btree, str_vec)); 35 | }; 36 | BENCHMARK("std::set string") { 37 | std::set rbtree; 38 | result.emplace("std::set", tree_perf_test(rbtree, str_vec)); 39 | }; 40 | 41 | for (const auto &[key, value] : result) { 42 | INFO("----------------"); 43 | INFO(key); 44 | std::stringstream ss; 45 | value.print_stats(ss); 46 | INFO(ss.str()); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /include/fc/mmfile.h: -------------------------------------------------------------------------------- 1 | #ifndef __FC_MMFILE_H__ 2 | #define __FC_MMFILE_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #if _WIN32 || _WIN64 9 | #include 
"fc/mmfile_win.h" 10 | #else 11 | #include "fc/mmfile_nix.h" 12 | #endif 13 | 14 | namespace frozenca { 15 | 16 | class MemoryMappedFile { 17 | public: 18 | static inline constexpr std::size_t new_file_size_ = 19 | MemoryMappedFileImpl::new_file_size_; 20 | using handle_type = MemoryMappedFileImpl::handle_type; 21 | using path_type = MemoryMappedFileImpl::path_type; 22 | 23 | private: 24 | MemoryMappedFileImpl impl_; 25 | 26 | public: 27 | MemoryMappedFile(const std::filesystem::path &path, 28 | std::size_t init_file_size = new_file_size_, 29 | bool trunc = false) 30 | : impl_{path, init_file_size, trunc} {} 31 | 32 | ~MemoryMappedFile() noexcept = default; 33 | 34 | public: 35 | void resize(std::size_t new_size) { impl_.resize(new_size); } 36 | 37 | [[nodiscard]] std::size_t size() const noexcept { return impl_.size(); } 38 | 39 | [[nodiscard]] void *data() noexcept { return impl_.data(); } 40 | 41 | [[nodiscard]] const void *data() const noexcept { return impl_.data(); } 42 | 43 | friend bool operator==(const MemoryMappedFile &mmfile1, 44 | const MemoryMappedFile &mmfile2) { 45 | return mmfile1.impl_ == mmfile2.impl_; 46 | } 47 | 48 | friend bool operator!=(const MemoryMappedFile &mmfile1, 49 | const MemoryMappedFile &mmfile2) { 50 | return !(mmfile1 == mmfile2); 51 | } 52 | }; 53 | 54 | } // namespace frozenca 55 | 56 | #endif //__FC_MMFILE_H__ 57 | -------------------------------------------------------------------------------- /test/perftest.cpp: -------------------------------------------------------------------------------- 1 | #if defined(__x86_64__) || defined(_M_X64) 2 | #if defined(FC_USE_SIMD) 3 | #undef FC_USE_SIMD 4 | #define FC_USE_SIMD 1 5 | #endif // FC_USE_SIMD 6 | #endif 7 | 8 | #include "fc_catch2.h" 9 | #include "fc/disk_btree.h" 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "test_statistics.h" 15 | 16 | TEST_CASE("perftest") { 17 | namespace fc = frozenca; 18 | std::unordered_map result; 19 | 20 | auto generate_values = 
[]() { 21 | std::vector::value_type> v(1'000'000); 22 | std::iota(v.begin(), v.end(), 0); 23 | return v; 24 | }; 25 | 26 | BENCHMARK("Balanced tree test - warmap") { 27 | fc::BTreeSet btree; 28 | tree_perf_test(btree, generate_values()); 29 | }; 30 | BENCHMARK("frozenca::BTreeSet test (fanout 64)") { 31 | fc::BTreeSet btree; 32 | result.emplace("BTreeSet(64)", tree_perf_test(btree, generate_values())); 33 | }; 34 | BENCHMARK("frozenca::BTreeSet test (fanout 96)") { 35 | fc::BTreeSet btree; 36 | result.emplace("BTreeSet(96)", tree_perf_test(btree, generate_values())); 37 | }; 38 | BENCHMARK("frozenca::DiskBTreeSet test (fanout 128)") { 39 | fc::DiskBTreeSet btree("database.bin", 1UL << 25UL, true); 40 | result.emplace("DiskBTreeSet(128)", tree_perf_test(btree, generate_values())); 41 | }; 42 | BENCHMARK("frozenca::BTreeSet test (fanout 128)") { 43 | fc::BTreeSet btree; 44 | result.emplace("BTreeSet(128)", tree_perf_test(btree, generate_values())); 45 | }; 46 | BENCHMARK("std::set test") { 47 | std::set rbtree; 48 | result.emplace("std::set", tree_perf_test(rbtree, generate_values())); 49 | }; 50 | 51 | for (const auto &[key, value] : result) { 52 | INFO("----------------"); 53 | INFO(key); 54 | std::stringstream ss; 55 | value.print_stats(ss); 56 | INFO(ss.str()); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /test/test_statistics.cpp: -------------------------------------------------------------------------------- 1 | #include "test_statistics.h" 2 | #include 3 | #include 4 | #include 5 | 6 | stats get_statistics(std::vector &v) { 7 | auto n = std::ssize(v); 8 | if (n == 0) { 9 | return {}; 10 | } 11 | stats s; 12 | s.average = std::accumulate(v.begin(), v.end(), 0.0f) / n; 13 | float variance = 0.0f; 14 | for (auto value : v) { 15 | variance += std::pow(value - s.average, 2.0f); 16 | } 17 | variance /= n; 18 | s.stdev = std::sqrt(variance); 19 | std::ranges::sort(v); 20 | s.percentile_95 = *(v.begin() + (19 * n 
/ 20)); 21 | s.percentile_99 = *(v.begin() + (99 * n / 100)); 22 | s.percentile_999 = *(v.begin() + (999 * n / 1000)); 23 | return s; 24 | } 25 | 26 | void perf_result::print_stats(std::ostream &os) const { 27 | auto print = [this, &os](const std::string &stat_name, const stats &stat) { 28 | os << "\tTime to " << stat_name << " " << values_cnt << " elements:\n" 29 | << "\tAverage : " << stat.average << "ms,\n" 30 | << "\tStdev : " << stat.stdev << "ms,\n" 31 | << "\t95% : " << stat.percentile_95 << "ms,\n" 32 | << "\t99% : " << stat.percentile_99 << "ms,\n" 33 | << "\t99.9% : " << stat.percentile_999 << "ms,\n"; 34 | }; 35 | print("insert", insert); 36 | print("find", find); 37 | print("erase", erase); 38 | } 39 | 40 | std::vector generate_random_strings(int max_n, int max_length, bool allow_duplicates) { 41 | std::vector res; 42 | 43 | std::mt19937 gen(std::random_device{}()); 44 | std::uniform_int_distribution length_dist(1, max_length); 45 | std::uniform_int_distribution ch_dist(32, 126); 46 | 47 | for (int i = 0; i < max_n; ++i) { 48 | int len = length_dist(gen); 49 | std::string s; 50 | for (int l = 0; l < len; ++l) { 51 | s += static_cast(ch_dist(gen)); 52 | } 53 | res.push_back(std::move(s)); 54 | } 55 | 56 | if (!allow_duplicates) { 57 | std::ranges::sort(res); 58 | auto ret = std::ranges::unique(res); 59 | res.erase(ret.begin(), ret.end()); 60 | } 61 | 62 | return res; 63 | } 64 | -------------------------------------------------------------------------------- /include/fc/disk_btree.h: -------------------------------------------------------------------------------- 1 | #ifndef __FC_DISK_BTREE_H__ 2 | #define __FC_DISK_BTREE_H__ 3 | 4 | #include "fc/btree.h" 5 | #include "fc/disk_fixed_alloc.h" 6 | 7 | namespace frozenca { 8 | 9 | template 11 | class DiskBTreeBase 12 | : public BTreeBase { 13 | public: 14 | using Base = BTreeBase; 15 | using Node = typename Base::node_type; 16 | 17 | private: 18 | MemoryMappedFile mm_file_; 19 | MemoryResourceFixed 
mem_res_; 20 | 21 | public: 22 | explicit DiskBTreeBase(const MemoryMappedFile &mm_file) 23 | : mm_file_{mm_file}, mem_res_{reinterpret_cast( 24 | mm_file_.data()), 25 | mm_file_.size()} { 26 | Base(AllocatorFixed(&mem_res_)); 27 | } 28 | 29 | DiskBTreeBase(const std::filesystem::path &path, std::size_t pool_size, 30 | bool trunc = false) 31 | : mm_file_{path, pool_size, trunc}, mem_res_{ 32 | reinterpret_cast( 33 | mm_file_.data()), 34 | mm_file_.size()} { 35 | Base(AllocatorFixed(&mem_res_)); 36 | } 37 | }; 38 | 39 | template 40 | using DiskBTreeSet = DiskBTreeBase; 41 | 42 | template 43 | using DiskBTreeMultiSet = DiskBTreeBase; 44 | 45 | template 47 | using DiskBTreeMap = DiskBTreeBase, t, Comp, false>; 48 | 49 | template 51 | using DiskBTreeMultiMap = DiskBTreeBase, t, Comp, true>; 52 | 53 | } // namespace frozenca 54 | 55 | #endif //__FC_DISK_BTREE_H__ 56 | -------------------------------------------------------------------------------- /test/test_statistics.h: -------------------------------------------------------------------------------- 1 | #ifndef FC_TEST_STATISTICS_H 2 | #define FC_TEST_STATISTICS_H 3 | 4 | #include "fc_catch2.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | struct stats { 14 | float average = 0.0f; 15 | float stdev = 0.0f; 16 | float percentile_95 = 0.0f; 17 | float percentile_99 = 0.0f; 18 | float percentile_999 = 0.0f; 19 | }; 20 | 21 | struct perf_result { 22 | size_t values_cnt{0}; 23 | stats insert; 24 | stats find; 25 | stats erase; 26 | void print_stats(std::ostream &os) const; 27 | }; 28 | 29 | stats get_statistics(std::vector &v); 30 | 31 | std::vector generate_random_strings(int max_n, int max_length, bool allow_duplicates); 32 | 33 | template 34 | [[maybe_unused]] perf_result tree_perf_test(TreeType &tree, std::vector v, size_t trials = 1) { 35 | const size_t max_n = v.size(); 36 | const size_t max_trials = trials; 37 | 38 | std::mt19937 gen(std::random_device{}()); 39 
| std::vector durations_insert; 40 | std::vector durations_find; 41 | std::vector durations_erase; 42 | 43 | for (size_t t = 0; t < max_trials; ++t) { 44 | float duration = 0.0f; 45 | std::ranges::shuffle(v, gen); 46 | for (auto num : v) { 47 | auto start = std::chrono::steady_clock::now(); 48 | tree.insert(num); 49 | auto end = std::chrono::steady_clock::now(); 50 | duration += std::chrono::duration_cast>(end - start).count(); 51 | } 52 | durations_insert.push_back(duration); 53 | 54 | duration = 0.0f; 55 | std::ranges::shuffle(v, gen); 56 | for (auto num : v) { 57 | auto start = std::chrono::steady_clock::now(); 58 | if (!tree.contains(num)) { 59 | FAIL("Lookup verification fail!"); 60 | } 61 | auto end = std::chrono::steady_clock::now(); 62 | duration += std::chrono::duration_cast>(end - start).count(); 63 | } 64 | durations_find.push_back(duration); 65 | 66 | duration = 0.0f; 67 | std::ranges::shuffle(v, gen); 68 | for (auto num : v) { 69 | auto start = std::chrono::steady_clock::now(); 70 | if (!tree.erase(num)) { 71 | FAIL("Erase verification fail!"); 72 | } 73 | auto end = std::chrono::steady_clock::now(); 74 | duration += std::chrono::duration_cast>(end - start).count(); 75 | } 76 | durations_erase.push_back(duration); 77 | } 78 | perf_result result; 79 | result.values_cnt = max_n; 80 | result.insert = get_statistics(durations_insert); 81 | result.find = get_statistics(durations_find); 82 | result.erase = get_statistics(durations_erase); 83 | return result; 84 | } 85 | 86 | #endif // FC_TEST_STATISTICS_H 87 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | Language: Cpp 2 | # BasedOnStyle: Google 3 | AccessModifierOffset: -1 4 | AlignAfterOpenBracket: Align 5 | AlignConsecutiveAssignments: None 6 | AlignConsecutiveDeclarations: None 7 | AlignOperands: true 8 | AlignTrailingComments: true 9 | 
AllowAllParametersOfDeclarationOnNextLine: true 10 | AllowShortBlocksOnASingleLine: Never 11 | AllowShortCaseLabelsOnASingleLine: false 12 | AllowShortFunctionsOnASingleLine: All 13 | AllowShortIfStatementsOnASingleLine: true 14 | AllowShortLoopsOnASingleLine: true 15 | AlwaysBreakAfterDefinitionReturnType: None 16 | AlwaysBreakAfterReturnType: None 17 | AlwaysBreakBeforeMultilineStrings: true 18 | AlwaysBreakTemplateDeclarations: Yes 19 | BinPackArguments: false 20 | BinPackParameters: false 21 | BraceWrapping: 22 | AfterClass: false 23 | AfterControlStatement: false 24 | AfterEnum: false 25 | AfterFunction: false 26 | AfterNamespace: false 27 | AfterObjCDeclaration: false 28 | AfterStruct: false 29 | AfterUnion: false 30 | BeforeCatch: false 31 | BeforeElse: false 32 | IndentBraces: false 33 | BreakBeforeBinaryOperators: None 34 | BreakBeforeBraces: Attach 35 | BreakBeforeTernaryOperators: true 36 | BreakConstructorInitializersBeforeComma: false 37 | BreakAfterJavaFieldAnnotations: false 38 | BreakStringLiterals: true 39 | ColumnLimit: 150 40 | CommentPragmas: '^ IWYU pragma:' 41 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 42 | ConstructorInitializerIndentWidth: 4 43 | ContinuationIndentWidth: 4 44 | Cpp11BracedListStyle: true 45 | DerivePointerAlignment: true 46 | DisableFormat: false 47 | ExperimentalAutoDetectBinPacking: false 48 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 49 | IncludeCategories: 50 | - Regex: '^<.*\.h>' 51 | Priority: 1 52 | - Regex: '^<.*' 53 | Priority: 2 54 | - Regex: '.*' 55 | Priority: 3 56 | IncludeIsMainRegex: '([-_](test|unittest))?$' 57 | IndentCaseLabels: true 58 | IndentWidth: 2 59 | IndentWrappedFunctionNames: false 60 | JavaScriptQuotes: Leave 61 | JavaScriptWrapImports: true 62 | KeepEmptyLinesAtTheStartOfBlocks: false 63 | MacroBlockBegin: '' 64 | MacroBlockEnd: '' 65 | MaxEmptyLinesToKeep: 1 66 | NamespaceIndentation: None 67 | ObjCBlockIndentWidth: 2 68 | ObjCSpaceAfterProperty: false 69 | 
ObjCSpaceBeforeProtocolList: false 70 | PenaltyBreakBeforeFirstCallParameter: 1 71 | PenaltyBreakComment: 300 72 | PenaltyBreakFirstLessLess: 120 73 | PenaltyBreakString: 1000 74 | PenaltyExcessCharacter: 1000000 75 | PenaltyReturnTypeOnItsOwnLine: 200 76 | PointerAlignment: Left 77 | ReflowComments: true 78 | SortIncludes: Never 79 | SpaceAfterCStyleCast: false 80 | SpaceAfterTemplateKeyword: true 81 | SpaceBeforeAssignmentOperators: true 82 | SpaceBeforeParens: ControlStatements 83 | SpaceInEmptyParentheses: false 84 | SpacesBeforeTrailingComments: 2 85 | SpacesInAngles: false 86 | SpacesInContainerLiterals: true 87 | SpacesInCStyleCastParentheses: false 88 | SpacesInParentheses: false 89 | SpacesInSquareBrackets: false 90 | Standard: Auto 91 | TabWidth: 8 92 | UseTab: Never 93 | -------------------------------------------------------------------------------- /include/fc/mmfile_nix.h: -------------------------------------------------------------------------------- 1 | #ifndef FC_MMFILE_NIX_H 2 | #define FC_MMFILE_NIX_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace frozenca { 16 | 17 | class MemoryMappedFileImpl { 18 | public: 19 | static inline constexpr std::size_t new_file_size_ = (1UL << 20UL); 20 | using handle_type = int; 21 | using path_type = std::filesystem::path::value_type; 22 | 23 | private: 24 | const std::filesystem::path path_; 25 | void *data_ = nullptr; 26 | std::size_t size_ = 0; 27 | 28 | handle_type handle_ = 0; 29 | int flags_ = 0; 30 | 31 | public: 32 | MemoryMappedFileImpl(const std::filesystem::path &path, 33 | std::size_t init_file_size = new_file_size_, 34 | bool trunc = false) 35 | : path_{path} { 36 | bool exists = std::filesystem::exists(path); 37 | if (exists && trunc) { 38 | std::filesystem::remove(path); 39 | exists = false; 40 | } 41 | open_file(path.c_str(), exists, init_file_size); 42 | map_file(); 43 | } 44 | 45 | 
~MemoryMappedFileImpl() noexcept { 46 | /* Best effort: a destructor cannot report unmap/close failures. */ 47 | if (data_) { 48 | (void)unmap_file(); 49 | } 50 | /* The descriptor is open in every fully constructed object, including one that resize() left unmapped. */ 51 | (void)close_file(); 52 | } 53 | 54 | private: /* Opens the file at path (creating it when !exists), sizes a newly created file to init_file_size bytes and stores the resulting file size in size_. Throws std::runtime_error on failure. */ 55 | void open_file(const path_type *path, bool exists, 56 | std::size_t init_file_size) { 57 | flags_ = O_RDWR; 58 | if (!exists) { 59 | flags_ |= (O_CREAT | O_TRUNC); 60 | } 61 | #ifdef _LARGEFILE64_SOURCE 62 | flags_ |= O_LARGEFILE; 63 | #endif 64 | /* open() signals failure through its return value; errno is unspecified after a successful call. */ 65 | handle_ = open(path, flags_, S_IRWXU); 66 | if (handle_ == -1) { 67 | throw std::runtime_error("file open failed\n"); 68 | } 69 | /* open_file() runs inside the constructor and no destructor runs when a constructor throws, so close the descriptor before throwing. */ 70 | if (!exists && ftruncate(handle_, init_file_size) == -1) { 71 | (void)close_file(); 72 | throw std::runtime_error("failed setting file size\n"); 73 | } 74 | 75 | /* Record the size reported by the file system; an existing file keeps its own size. */ 76 | struct stat info {}; 77 | if (fstat(handle_, &info) == -1) { 78 | (void)close_file(); 79 | throw std::runtime_error("failed querying file size\n"); 80 | } 81 | size_ = info.st_size; 82 | } 83 | /* Maps size_ bytes of the file read/write and shared, storing the address in data_; throws std::runtime_error when mmap fails. */ 84 | void map_file() { 85 | void *data = 86 | mmap(nullptr, size_, PROT_READ | PROT_WRITE, MAP_SHARED, handle_, 0); 87 | if (data == MAP_FAILED) { 88 | throw std::runtime_error("failed mapping file"); 89 | } 90 | data_ = data; 91 | } 92 | 93 | bool close_file() noexcept { 94 | return close(handle_) == 0; 95 | } 96 | 97 | bool unmap_file() noexcept { 98 | return (munmap(data_, size_) == 0); 99 | } 100 | 101 | public: /* Resizes the file to new_size bytes and maps it again. Throws std::runtime_error on failure; when a step after the unmap fails the object stays unmapped (data() == nullptr). */ 102 | void resize(std::size_t new_size) { 103 | if (!data_) { 104 | throw std::runtime_error("file is closed\n"); 105 | } 106 | if (!unmap_file()) { 107 | throw std::runtime_error("failed unmappping file\n"); 108 | } 109 | /* The old mapping is gone: drop the pointer so neither data() nor the destructor touches it again. */ data_ = nullptr; 110 | if (ftruncate(handle_, new_size) == -1) { 111 | throw std::runtime_error("failed resizing mapped file\n"); 112 | } 113 | size_ = new_size; 114 | map_file(); 115 | } 116 | [[nodiscard]] std::size_t size() const noexcept { return size_; } 117 | 118 | [[nodiscard]] void *data() noexcept { return data_; } 119 | 120 | [[nodiscard]] const void *data() const noexcept { return 
data_; } 121 | 122 | friend bool operator==(const MemoryMappedFileImpl &mmfile1, 123 | const MemoryMappedFileImpl &mmfile2) { 124 | auto res = 125 | (mmfile1.path_ == mmfile2.path_ && mmfile1.data_ == mmfile2.data_ && 126 | mmfile1.size_ == mmfile2.size_ && mmfile1.handle_ == mmfile2.handle_ && 127 | mmfile1.flags_ == mmfile2.flags_); 128 | return res; 129 | } 130 | 131 | friend bool operator!=(const MemoryMappedFileImpl &mmfile1, 132 | const MemoryMappedFileImpl &mmfile2) { 133 | return !(mmfile1 == mmfile2); 134 | } 135 | }; 136 | 137 | } // namespace frozenca 138 | 139 | #endif // FC_MMFILE_NIX_H 140 | -------------------------------------------------------------------------------- /include/fc/disk_fixed_alloc.h: -------------------------------------------------------------------------------- 1 | #ifndef __FC_DISK_FIXED_ALLOC_H__ 2 | #define __FC_DISK_FIXED_ALLOC_H__ 3 | 4 | #include "fc/mmfile.h" 5 | #include "fc/details.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #if defined(__clang__) && __clang_major__ < 15 12 | #include 13 | namespace stdpmr = std::experimental::pmr; 14 | #elif defined(__clang__) || (__GNUC__) 15 | #include 16 | namespace stdpmr = std::pmr; 17 | #endif 18 | #include 19 | #include 20 | 21 | namespace frozenca { 22 | 23 | template 24 | class MemoryResourceFixed : public stdpmr::memory_resource { 25 | T *pool_ = nullptr; 26 | std::size_t pool_size_ = 0; 27 | T *free_ = nullptr; 28 | 29 | public: 30 | MemoryResourceFixed(unsigned char *pool_ptr, std::size_t pool_byte_size) { 31 | if (!pool_ptr) { 32 | throw std::invalid_argument("pool ptr is null"); 33 | } 34 | if ((std::bit_cast(pool_ptr) % alignof(T)) || 35 | (std::bit_cast(pool_ptr) % sizeof(T *))) { 36 | throw std::invalid_argument("pool ptr is not aligned with T/T*"); 37 | } 38 | if (pool_byte_size < sizeof(T)) { 39 | throw std::invalid_argument("pool byte size is too small"); 40 | } 41 | if ((pool_byte_size % alignof(T)) || (pool_byte_size % sizeof(T *))) { 
42 | throw std::invalid_argument("pool byte size is not aligned with T/T*"); 43 | } 44 | 45 | pool_ = reinterpret_cast(pool_ptr); 46 | // size in chunks 47 | pool_size_ = pool_byte_size / sizeof(T); 48 | 49 | auto curr_chunk = pool_; 50 | for (size_t i = 0; i < pool_size_; i++, curr_chunk++) { 51 | *(reinterpret_cast(curr_chunk)) = 52 | std::bit_cast(curr_chunk + 1); 53 | } 54 | free_ = pool_; 55 | } 56 | 57 | explicit MemoryResourceFixed(MemoryMappedFile &file) 58 | : MemoryResourceFixed(static_cast(file.data()), 59 | file.size()) {} 60 | 61 | private: /* Pops one sizeof(T)-byte chunk off the free list. Throws std::invalid_argument when num_bytes is larger than a chunk (serving it would overrun the neighbouring chunk and corrupt the free list stored inside it) or when no free chunk is left. alignment is ignored. */ 62 | void *do_allocate(std::size_t num_bytes, 63 | [[maybe_unused]] std::size_t alignment) override { 64 | if (num_bytes > sizeof(T)) { 65 | throw std::invalid_argument("requested size exceeds fixed chunk size"); 66 | } else if (free_ == pool_ + pool_size_) { 67 | throw std::invalid_argument("fixed allocator out of memory"); 68 | } 69 | auto x = free_; 70 | free_ = std::bit_cast(*(reinterpret_cast(x))); 71 | return reinterpret_cast(x); 72 | } 73 | void do_deallocate(void *x, [[maybe_unused]] std::size_t num_bytes, 74 | [[maybe_unused]] std::size_t alignment) override { /* Pushes the chunk back onto the head of the free list. */ 75 | auto x_chunk = reinterpret_cast(x); 76 | *(reinterpret_cast(x)) = std::bit_cast(free_); 77 | free_ = x_chunk; 78 | } 79 | /* Equal when other is this very object, or when the cast below succeeds and pool pointer, pool size and free-list head all match. */ 80 | [[nodiscard]] bool 81 | do_is_equal(const stdpmr::memory_resource &other) const noexcept override { 82 | if (this == &other) { 83 | return true; 84 | } 85 | auto op = dynamic_cast(&other); 86 | return op && op->pool_ == pool_ && op->pool_size_ == pool_size_ && 87 | op->free_ == free_; 88 | } 89 | }; 90 | 91 | template class AllocatorFixed { 92 | stdpmr::memory_resource *mem_res_; 93 | 94 | public: 95 | template struct rebind { 96 | using other = AllocatorFixed; 97 | }; 98 | 99 | using value_type = T; 100 | 101 | explicit AllocatorFixed( 102 | stdpmr::memory_resource *mem_res = stdpmr::get_default_resource()) 103 | : mem_res_{mem_res} {} 104 | 105 | template 106 | AllocatorFixed(const AllocatorFixed &other) 107 | : AllocatorFixed(other.get_memory_resource()) {} 108 | 109 | T *allocate(size_t n) { 110 | 
return reinterpret_cast( 111 | mem_res_->allocate(sizeof(T) * n, std::alignment_of_v)); 112 | } 113 | 114 | void deallocate(T *ptr, size_t n) { 115 | mem_res_->deallocate(reinterpret_cast(ptr), sizeof(T) * n, 116 | std::alignment_of_v); 117 | } 118 | 119 | [[nodiscard]] stdpmr::memory_resource * 120 | get_memory_resource() const noexcept { 121 | return mem_res_; 122 | } 123 | }; 124 | 125 | template struct isDiskAlloc : std::false_type {}; 126 | 127 | template 128 | struct isDiskAlloc> : std::true_type {}; 129 | 130 | } // namespace frozenca 131 | 132 | #endif //__FC_DISK_FIXED_ALLOC_H__ 133 | -------------------------------------------------------------------------------- /.github/workflows/action-cpp.yml: -------------------------------------------------------------------------------- 1 | name: build and cpack 2 | on: [ push, pull_request ] 3 | jobs: 4 | ubuntu-22-04: 5 | runs-on: ubuntu-latest 6 | name: Build on ${{ matrix.container }} x86_64 7 | strategy: 8 | # 9 | # matrix for containers 10 | # 11 | matrix: 12 | container: 13 | - ubuntu:latest 14 | - debian:latest 15 | - fedora:latest 16 | # - alt:sisyphus 17 | 18 | container: 19 | image: ${{ matrix.container }} 20 | 21 | steps: 22 | - name: Get current date 23 | id: date 24 | run: echo "date=$(date +'%Y-%m-%d %H:%M:%S')" >> $GITHUB_OUTPUT 25 | - name: Get commit sha 26 | id: git_sha 27 | run: echo "git_sha=$(echo $GITHUB_SHA)" >> $GITHUB_OUTPUT 28 | - uses: actions/checkout@v4 29 | # install dependencies 30 | - name: devel-pkgs 31 | run: | 32 | case "${{ matrix.container }}" in 33 | ubuntu*|debian*) 34 | apt-get update -y && apt-get install -yq binutils git make cmake catch2 gcc g++ lsb-release 35 | ;; 36 | fedora*) 37 | yum update -y && yum install -yq binutils git make cmake catch2-devel gcc gcc-c++ rpm-build redhat-lsb 38 | ;; 39 | alt*) 40 | apt-get update -y && apt-get install -yq binutils git make cmake ctest catch2-devel gcc gcc-c++ rpm-build lsb-release 41 | ;; 42 | esac 43 | # build project 44 | - 
name: mkdir 45 | run: mkdir cmake-build-release 46 | - name: cmake cmake-build-release 47 | run: cmake -DCOMMITTER_DATE="${{ steps.date.outputs.date }}" -DCOMMITTER_FULLSHA="${{ steps.git_sha.outputs.git_sha }}" -DCOMMITTER_SHORTSHA="$(echo ${{ steps.git_sha.outputs.git_sha }} | cut -c1-7)" -DCMAKE_BUILD_TYPE=Release -Bcmake-build-release -H. 48 | - name: cmake make 49 | run: cmake --build cmake-build-release/ --target all --parallel 50 | - name: get-version 51 | id: get-version 52 | run: echo "prj_ver=$(cat ./VERSION.txt)" >> $GITHUB_OUTPUT 53 | - name: ctest 54 | run: cd cmake-build-release && ctest && cd .. 55 | - name: cpack 56 | run: | 57 | case "${{ matrix.container }}" in 58 | ubuntu*|debian*) 59 | cd cmake-build-release && cpack -G DEB && cd .. 60 | ;; 61 | fedora*) 62 | cd cmake-build-release && cpack -G RPM && cd .. 63 | ;; 64 | alt*) 65 | cd cmake-build-release && echo "%_allow_root_build 1" > /etc/rpm/macros.d/02-enable-build-root && cpack -G RPM && cd .. 66 | ;; 67 | esac 68 | - uses: mad9000/actions-find-and-replace-string@3 69 | id: container 70 | with: 71 | source: ${{ matrix.container }} 72 | find: ':' # we want to remove : from container name 73 | replace: '-' # and replace it with - 74 | - name: Upload BTree binary 75 | uses: actions/upload-artifact@v4 76 | with: 77 | name: ${{ format('BTree-{0}.{1}', steps.get-version.outputs.prj_ver, steps.container.outputs.value) }} 78 | path: cmake-build-release/${{ format('BTree-{0}-noarch.???', steps.get-version.outputs.prj_ver) }} 79 | 80 | avx512f-gcc-cmake: 81 | runs-on: ubuntu-22.04 82 | steps: 83 | - uses: actions/checkout@v4 84 | - run: sudo apt-get update -y && sudo apt-get install -yq binutils git make cmake catch2 gcc g++ lsb-release 85 | - name: mkdir 86 | run: mkdir cmake-build-release 87 | - name: cmake cmake-build-release 88 | run: cmake -DFORCE_USE_SIMD=ON -DCMAKE_BUILD_TYPE=Release -Bcmake-build-release -H. 
89 | - name: cmake make 90 | run: cmake --build cmake-build-release/ --target all --parallel 91 | - name: sde-test 92 | uses: petarpetrovt/setup-sde@v2.3 93 | - name: test 94 | run: cp cmake-build-release/fc_tests ${SDE_PATH} && cd ${SDE_PATH} && ./sde64 -knl -- ./fc_tests --benchmark-samples=1 95 | 96 | macos-clang-cmake: 97 | runs-on: macos-latest 98 | steps: 99 | - uses: actions/checkout@v4 100 | - run: brew install git ninja cmake catch2 101 | - name: mkdir 102 | run: mkdir cmake-build-release 103 | - name: cmake cmake-build-release 104 | run: cmake -DFORCE_PREFER_BINARY_SEARCH=ON -DCMAKE_BUILD_TYPE=Release -Bcmake-build-release -H. 105 | - name: cmake make 106 | run: cmake --build cmake-build-release/ --target all --parallel 107 | - name: test 108 | run: cd cmake-build-release && ctest && cd .. 109 | -------------------------------------------------------------------------------- /include/fc/comp.h: -------------------------------------------------------------------------------- 1 | #ifndef __FC_COMP_H__ 2 | #define __FC_COMP_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #ifdef _MSC_VER 12 | #include 13 | #endif // _MSC_VER 14 | 15 | namespace frozenca { 16 | 17 | template 18 | concept CanUseSimd = (sizeof(K) == 4 || sizeof(K) == 8) && 19 | (std::signed_integral || std::floating_point); 20 | 21 | using regi = __m512i; 22 | using regf = __m512; 23 | using regd = __m512d; 24 | 25 | inline regi broadcast(std::int32_t key) { return _mm512_set1_epi32(key); } 26 | 27 | inline regi broadcast(std::int64_t key) { return _mm512_set1_epi64(key); } 28 | 29 | inline regf broadcast(float key) { return _mm512_set1_ps(key); } 30 | 31 | inline regd broadcast(double key) { return _mm512_set1_pd(key); } 32 | 33 | inline unsigned int cmp(regi key, const std::int32_t *key_ptr) { 34 | regi keys_to_comp = 35 | _mm512_load_si512(reinterpret_cast(key_ptr)); 36 | return _mm512_cmpgt_epi32_mask(key, keys_to_comp); 37 | } 38 | 39 
| inline unsigned int cmp(regi key, const std::int64_t *key_ptr) { 40 | regi keys_to_comp = 41 | _mm512_load_si512(reinterpret_cast(key_ptr)); 42 | return _mm512_cmpgt_epi64_mask(key, keys_to_comp); 43 | } 44 | 45 | inline unsigned int cmp(regf key, const float *key_ptr) { 46 | regf keys_to_comp = _mm512_load_ps(key_ptr); 47 | return _mm512_cmp_ps_mask(key, keys_to_comp, _MM_CMPINT_GT); 48 | } 49 | 50 | inline unsigned int cmp(regd key, const double *key_ptr) { 51 | regd keys_to_comp = _mm512_load_pd(key_ptr); 52 | return _mm512_cmp_pd_mask(key, keys_to_comp, _MM_CMPINT_GT); 53 | } 54 | 55 | inline unsigned int cmp(const std::int32_t *key_ptr, regi key) { 56 | regi keys_to_comp = 57 | _mm512_load_si512(reinterpret_cast(key_ptr)); 58 | return _mm512_cmpgt_epi32_mask(keys_to_comp, key); 59 | } 60 | 61 | inline unsigned int cmp(const std::int64_t *key_ptr, regi key) { 62 | regi keys_to_comp = 63 | _mm512_load_si512(reinterpret_cast(key_ptr)); 64 | return _mm512_cmpgt_epi64_mask(keys_to_comp, key); 65 | } 66 | 67 | inline unsigned int cmp(const float *key_ptr, regf key) { 68 | regf keys_to_comp = _mm512_load_ps(key_ptr); 69 | return _mm512_cmp_ps_mask(keys_to_comp, key, _MM_CMPINT_GT); 70 | } 71 | 72 | inline unsigned int cmp(const double *key_ptr, regd key) { 73 | regd keys_to_comp = _mm512_load_pd(key_ptr); 74 | return _mm512_cmp_pd_mask(keys_to_comp, key, _MM_CMPINT_GT); 75 | } 76 | 77 | template struct SimdTrait { 78 | static constexpr int shift = (sizeof(K) == 4) ? 4 : 3; 79 | static constexpr int mask = (sizeof(K) == 4) ? 0xF : 0x7; 80 | static constexpr int unit = (sizeof(K) == 4) ? 
16 : 8; 81 | }; 82 | 83 | template 84 | inline std::ptrdiff_t get_lb_simd(K key, const K *first, const K *last) { 85 | const auto len = static_cast(last - first); 86 | const K *curr = first; 87 | auto key_broadcast = broadcast(key); 88 | int mask = 0; 89 | ptrdiff_t offset = 0; 90 | while (offset < len) { 91 | if constexpr (less) { 92 | mask = ~cmp(key_broadcast, curr); 93 | } else { 94 | mask = ~cmp(curr, key_broadcast); 95 | } 96 | #ifdef _MSC_VER 97 | unsigned long i = 0; 98 | _BitScanForward(&i, mask); 99 | #else 100 | auto i = __builtin_ffs(mask) - 1; 101 | #endif // _MSC_VER 102 | if (i < SimdTrait::unit) { 103 | return offset + i; 104 | } 105 | curr += SimdTrait::unit; 106 | offset += SimdTrait::unit; 107 | } 108 | return len; 109 | } 110 | 111 | template 112 | inline std::ptrdiff_t get_ub_simd(K key, const K *first, const K *last) { 113 | const auto len = static_cast(last - first); 114 | const K *curr = first; 115 | auto key_broadcast = broadcast(key); 116 | int mask = 0; 117 | ptrdiff_t offset = 0; 118 | while (offset < len) { 119 | if constexpr (less) { 120 | mask = cmp(curr, key_broadcast); 121 | } else { 122 | mask = cmp(key_broadcast, curr); 123 | } 124 | #ifdef _MSC_VER 125 | unsigned long i = 0; 126 | auto isnonzero = _BitScanForward(&i, mask); 127 | if (isnonzero && i < SimdTrait::unit) { 128 | return offset + i; 129 | } 130 | #else 131 | auto i = __builtin_ffs(mask) - 1; 132 | if (i > -1) { 133 | return offset + i; 134 | } 135 | #endif // _MSC_VER 136 | curr += SimdTrait::unit; 137 | offset += SimdTrait::unit; 138 | } 139 | return len; 140 | } 141 | 142 | } // namespace frozenca 143 | 144 | #endif //__FC_COMP_H__ 145 | -------------------------------------------------------------------------------- /include/fc/mmfile_win.h: -------------------------------------------------------------------------------- 1 | #ifndef FC_MMFILE_WIN_H 2 | #define FC_MMFILE_WIN_H 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace frozenca { 
11 | 12 | class MemoryMappedFileImpl { 13 | public: 14 | static inline constexpr std::size_t new_file_size_ = (1UL << 20UL); 15 | using handle_type = HANDLE; 16 | using path_type = std::filesystem::path::value_type; 17 | 18 | private: 19 | const std::filesystem::path path_; 20 | void *data_ = nullptr; 21 | std::size_t size_ = 0; 22 | 23 | handle_type handle_ = 0; 24 | int flags_ = 0; 25 | handle_type mapped_handle_ = 0; 26 | 27 | public: 28 | MemoryMappedFileImpl(const std::filesystem::path &path, 29 | std::size_t init_file_size = new_file_size_, 30 | bool trunc = false) 31 | : path_{path} { 32 | bool exists = std::filesystem::exists(path); 33 | if (exists && trunc) { 34 | std::filesystem::remove(path); 35 | exists = false; 36 | } 37 | open_file(path.c_str(), exists, init_file_size); 38 | map_file(); 39 | } 40 | 41 | ~MemoryMappedFileImpl() noexcept { 42 | if (!data_) { 43 | return; 44 | } 45 | bool error = false; 46 | error = !unmap_file() || error; 47 | error = !close_file() || error; 48 | } 49 | 50 | private: 51 | void open_file(const path_type *path, bool exists, 52 | std::size_t init_file_size) { 53 | DWORD dwDesiredAccess = GENERIC_READ | GENERIC_WRITE; 54 | DWORD dwCreationDisposition = exists ? 
OPEN_EXISTING : CREATE_ALWAYS; 55 | DWORD dwFlagsandAttributes = FILE_ATTRIBUTE_TEMPORARY; 56 | handle_ = CreateFileW(path, dwDesiredAccess, FILE_SHARE_READ, 0, 57 | dwCreationDisposition, dwFlagsandAttributes, 0); 58 | if (handle_ == INVALID_HANDLE_VALUE) { 59 | throw std::runtime_error("file open failed\n"); 60 | } 61 | 62 | if (!exists) { 63 | LONG sizehigh = (init_file_size >> (sizeof(LONG) * 8)); 64 | LONG sizelow = (init_file_size & 0xffffffff); 65 | DWORD result = SetFilePointer(handle_, sizelow, &sizehigh, FILE_BEGIN); 66 | if ((result == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) || 67 | !SetEndOfFile(handle_)) { 68 | throw std::runtime_error("failed setting file size\n"); 69 | } 70 | } 71 | 72 | typedef BOOL(WINAPI * func)(HANDLE, PLARGE_INTEGER); 73 | HMODULE hmod = GetModuleHandleA("kernel32.dll"); 74 | func get_size = 75 | reinterpret_cast(GetProcAddress(hmod, "GetFileSizeEx")); 76 | if (get_size) { 77 | LARGE_INTEGER info; 78 | if (get_size(handle_, &info)) { 79 | std::int64_t size = 80 | ((static_cast(info.HighPart) << 32) | info.LowPart); 81 | size_ = static_cast(size); 82 | } else { 83 | throw std::runtime_error("failed querying file size"); 84 | } 85 | } else { 86 | DWORD hi = 0; 87 | DWORD low = 0; 88 | if ((low = GetFileSize(handle_, &hi)) != INVALID_FILE_SIZE) { 89 | std::int64_t size = (static_cast(hi) << 32) | low; 90 | size_ = static_cast(size); 91 | } else { 92 | throw std::runtime_error("failed querying file size"); 93 | return; 94 | } 95 | } 96 | } 97 | 98 | void map_file() { 99 | DWORD protect = PAGE_READWRITE; 100 | mapped_handle_ = CreateFileMappingA(handle_, 0, protect, 0, 0, 0); 101 | if (!mapped_handle_) { 102 | throw std::runtime_error("failed mapping file"); 103 | } 104 | 105 | DWORD access = FILE_MAP_WRITE; 106 | void *data = MapViewOfFileEx(mapped_handle_, access, 0, 0, size_, 0); 107 | if (!data) { 108 | throw std::runtime_error("failed mapping file"); 109 | } 110 | data_ = data; 111 | } 112 | 113 | bool 
close_file() noexcept { return CloseHandle(handle_); } 114 | 115 | bool unmap_file() noexcept { 116 | bool error = false; 117 | error = !UnmapViewOfFile(data_) || error; 118 | error = !CloseHandle(mapped_handle_) || error; 119 | mapped_handle_ = NULL; 120 | return !error; 121 | } 122 | 123 | public: 124 | void resize(std::size_t new_size) { 125 | if (!data_) { 126 | throw std::runtime_error("file is closed\n"); 127 | } 128 | if (!unmap_file()) { 129 | throw std::runtime_error("failed unmappping file\n"); 130 | } 131 | 132 | std::int64_t offset = SetFilePointer(handle_, 0, 0, FILE_CURRENT); 133 | if (offset == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) { 134 | throw std::runtime_error("failed querying file pointer"); 135 | } 136 | LONG sizehigh = (new_size >> (sizeof(LONG) * 8)); 137 | LONG sizelow = (new_size & 0xffffffff); 138 | DWORD result = SetFilePointer(handle_, sizelow, &sizehigh, FILE_BEGIN); 139 | if ((result == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) || 140 | !SetEndOfFile(handle_)) { 141 | throw std::runtime_error("failed resizing mapped file"); 142 | } 143 | sizehigh = (offset >> (sizeof(LONG) * 8)); 144 | sizelow = (offset & 0xffffffff); 145 | SetFilePointer(handle_, sizelow, &sizehigh, FILE_BEGIN); 146 | 147 | size_ = static_cast(new_size); 148 | map_file(); 149 | } 150 | 151 | [[nodiscard]] std::size_t size() const noexcept { return size_; } 152 | 153 | [[nodiscard]] void *data() noexcept { return data_; } 154 | 155 | [[nodiscard]] const void *data() const noexcept { return data_; } 156 | 157 | friend bool operator==(const MemoryMappedFileImpl &mmfile1, 158 | const MemoryMappedFileImpl &mmfile2) { 159 | auto res = 160 | (mmfile1.path_ == mmfile2.path_ && mmfile1.data_ == mmfile2.data_ && 161 | mmfile1.size_ == mmfile2.size_ && mmfile1.handle_ == mmfile2.handle_ && 162 | mmfile1.flags_ == mmfile2.flags_); 163 | 164 | res = res && (mmfile1.mapped_handle_ == mmfile2.mapped_handle_); 165 | 166 | return res; 167 | } 168 | 169 
| friend bool operator!=(const MemoryMappedFileImpl &mmfile1, 170 | const MemoryMappedFileImpl &mmfile2) { 171 | return !(mmfile1 == mmfile2); 172 | } 173 | }; 174 | 175 | } // namespace frozenca 176 | 177 | #endif // FC_MMFILE_WIN_H 178 | -------------------------------------------------------------------------------- /test/unittest.cpp: -------------------------------------------------------------------------------- 1 | #define _CONTROL_IN_TEST 2 | 3 | #ifndef CATCH_CONFIG_MAIN 4 | #define CATCH_CONFIG_MAIN 5 | #endif // CATCH_CONFIG_MAIN 6 | 7 | #include "fc_catch2.h" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "fc/btree.h" 18 | 19 | namespace fc = frozenca; 20 | 21 | TEST_CASE("BTree insert-lookup-erase") { 22 | fc::BTreeSet btree; 23 | constexpr int n = 100; 24 | 25 | std::mt19937 gen(std::random_device{}()); 26 | 27 | std::vector v(n); 28 | std::iota(v.begin(), v.end(), 0); 29 | SECTION("Random insert") { 30 | std::ranges::shuffle(v, gen); 31 | for (auto num : v) { 32 | btree.insert(num); 33 | } 34 | } 35 | SECTION("Random lookup") { 36 | std::ranges::shuffle(v, gen); 37 | for (auto num : v) { 38 | btree.insert(num); 39 | } 40 | std::ranges::shuffle(v, gen); 41 | for (auto num : v) { 42 | REQUIRE(btree.contains(num)); 43 | } 44 | } 45 | SECTION("Random erase") { 46 | std::ranges::shuffle(v, gen); 47 | for (auto num : v) { 48 | btree.insert(num); 49 | } 50 | std::ranges::shuffle(v, gen); 51 | for (auto num : v) { 52 | REQUIRE(btree.erase(num)); 53 | } 54 | } 55 | } 56 | 57 | TEST_CASE("BTree std::initializer_list-test") { 58 | fc::BTreeSet btree{1, 4, 3, 2, 3, 3, 6, 5, 8}; 59 | REQUIRE(btree.size() == 7); 60 | } 61 | 62 | TEST_CASE("Multiset test") { 63 | fc::BTreeMultiSet btree{1, 4, 3, 2, 3, 3, 6, 5, 8}; 64 | REQUIRE(btree.size() == 9); 65 | REQUIRE_NOTHROW(btree.erase(3)); 66 | REQUIRE(btree.size() == 6); 67 | } 68 | 69 | TEST_CASE("Order statistic test") { 70 | fc::BTreeSet btree; 
71 | constexpr int n = 100; 72 | 73 | for (int i = 0; i < n; ++i) { 74 | REQUIRE_NOTHROW(btree.insert(i)); 75 | } 76 | 77 | for (int i = 0; i < n; ++i) { 78 | REQUIRE(btree.kth(i) == i); 79 | } 80 | 81 | for (int i = 0; i < n; ++i) { 82 | REQUIRE(btree.order(btree.find(i)) == i); 83 | } 84 | } 85 | 86 | TEST_CASE("Enumerate") { 87 | fc::BTreeSet btree; 88 | constexpr int n = 100; 89 | 90 | for (int i = 0; i < n; ++i) { 91 | REQUIRE_NOTHROW(btree.insert(i)); 92 | } 93 | auto rg = btree.enumerate(20, 30); 94 | REQUIRE(std::ranges::distance(rg.begin(), rg.end()) == 11); 95 | 96 | SECTION("erase_if test") { 97 | REQUIRE_NOTHROW(btree.erase_if([](auto n) { return n >= 20 && n <= 90; })); 98 | REQUIRE(btree.size() == 29); 99 | } 100 | } 101 | 102 | TEST_CASE("BTreeMap") { 103 | fc::BTreeMap btree; 104 | 105 | REQUIRE_NOTHROW(btree["asd"] = 3); 106 | REQUIRE_NOTHROW(btree["a"] = 6); 107 | REQUIRE_NOTHROW(btree["bbb"] = 9); 108 | REQUIRE_NOTHROW(btree["asdf"] = 8); 109 | REQUIRE_NOTHROW(btree["asdf"] = 333); 110 | REQUIRE(btree["asdf"] == 333); 111 | 112 | REQUIRE_NOTHROW(btree.emplace("asdfgh", 200)); 113 | REQUIRE(btree["asdfgh"] == 200); 114 | } 115 | 116 | TEST_CASE("Join/Split") { 117 | fc::BTreeSet btree1; 118 | for (int i = 0; i < 100; ++i) { 119 | REQUIRE_NOTHROW(btree1.insert(i)); 120 | } 121 | 122 | fc::BTreeSet btree2; 123 | for (int i = 101; i < 300; ++i) { 124 | REQUIRE_NOTHROW(btree2.insert(i)); 125 | } 126 | fc::BTreeSet btree3; 127 | 128 | REQUIRE_NOTHROW( 129 | btree3 = fc::join(std::move(btree1), 100, std::move(btree2))); 130 | 131 | for (int i = 0; i < 300; ++i) { 132 | REQUIRE(btree3.contains(i)); 133 | } 134 | 135 | auto [btree4, btree5] = fc::split(std::move(btree3), 200); 136 | for (int i = 0; i < 200; ++i) { 137 | REQUIRE(btree4.contains(i)); 138 | } 139 | REQUIRE_FALSE(btree5.contains(200)); 140 | 141 | for (int i = 201; i < 300; ++i) { 142 | REQUIRE(btree5.contains(i)); 143 | } 144 | } 145 | 146 | TEST_CASE("Multiset split") { 147 | 
fc::BTreeMultiSet btree6; 148 | REQUIRE_NOTHROW(btree6.insert(0)); 149 | REQUIRE_NOTHROW(btree6.insert(2)); 150 | for (int i = 0; i < 100; ++i) { 151 | REQUIRE_NOTHROW(btree6.insert(1)); 152 | } 153 | auto [btree7, btree8] = fc::split(std::move(btree6), 1); 154 | REQUIRE(btree7.size() == 1); 155 | REQUIRE(btree8.size() == 1); 156 | } 157 | 158 | TEST_CASE("Two arguments join") { 159 | fc::BTreeSet tree1; 160 | for (int i = 0; i < 100; ++i) { 161 | REQUIRE_NOTHROW(tree1.insert(i)); 162 | } 163 | fc::BTreeSet tree2; 164 | for (int i = 100; i < 200; ++i) { 165 | REQUIRE_NOTHROW(tree2.insert(i)); 166 | } 167 | auto tree3 = fc::join(std::move(tree1), std::move(tree2)); 168 | for (int i = 0; i < 200; ++i) { 169 | REQUIRE(tree3.contains(i)); 170 | } 171 | } 172 | 173 | TEST_CASE("Three arguments split") { 174 | fc::BTreeSet tree1; 175 | for (int i = 0; i < 100; ++i) { 176 | tree1.insert(i); 177 | } 178 | auto [tree2, tree3] = fc::split(std::move(tree1), 10, 80); 179 | REQUIRE(tree2.size() == 10); 180 | REQUIRE(tree3.size() == 19); 181 | } 182 | 183 | TEST_CASE("Multiset erase") { 184 | fc::BTreeMultiSet tree1; 185 | REQUIRE_NOTHROW(tree1.insert(0)); 186 | for (int i = 0; i < 100; ++i) { 187 | REQUIRE_NOTHROW(tree1.insert(1)); 188 | } 189 | REQUIRE_NOTHROW(tree1.insert(2)); 190 | 191 | REQUIRE_NOTHROW(tree1.erase(1)); 192 | 193 | REQUIRE(tree1.size() == 2); 194 | } 195 | 196 | TEST_CASE("Range insert-1") { 197 | fc::BTreeSet btree; 198 | REQUIRE_NOTHROW(btree.insert(1)); 199 | REQUIRE_NOTHROW(btree.insert(10)); 200 | 201 | std::vector v{2, 5, 4, 3, 7, 6, 6, 6, 2, 8, 8, 9}; 202 | REQUIRE_NOTHROW(btree.insert_range(std::move(v))); 203 | 204 | for (int i = 1; i < 10; ++i) { 205 | REQUIRE(btree.contains(i)); 206 | } 207 | } 208 | 209 | TEST_CASE("Range insert-2") { 210 | fc::BTreeSet btree; 211 | REQUIRE_NOTHROW(btree.insert(1)); 212 | REQUIRE_NOTHROW(btree.insert(10)); 213 | 214 | std::vector v{2, 5, 4, 3, 7, 6, 6, 6, 2, 8, 8, 9, 10}; 215 | 
REQUIRE_NOTHROW(btree.insert_range(std::move(v))); 216 | 217 | for (int i = 1; i < 10; ++i) { 218 | REQUIRE(btree.contains(i)); 219 | } 220 | } 221 | 222 | TEST_CASE("count()") { 223 | fc::BTreeMultiSet btree2; 224 | REQUIRE_NOTHROW(btree2.insert(1)); 225 | REQUIRE_NOTHROW(btree2.insert(1)); 226 | REQUIRE(btree2.count(1) == 2); 227 | REQUIRE(btree2.count(0) == 0); 228 | REQUIRE(btree2.count(2) == 0); 229 | } 230 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_SOURCE_DIR}/cmake/modules") 2 | cmake_minimum_required(VERSION 3.16) 3 | 4 | set(MAJOR "0") 5 | set(MINOR "0") 6 | set(PATCH "1") 7 | cmake_policy(SET CMP0048 NEW) 8 | 9 | project(BTree VERSION ${MAJOR}.${MINOR}.${PATCH}) 10 | 11 | option(FORCE_USE_SIMD "force define FC_USE_SIMD (apllicable only for x86_64)" OFF) 12 | option(FORCE_PREFER_BINARY_SEARCH "force define FC_PREFER_BINARY_SEARCH (recommended for clang only)" OFF) 13 | 14 | set(CMAKE_CXX_STANDARD 20) 15 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 16 | 17 | find_package(VersionHeader) 18 | set(PROJECT_VERSION "${MAJOR}.${MINOR}.${PATCH}.${COMMITTER_SHORTSHA}") 19 | file(WRITE ${CMAKE_SOURCE_DIR}/VERSION.txt "${MAJOR}.${MINOR}.${PATCH}") 20 | 21 | find_package(Catch2 3 QUIET) 22 | set(Catch_VER 3) 23 | if (NOT Catch2_FOUND) 24 | find_package(Catch2 REQUIRED) 25 | set(Catch_VER 2) 26 | endif() 27 | set(CATCH_LIBS_ALIASES Catch2::Catch2 Catch2::Catch2WithMain) 28 | 29 | function(get_linux_lsb_release_information) 30 | execute_process(COMMAND sh -c "cat /etc/*release | grep -w ID | cut -d'=' -f 2" OUTPUT_VARIABLE LSB_RELEASE_ID_SHORT OUTPUT_STRIP_TRAILING_WHITESPACE) 31 | execute_process(COMMAND sh -c "cat /etc/*release | grep ^VERSION_ID | cut -d'=' -f 2" OUTPUT_VARIABLE LSB_RELEASE_VERSION_SHORT OUTPUT_STRIP_TRAILING_WHITESPACE) 32 | execute_process(COMMAND sh -c "cat 
/etc/*release | grep ^VERSION_CODENAME | cut -d'=' -f 2" OUTPUT_VARIABLE LSB_RELEASE_CODENAME_SHORT OUTPUT_STRIP_TRAILING_WHITESPACE) 33 | 34 | set(LSB_RELEASE_ID_SHORT "${LSB_RELEASE_ID_SHORT}" PARENT_SCOPE) 35 | set(LSB_RELEASE_VERSION_SHORT "${LSB_RELEASE_VERSION_SHORT}" PARENT_SCOPE) 36 | set(LSB_RELEASE_CODENAME_SHORT "${LSB_RELEASE_CODENAME_SHORT}" PARENT_SCOPE) 37 | endfunction() 38 | 39 | message(STATUS "COMMITTER_FULLSHA ${COMMITTER_FULLSHA}") 40 | message(STATUS "COMMITTER_SHORTSHA ${COMMITTER_SHORTSHA}") 41 | message(STATUS "COMMITTER_DATE ${COMMITTER_DATE}") 42 | message(STATUS "PROJECT_VERSION ${PROJECT_VERSION}") 43 | 44 | if(CMAKE_SYSTEM_NAME MATCHES "Linux") 45 | get_linux_lsb_release_information() 46 | message(STATUS "OS Linux ${LSB_RELEASE_ID_SHORT} ${LSB_RELEASE_VERSION_SHORT} ${LSB_RELEASE_CODENAME_SHORT}") 47 | else() 48 | message(STATUS "OS ${CMAKE_SYSTEM_NAME}") 49 | message(STATUS "OS VERSION ${CMAKE_SYSTEM_VERSION}") 50 | endif() 51 | 52 | 53 | add_library(BTree INTERFACE 54 | include/fc/comp.h 55 | include/fc/disk_fixed_alloc.h 56 | include/fc/mmfile.h 57 | include/fc/btree.h 58 | include/fc/disk_btree.h 59 | include/fc/mmfile_win.h 60 | include/fc/mmfile_nix.h 61 | include/fc/details.h) 62 | 63 | target_include_directories( 64 | BTree 65 | INTERFACE 66 | "$" 67 | "$" ) 68 | 69 | add_library(BTree::BTree ALIAS BTree) 70 | 71 | if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR FORCE_PREFER_BINARY_SEARCH) 72 | add_definitions(-DFC_PREFER_BINARY_SEARCH=1) 73 | endif() 74 | 75 | if (FORCE_USE_SIMD) 76 | add_definitions(-DFC_USE_SIMD=1) 77 | endif () 78 | 79 | add_executable(fc_tests 80 | test/test_statistics.h 81 | test/test_statistics.cpp 82 | test/fc_catch2.h 83 | test/unittest.cpp 84 | test/rwtest.cpp 85 | test/perftest.cpp 86 | test/perftest_no_simd.cpp 87 | test/perftest_string.cpp) 88 | 89 | if (${Catch_VER} EQUAL 2) 90 | message(STATUS "Used old target for catch2") 91 | if (NOT TARGET Catch2::Catch2WithMain) 92 | set(CATCH_LIBS_ALIASES 
Catch2::Catch2) 93 | endif() 94 | target_compile_definitions(fc_tests PRIVATE CATCH2_OLD) 95 | endif() 96 | 97 | target_include_directories(fc_tests PRIVATE ${CMAKE_CURRENT_LIST_DIR}) 98 | target_link_libraries(fc_tests PRIVATE ${CATCH_LIBS_ALIASES} BTree::BTree) 99 | 100 | if(MSVC) 101 | target_compile_options(fc_tests PRIVATE /W4 /WX /nologo /MDd /EHsc /std:c++latest /experimental:module) 102 | set(CMAKE_C_FLAGS_RELEASE "/Ox") 103 | set(CMAKE_CXX_FLAGS_RELEASE "/Ox") 104 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Ox") 105 | else() 106 | set(CMAKE_C_FLAGS_RELEASE "-O3") 107 | set(CMAKE_CXX_FLAGS_RELEASE "-O3") 108 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3") 109 | if (FORCE_USE_SIMD) 110 | target_compile_options(fc_tests PRIVATE -Wall -Wextra -Wpedantic -Werror -mavx512f) 111 | else () 112 | target_compile_options(fc_tests PRIVATE -Wall -Wextra -Wpedantic -Werror -march=native -mtune=generic) 113 | endif () 114 | if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL 14.0.3.14030022) 115 | target_link_options(fc_tests PRIVATE -fexperimental-library) 116 | endif() 117 | if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL 14) 118 | target_link_options(fc_tests PRIVATE -fexperimental-library) 119 | endif() 120 | endif() 121 | 122 | include(CTest) 123 | include(Catch) 124 | catch_discover_tests(fc_tests 125 | EXTRA_ARGS --benchmark-samples=1) 126 | 127 | set(CPACK_SOURCE_IGNORE_FILES 128 | #git files 129 | "\\\\.git/" 130 | "\\\\.github/" 131 | # temporary files 132 | "\\\\.swp$" 133 | # backup files 134 | "~$" 135 | # eclipse files 136 | "\\\\.cdtproject$" 137 | "\\\\.cproject$" 138 | "\\\\.project$" 139 | "\\\\.settings/" 140 | # others 141 | "\\\\.#" 142 | "/#" 143 | "/build/" 144 | "/_build/" 145 | "/\\\\.git/" 146 | "Makefile\\\\.in$" 147 | ) 148 | 149 | include(GNUInstallDirs) 150 | 151 | install( 152 | TARGETS BTree 153 | EXPORT BTree_Targets 154 | ARCHIVE DESTINATION 
${CMAKE_INSTALL_LIBDIR} 155 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 156 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) 157 | 158 | include(CMakePackageConfigHelpers) 159 | write_basic_package_version_file( 160 | "BTreeConfigVersion.cmake" 161 | VERSION ${MAJOR}.${MINOR}.${PATCH} 162 | COMPATIBILITY SameMajorVersion) 163 | 164 | configure_package_config_file( 165 | "${PROJECT_SOURCE_DIR}/cmake/BTreeConfig.cmake.in" 166 | "${PROJECT_BINARY_DIR}/BTreeConfig.cmake" 167 | INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/BTree/cmake) 168 | 169 | install( 170 | EXPORT BTree_Targets 171 | FILE BTreeTargets.cmake 172 | NAMESPACE BTree:: 173 | DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/BTree/cmake) 174 | 175 | install(FILES "${PROJECT_BINARY_DIR}/BTreeConfig.cmake" 176 | "${PROJECT_BINARY_DIR}/BTreeConfigVersion.cmake" 177 | DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/BTree/cmake) 178 | 179 | install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/fc 180 | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 181 | 182 | set(CPACK_PACKAGE_NAME "BTree") 183 | set(CPACK_PACKAGE_VERSION "${MAJOR}.${MINOR}.${PATCH}") 184 | set(CPACK_PACKAGE_VENDOR "community") 185 | set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-noarch") 186 | set(CMAKE_PROJECT_HOMEPAGE_URL "https://github.com/bas524/BTree") 187 | set(CPACK_PACKAGE_CONTACT "bas524") 188 | set(CPACK_PACKAGE_MAINTAINER "bas524") 189 | set(CPACK_PACKAGE_DESCRIPTION "A general-purpose high-performance lightweight STL-like modern C++ B-Tree") 190 | 191 | set(CPACK_DEBIAN_PACKAGE_NAME ${CPACK_PACKAGE_NAME}) 192 | set(CPACK_DEBIAN_PACKAGE_VERSION ${CPACK_PACKAGE_VERSION}) 193 | set(CPACK_DEBIAN_PACKAGE_MAINTAINER ${CPACK_PACKAGE_MAINTAINER}) 194 | set(CPACK_DEBIAN_PACKAGE_DESCRIPTION ${CPACK_PACKAGE_DESCRIPTION}) 195 | set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${CMAKE_PROJECT_HOMEPAGE_URL}) 196 | set(CPACK_DEBIAN_PACKAGE_LICENSE "Apache-2.0") 197 | set(CPACK_DEBIAN_PACKAGE_DEPENDS "") 198 | 199 | set(CPACK_RPM_PACKAGE_NAME 
${CPACK_PACKAGE_NAME}) 200 | set(CPACK_RPM_PACKAGE_VERSION ${CPACK_PACKAGE_VERSION}) 201 | set(CPACK_RPM_PACKAGE_MAINTAINER ${CPACK_PACKAGE_MAINTAINER}) 202 | set(CPACK_RPM_PACKAGE_DESCRIPTION ${CPACK_PACKAGE_DESCRIPTION}) 203 | set(CPACK_RPM_PACKAGE_URL ${CMAKE_PROJECT_HOMEPAGE_URL}) 204 | set(CPACK_RPM_PACKAGE_LICENSE "Apache-2.0") 205 | set(CPACK_RPM_PACKAGE_REQUIRES "") 206 | 207 | include(CPack) 208 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # B-Tree 2 | 3 | This library implements a general-purpose, header-only, STL-like B-Tree in C++, including support for memory-mapped disk files and fixed-size allocators. 4 | 5 | A B-Tree is a self-balancing tree data structure that maintains sorted data and allows searches, sequential access, insertions, and deletions in logarithmic time. Unlike self-balancing binary search trees, the [B-tree](https://en.wikipedia.org/wiki/B-tree) is well suited for storage systems that read and write relatively large blocks of data, such as databases and file systems. 6 | 7 | Just like the ordered associative containers in the C++ standard library, key-value pairs can be supported and duplicates can be allowed. 
8 | 9 | There are four specialized B-Tree classes: ```frozenca::BTreeSet```, ```frozenca::BTreeMultiSet```, ```frozenca::BTreeMap``` and ```frozenca::BTreeMultiMap```, which correspond to ```std::set```, ```std::multiset```, ```std::map``` and ```std::multimap``` respectively. 10 | 11 | ## How to use 12 | 13 | This library is header-only, so no additional setup process is required beyond including the headers. 14 | 15 | Or 16 | 17 | For CMake projects: 18 | 19 | Install one of the packages ```BTree.<version>.rpm``` or ```BTree.<version>.deb```, or include this project in yours, and then 20 | 21 | ```cmake 22 | find_package(BTree) 23 | #... 24 | target_link_libraries(${your_target} PRIVATE BTree::BTree) 25 | ``` 26 | 27 | 28 | ## Target OS/Compiler version 29 | 30 | This library aggressively uses C++20 features, and is verified to work with gcc 11.2 and MSVC 19.32. 31 | 32 | POSIX and Windows operating systems are supported in order to use the memory-mapped disk file interface. 33 | 34 | There are currently no plans to support C++17 and earlier. 35 | 36 | ## Example usages 37 | 38 | Usage is very similar to the C++ standard library ordered associative containers (i.e. 
```std::set``` and its friends). 39 | 40 | ```cpp 41 | #include "fc/btree.h" 42 | #include <iostream> 43 | #include <string> 44 | 45 | int main() { 46 | namespace fc = frozenca; 47 | fc::BTreeSet<int> btree; 48 | 49 | btree.insert(3); 50 | btree.insert(4); 51 | btree.insert(2); 52 | btree.insert(1); 53 | btree.insert(5); 54 | 55 | // 1 2 3 4 5 56 | for (auto num : btree) { 57 | std::cout << num << ' '; 58 | } 59 | std::cout << '\n'; 60 | 61 | fc::BTreeMap<std::string, int> strtree; 62 | 63 | strtree["asd"] = 3; 64 | strtree["a"] = 6; 65 | strtree["bbb"] = 9; 66 | strtree["asdf"] = 8; 67 | 68 | for (const auto &[k, v] : strtree) { 69 | std::cout << k << ' ' << v << '\n'; 70 | } 71 | 72 | strtree["asdf"] = 333; 73 | 74 | // 333 75 | std::cout << strtree["asdf"] << '\n'; 76 | 77 | strtree.emplace("asdfgh", 200); 78 | for (const auto &[k, v] : strtree) { 79 | std::cout << k << ' ' << v << '\n'; 80 | } 81 | } 82 | ``` 83 | 84 | You can refer to more example usages in ```test/unittest.cpp```. 85 | 86 | Users can specify a fanout parameter for the B-Tree: the default is 64. 87 | 88 | ```cpp 89 | // btree with fanout 128 90 | fc::BTreeSet<int, 128> btree; 91 | ``` 92 | 93 | The smallest possible value for fanout is 2, where a B-Tree boils down to a [2-3-4 tree](https://en.wikipedia.org/wiki/2%E2%80%933%E2%80%934_tree). 94 | 95 | ## Supported operations 96 | 97 | Other than the regular operations supported by ```std::set``` and its friends (```lower_bound()```, ```upper_bound()```, ```equal_range()```, etc.), the following operations are supported. 98 | 99 | ```tree.count(const key_type& key)``` : Returns the number of elements in the tree whose key is equivalent to ```key```. Time complexity: ```O(log n)``` 100 | 101 | ```tree.kth(std::ptrdiff_t k)``` : Returns the k-th element in the tree, using a 0-based index. Time complexity: ```O(log n)``` 102 | 103 | ```tree.order(const_iterator_type iter)``` : Returns the rank of the element pointed to by the iterator in the tree, as a 0-based index. 
Time complexity: ```O(log n)``` 104 | 105 | ```tree.enumerate(const key_type& a, const key_type& b)``` : Range query. Returns the range of values whose keys are in ```[a, b]```. Time complexity: ```O(log n)``` 106 | 107 | ```tree.insert_range(ForwardIter first, ForwardIter last)``` : Inserts the elements in ```[first, last)```. The range version also exists. Time complexity: ```O(k log k + log n)``` if all of the elements in the range fit between two elements in the tree, otherwise ```O(k log n)``` 108 | 109 | ```tree.erase_range(const key_type& a, const key_type& b)``` : Erases the elements whose keys are in ```[a, b]```. Time complexity: ```O(log n) + O(k)``` (NOT ```O(k log n)```) 110 | 111 | ```frozenca::join(Tree&& tree1, Tree&& tree2)``` : Joins two trees into a single tree. The largest key in ```tree1``` should be less than or equal to the smallest key in ```tree2```. Time complexity: ```O(log n)``` 112 | 113 | ```frozenca::join(Tree&& tree1, value_type val, Tree&& tree2)``` : Joins two trees into a single tree. The largest key in ```tree1``` should be less than or equal to the key of ```val``` and the smallest key in ```tree2``` should be greater than or equal to the key of ```val```. Time complexity: ```O(1 + diff_height)``` 114 | 115 | ```frozenca::split(Tree&& tree, key_type key)``` : Splits a tree into two trees, so that the first tree contains keys less than ```key```, and the second tree contains keys greater than ```key```. Time complexity: ```O(log n)``` 116 | 117 | ```frozenca::split(Tree&& tree, key_type key1, key_type key2)``` : Splits a tree into two trees, so that the first tree contains keys less than ```key1```, and the second tree contains keys greater than ```key2```. ```key2``` must be greater than or equal to ```key1```. Time complexity: ```O(log n) + O(k)``` 118 | 119 | ## Iterators 120 | STL-compatible iterators are fully supported. 
(Both ```const``` and non-```const```.) However, unlike ```std::set``` and its friends, all insert and erase operations can invalidate iterators. This is because ```std::set``` and its friends are node-based containers where a single node can only have a single key, but a node in B-Trees can have multiple keys. 121 | 122 | ## Concurrency 123 | 124 | Currently, thread safety is not guaranteed. Lock-free support is the first TODO, but contributions are welcome if you're interested. 125 | 126 | ## Linear search vs Binary search 127 | 128 | The core operation of a B-Tree is a search in the sorted key array of each node. For small arrays with primitive key types that have relatively cheap comparisons, linear search is often better than binary search. The array size at which binary search starts to win may vary widely between compilers. 129 | 130 | If you use Clang, I recommend that you set this variable (```FC_PREFER_BINARY_SEARCH```) to 1. For gcc users, it seems better not to change the variable (this may change with future gcc optimizations): 131 | https://github.com/frozenca/BTree/blob/7083e8034b5905552cc6a3b8277452c56c05d587/fc_btree.h#L22 132 | 133 | ## SIMD Operation 134 | 135 | When keys are signed integers or floating point types, if your machine supports AVX-512, you can activate SIMD intrinsics to speed up B-Tree operations, by setting this variable (```FC_USE_SIMD```) to 1: 136 | https://github.com/frozenca/BTree/blob/3498a53e75e916015561008cf91fecc3f7df69d1/fc_btree.h#L4 137 | (Inspired by: [Static B-Trees](https://en.algorithmica.org/hpc/data-structures/s-tree/)) 138 | 139 | ## Disk B-Tree 140 | 141 | You can use a specialized variant that utilizes memory-mapped disk files and an associated fixed-size allocator. You have to include ```fc/disk_btree.h```, ```fc/disk_fixed_alloc.h``` and ```fc/mmfile.h``` to use it. 
142 | 143 | For this variant, supported types have stricter type constraints: each type must satisfy ```std::is_trivially_copyable_v<T>```, and its alignment should at least be the alignment of the pointer type in the machine (for both key type and value type for key-value pairs). 144 | 145 | The following code initializes a ```frozenca::DiskBTreeSet```, which generates a memory-mapped disk file ```database.bin``` and uses it, with an initial byte size of 32 megabytes. If the third argument is ```true```, it will destroy the existing file and create a new one (default is ```false```). You can't extend the pool size of the memory-mapped disk file once it has been initialized (doing so invalidates all pointers in the associated allocator). 146 | 147 | ```cpp 148 | fc::DiskBTreeSet<int> btree("database.bin", 1UL << 25UL, true); 149 | ``` 150 | 151 | ## Serialization and deserialization 152 | 153 | Serialization/deserialization of B-Trees via byte streams using ```operator<<``` and ```operator>>``` is also supported when key types (and value types, if present) meet the above requirements for the disk B-Tree. See ```test/rwtest.cpp``` for how to do serialization/deserialization. 154 | 155 | ## Performance 156 | 157 | Using performance test code similar to ```test/perftest.cpp```, which inserts/retrieves/erases 1 million ```std::int64_t``` in random order, I see the following results on my machine (gcc 11.2, -O3, repeated 200 times per target), compared to ```std::set``` and Google's B-Tree implementation (https://code.google.com/archive/p/cpp-btree/): 158 | 159 | ``` 160 | Balanced tree test 161 | Warming up complete... 
162 | frozenca::BTreeSet test (fanout 64 - default, SIMD) 163 | Time to insert 1000000 elements: Average : 175.547ms, Stdev : 8.65575ms, 95% : 189.553ms, 164 | Time to lookup 1000000 elements: Average : 197.75ms, Stdev : 7.75456ms, 95% : 208.783ms, 165 | Time to erase 1000000 elements: Average : 211.274ms, Stdev : 10.3499ms, 95% : 225.221ms, 166 | 167 | frozenca::BTreeSet test (fanout 96, SIMD) 168 | Time to insert 1000000 elements: Average : 176.432ms, Stdev : 9.12931ms, 95% : 192.688ms, 169 | Time to lookup 1000000 elements: Average : 194.997ms, Stdev : 11.3563ms, 95% : 205.048ms, 170 | Time to erase 1000000 elements: Average : 212.86ms, Stdev : 11.3598ms, 95% : 228.145ms, 171 | 172 | frozenca::DiskBTreeSet test (fanout 128, SIMD) 173 | Time to insert 1000000 elements: Average : 187.797ms, Stdev : 8.69872ms, 95% : 202.318ms, 174 | Time to lookup 1000000 elements: Average : 200.799ms, Stdev : 7.10905ms, 95% : 211.436ms, 175 | Time to erase 1000000 elements: Average : 216.105ms, Stdev : 6.83771ms, 95% : 228.9ms, 176 | 177 | frozenca::BTreeSet test (fanout 128, SIMD) 178 | Time to insert 1000000 elements: Average : 189.536ms, Stdev : 15.3073ms, 95% : 221.393ms, 179 | Time to lookup 1000000 elements: Average : 204.741ms, Stdev : 17.8811ms, 95% : 232.494ms, 180 | Time to erase 1000000 elements: Average : 219.17ms, Stdev : 20.6449ms, 95% : 244.232ms, 181 | 182 | frozenca::BTreeSet test (fanout 64, uint64, don't use SIMD) 183 | Time to insert 1000000 elements: Average : 204.187ms, Stdev : 57.3915ms, 95% : 222.939ms, 184 | Time to lookup 1000000 elements: Average : 221.049ms, Stdev : 25.3429ms, 95% : 245.708ms, 185 | Time to erase 1000000 elements: Average : 249.832ms, Stdev : 52.1106ms, 95% : 288.095ms, 186 | 187 | std::set test 188 | Time to insert 1000000 elements: Average : 907.104ms, Stdev : 43.7566ms, 95% : 966.12ms, 189 | Time to lookup 1000000 elements: Average : 961.859ms, Stdev : 30.1132ms, 95% : 1019.59ms, 190 | Time to erase 1000000 elements: Average : 
990.027ms, Stdev : 37.1807ms, 95% : 1049.58ms, 191 | 192 | Google btree::btree_set test (fanout 64) 193 | Time to insert 1000000 elements: Average : 425.071ms, Stdev : 13.117ms, 95% : 434.819ms, 194 | Time to lookup 1000000 elements: Average : 377.009ms, Stdev : 15.2407ms, 95% : 385.736ms, 195 | Time to erase 1000000 elements: Average : 421.514ms, Stdev : 17.3882ms, 95% : 432.955ms, 196 | 197 | Google btree::btree_set test (fanout 256 - default value) 198 | Time to insert 1000000 elements: Average : 251.597ms, Stdev : 14.3492ms, 95% : 289.579ms, 199 | Time to lookup 1000000 elements: Average : 235.204ms, Stdev : 11.8999ms, 95% : 255.495ms, 200 | Time to erase 1000000 elements: Average : 250.782ms, Stdev : 12.1752ms, 95% : 270.575ms, 201 | ``` 202 | 203 | For 1 million ```std::string```s with length 1~50, I see the following results in my machine: 204 | ``` 205 | frozenca::BTreeSet test (fanout 64 - default, std::string) 206 | Time to insert 1000000 elements: Average : 1519.62ms, Stdev : 81.3793ms, 95% : 1685.13ms, 207 | Time to lookup 1000000 elements: Average : 1188.33ms, Stdev : 83.8154ms, 95% : 1392.47ms, 208 | Time to erase 1000000 elements: Average : 1570.44ms, Stdev : 93.771ms, 95% : 1747.73ms, 209 | 210 | frozenca::BTreeSet test (fanout 128, std::string) 211 | Time to insert 1000000 elements: Average : 1774.12ms, Stdev : 41.601ms, 95% : 1812.62ms, 212 | Time to lookup 1000000 elements: Average : 1089.02ms, Stdev : 22.8206ms, 95% : 1127.83ms, 213 | Time to erase 1000000 elements: Average : 1670.09ms, Stdev : 24.2791ms, 95% : 1711.33ms, 214 | 215 | std::set test (std::string) 216 | Time to insert 1000000 elements: Average : 1662.92ms, Stdev : 178.644ms, 95% : 1861.37ms, 217 | Time to lookup 1000000 elements: Average : 1666.16ms, Stdev : 127.095ms, 95% : 1845.49ms, 218 | Time to erase 1000000 elements: Average : 1639.79ms, Stdev : 82.7256ms, 95% : 1770.9ms, 219 | ``` 220 | 221 | 222 | ## Sanity check and unit test 223 | 224 | If you want to contribute and test 
the code, pay attention and use macro _CONTROL_IN_TEST, which will do full sanity checks on the entire tree: 225 | 226 | https://github.com/frozenca/BTree/blob/adf3c3309f45a65010d767df674c232c12f5c00a/fc_btree.h#L350 227 | https://github.com/frozenca/BTree/blob/adf3c3309f45a65010d767df674c232c12f5c00a/fc_btree.h#L531-#L532 228 | 229 | and by running ```test/unittest.cpp``` you can verify basic operations. 230 | 231 | 232 | ## License 233 | 234 | This library is licensed under either of Apache License Version 2.0 with LLVM Exceptions (LICENSE-Apache2-LLVM or https://llvm.org/foundation/relicensing/LICENSE.txt) or Boost Software License Version 1.0 (LICENSE-Boost or https://www.boost.org/LICENSE_1_0.txt). 235 | -------------------------------------------------------------------------------- /include/fc/btree.h: -------------------------------------------------------------------------------- 1 | #ifndef __FC_BTREE_H__ 2 | #define __FC_BTREE_H__ 3 | 4 | #ifndef FC_USE_SIMD 5 | #define FC_USE_SIMD 0 6 | #endif // FC_USE_SIMD 7 | 8 | #ifndef FC_PREFER_BINARY_SEARCH 9 | #define FC_PREFER_BINARY_SEARCH 0 10 | #endif //FC_PREFER_BINARY_SEARCH 11 | 12 | #if FC_USE_SIMD 13 | #include "fc/comp.h" 14 | #ifdef _MSC_VER 15 | #pragma warning(disable : 4324) 16 | #endif // MSC_VER 17 | #endif // FC_USE_SIMD 18 | 19 | #include "fc/details.h" 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | namespace frozenca { 39 | 40 | template struct BTreePair { 41 | K first; 42 | V second; 43 | 44 | BTreePair(K &&k, V &&v): first(std::forward(k)), second(std::forward(v)) {} 45 | 46 | BTreePair() = default; 47 | 48 | BTreePair(K &&k): first(std::forward(k)), second() {} 49 | 50 | BTreePair(V &&v): first(), second(std::forward(v)) {} 51 | 52 | operator std::pair() noexcept { return {first, second}; } 53 
| 54 | friend bool operator==(const BTreePair &lhs, const BTreePair &rhs) noexcept { 55 | return lhs.first == rhs.first && lhs.second == rhs.second; 56 | } 57 | 58 | friend bool operator!=(const BTreePair &lhs, const BTreePair &rhs) noexcept { 59 | return !(lhs == rhs); 60 | } 61 | }; 62 | 63 | template struct TreePairRef { using type = T &; }; 64 | 65 | template struct TreePairRef> { 66 | using type = std::pair; 67 | }; 68 | 69 | template using PairRefType = typename TreePairRef::type; 70 | 71 | template 72 | bool operator==(const BTreePair &lhs, 73 | const PairRefType> &rhs) noexcept { 74 | return lhs.first == rhs.first && lhs.second == rhs.second; 75 | } 76 | 77 | template 78 | bool operator!=(const BTreePair &lhs, 79 | const PairRefType> &rhs) noexcept { 80 | return !(lhs == rhs); 81 | } 82 | 83 | template 84 | bool operator==(const PairRefType> &lhs, 85 | const BTreePair &rhs) noexcept { 86 | return rhs == lhs; 87 | } 88 | 89 | template 90 | bool operator!=(const PairRefType> &lhs, 91 | const BTreePair &rhs) noexcept { 92 | return rhs != lhs; 93 | } 94 | 95 | template struct Projection { 96 | const auto &operator()(const V &value) const noexcept { return value.first; } 97 | }; 98 | 99 | template struct ProjectionIter { 100 | auto &operator()(V &iter_ref) noexcept { return iter_ref.first; } 101 | 102 | const auto &operator()(const V &iter_ref) const noexcept { 103 | return iter_ref.first; 104 | } 105 | }; 106 | 107 | template class Alloc> 109 | requires(Fanout >= 2) class BTreeBase; 110 | 111 | template class AllocTemplate> 113 | struct join_helper; 114 | 115 | template class AllocTemplate, typename T> 117 | struct split_helper; 118 | 119 | template class AllocTemplate> 121 | requires(Fanout >= 2) class BTreeBase { 122 | 123 | struct Node; 124 | using Alloc = AllocTemplate; 125 | 126 | struct Deleter { 127 | [[no_unique_address]] Alloc alloc_; 128 | Deleter(const Alloc &alloc) : alloc_{alloc} {} 129 | 130 | template void operator()(T *node) noexcept { 131 | 
alloc_.deallocate(node, 1); 132 | } 133 | }; 134 | 135 | // invariant: V is either K or pair for some Value type. 136 | static constexpr bool is_set_ = std::is_same_v; 137 | 138 | static constexpr bool is_disk_ = DiskAllocable; 139 | 140 | static constexpr auto disk_max_nkeys = static_cast(2 * Fanout); 141 | 142 | static constexpr bool use_linsearch_ = 143 | #if FC_PREFER_BINARY_SEARCH 144 | std::is_arithmetic_v && (Fanout <= 32); 145 | #else 146 | std::is_arithmetic_v && (Fanout <= 128); 147 | #endif // FC_PREFER_BINARY_SEARCH 148 | 149 | static constexpr bool CompIsLess = std::is_same_v || 150 | std::is_same_v>; 151 | static constexpr bool CompIsGreater = 152 | std::is_same_v || 153 | std::is_same_v>; 154 | 155 | static constexpr bool use_simd_ = 156 | #if FC_USE_SIMD 157 | is_set_ && CanUseSimd && (Fanout % (sizeof(K) == 4 ? 8 : 4) == 0) && 158 | (Fanout <= 128) && (CompIsLess || CompIsGreater); 159 | #else 160 | false; 161 | #endif // FC_USE_SIMD 162 | 163 | #if FC_USE_SIMD 164 | struct alignas(64) Node { 165 | #else 166 | struct Node { 167 | #endif // FC_USE_SIND 168 | using keys_type = 169 | std::conditional_t, 170 | std::vector>; 171 | 172 | // invariant: except root, t - 1 <= #(key) <= 2 * t - 1 173 | // invariant: for root, 0 <= #(key) <= 2 * t - 1 174 | // invariant: keys are sorted 175 | // invariant: for internal nodes, t <= #(child) == (#(key) + 1)) <= 2 * t 176 | // invariant: for root, 0 <= #(child) == (#(key) + 1)) <= 2 * t 177 | // invariant: for leaves, 0 == #(child) 178 | // invariant: child_0 <= key_0 <= child_1 <= ... 
<= key_(N - 1) <= child_N 179 | keys_type keys_; 180 | Node *parent_ = nullptr; 181 | attr_t size_ = 0; // number of keys in the subtree (not keys in this node) 182 | attr_t index_ = 0; 183 | attr_t height_ = 0; 184 | attr_t num_keys_ = 185 | 0; // number of keys in this node, used only for disk variant 186 | std::vector, 187 | std::unique_ptr>> 188 | children_; 189 | 190 | Node() { keys_.reserve(disk_max_nkeys); } 191 | 192 | // can throw bad_alloc 193 | Node() requires(is_disk_) { 194 | if constexpr (use_simd_) { 195 | keys_.fill(std::numeric_limits::max()); 196 | } 197 | } 198 | 199 | Node(const Node &node) = delete; 200 | Node &operator=(const Node &node) = delete; 201 | Node(Node &&node) = delete; 202 | Node &operator=(Node &&node) = delete; 203 | 204 | [[nodiscard]] bool is_leaf() const noexcept { return children_.empty(); } 205 | 206 | [[nodiscard]] bool is_full() const noexcept { 207 | if constexpr (is_disk_) { 208 | return num_keys_ == 2 * Fanout - 1; 209 | } else { 210 | return std::ssize(keys_) == 2 * Fanout - 1; 211 | } 212 | } 213 | 214 | [[nodiscard]] bool can_take_key() const noexcept { 215 | if constexpr (is_disk_) { 216 | return num_keys_ > Fanout - 1; 217 | } else { 218 | return std::ssize(keys_) > Fanout - 1; 219 | } 220 | } 221 | 222 | [[nodiscard]] bool has_minimal_keys() const noexcept { 223 | if constexpr (is_disk_) { 224 | return parent_ && num_keys_ == Fanout - 1; 225 | } else { 226 | return parent_ && std::ssize(keys_) == Fanout - 1; 227 | } 228 | } 229 | 230 | [[nodiscard]] bool empty() const noexcept { 231 | if constexpr (is_disk_) { 232 | return num_keys_ == 0; 233 | } else { 234 | return keys_.empty(); 235 | } 236 | } 237 | 238 | void clear_keys() noexcept { 239 | if constexpr (is_disk_) { 240 | num_keys_ = 0; 241 | } else { 242 | keys_.clear(); 243 | } 244 | } 245 | 246 | [[nodiscard]] attr_t nkeys() const noexcept { 247 | if constexpr (is_disk_) { 248 | return num_keys_; 249 | } else { 250 | return static_cast(std::ssize(keys_)); 251 
| } 252 | } 253 | }; 254 | 255 | struct BTreeNonConstIterTraits { 256 | using difference_type = attr_t; 257 | using value_type = V; 258 | using pointer = V *; 259 | using reference = V &; 260 | using iterator_category = std::bidirectional_iterator_tag; 261 | using iterator_concept = iterator_category; 262 | 263 | static reference make_ref(value_type &val) noexcept { return val; } 264 | }; 265 | 266 | struct BTreeConstIterTraits { 267 | using difference_type = attr_t; 268 | using value_type = V; 269 | using pointer = const V *; 270 | using reference = const V &; 271 | using iterator_category = std::bidirectional_iterator_tag; 272 | using iterator_concept = iterator_category; 273 | 274 | static reference make_ref(const value_type &val) noexcept { return val; } 275 | }; 276 | 277 | struct BTreeRefIterTraits { 278 | using difference_type = attr_t; 279 | using value_type = V; 280 | using pointer = V *; 281 | using reference = PairRefType; 282 | using iterator_category = std::bidirectional_iterator_tag; 283 | using iterator_concept = iterator_category; 284 | 285 | static reference make_ref(value_type &val) noexcept { 286 | return {std::cref(val.first), std::ref(val.second)}; 287 | } 288 | }; 289 | 290 | template struct BTreeIterator { 291 | using difference_type = typename IterTraits::difference_type; 292 | using value_type = typename IterTraits::value_type; 293 | using pointer = typename IterTraits::pointer; 294 | using reference = typename IterTraits::reference; 295 | using iterator_category = typename IterTraits::iterator_category; 296 | using iterator_concept = typename IterTraits::iterator_concept; 297 | 298 | Node *node_ = nullptr; 299 | attr_t index_; 300 | 301 | BTreeIterator() noexcept = default; 302 | 303 | BTreeIterator(Node *node, attr_t i) noexcept : node_{node}, index_{i} { 304 | assert(node_ && i >= 0 && i <= node_->nkeys()); 305 | } 306 | 307 | template 308 | BTreeIterator(const BTreeIterator &other) noexcept 309 | : BTreeIterator(other.node_, 
other.index_) {} 310 | 311 | reference operator*() const noexcept { 312 | return IterTraits::make_ref(node_->keys_[index_]); 313 | } 314 | 315 | pointer operator->() const noexcept { return &(node_->keys_[index_]); } 316 | 317 | // useful remark: 318 | // incrementing/decrementing iterator in an internal node will always 319 | // produce an iterator in a leaf node, 320 | // incrementing/decrementing iterator in a leaf node will always produce 321 | // an iterator in a leaf node for non-boundary keys, 322 | // an iterator in an internal node for boundary keys 323 | 324 | void climb() noexcept { 325 | while (node_->parent_ && index_ == node_->nkeys()) { 326 | index_ = node_->index_; 327 | node_ = node_->parent_; 328 | } 329 | } 330 | 331 | void dig() noexcept { 332 | while (!node_->children_.empty()) { 333 | auto id = index_; 334 | index_ = ssize(node_->children_[id]->keys_); 335 | node_ = node_->children_[id].get(); 336 | } 337 | } 338 | 339 | void increment() noexcept { 340 | // we don't do past to end() check for efficiency 341 | if (!node_->is_leaf()) { 342 | node_ = leftmost_leaf(node_->children_[index_ + 1].get()); 343 | index_ = 0; 344 | } else { 345 | ++index_; 346 | while (node_->parent_ && index_ == node_->nkeys()) { 347 | index_ = node_->index_; 348 | node_ = node_->parent_; 349 | } 350 | } 351 | } 352 | 353 | void decrement() noexcept { 354 | if (!node_->is_leaf()) { 355 | node_ = rightmost_leaf(node_->children_[index_].get()); 356 | index_ = node_->nkeys() - 1; 357 | } else if (index_ > 0) { 358 | --index_; 359 | } else { 360 | while (node_->parent_ && node_->index_ == 0) { 361 | node_ = node_->parent_; 362 | } 363 | if (node_->index_ > 0) { 364 | index_ = node_->index_ - 1; 365 | node_ = node_->parent_; 366 | } 367 | } 368 | } 369 | 370 | bool verify() noexcept { 371 | // Uncomment this line for testing 372 | // assert(!node_->parent_ || (index_ < node_->nkeys())); 373 | return true; 374 | } 375 | 376 | BTreeIterator &operator++() noexcept { 377 | 
increment(); 378 | assert(verify()); 379 | return *this; 380 | } 381 | 382 | BTreeIterator operator++(int) noexcept { 383 | BTreeIterator temp = *this; 384 | increment(); 385 | assert(verify()); 386 | return temp; 387 | } 388 | 389 | BTreeIterator &operator--() noexcept { 390 | decrement(); 391 | assert(verify()); 392 | return *this; 393 | } 394 | 395 | BTreeIterator operator--(int) noexcept { 396 | BTreeIterator temp = *this; 397 | decrement(); 398 | assert(verify()); 399 | return temp; 400 | } 401 | 402 | friend bool operator==(const BTreeIterator &x, 403 | const BTreeIterator &y) noexcept { 404 | return x.node_ == y.node_ && x.index_ == y.index_; 405 | } 406 | 407 | friend bool operator!=(const BTreeIterator &x, 408 | const BTreeIterator &y) noexcept { 409 | return !(x == y); 410 | } 411 | }; 412 | 413 | public: 414 | using key_type = K; 415 | using value_type = V; 416 | using reference_type = std::conditional_t>; 417 | using const_reference_type = const V &; 418 | using node_type = Node; 419 | using size_type = std::size_t; 420 | using difference_type = attr_t; 421 | using allocator_type = Alloc; 422 | using deleter_type = Deleter; 423 | using nodeptr_type = 424 | std::conditional_t, std::unique_ptr, 425 | std::unique_ptr>; 426 | using Proj = 427 | std::conditional_t>; 428 | using ProjIter = std::conditional_t>>; 430 | 431 | static_assert( 432 | std::indirect_strict_weak_order< 433 | Comp, std::projected>, Proj>>); 434 | 435 | // invariant: K cannot be mutated 436 | // so if V is K, uses a const iterator. 
437 | // if V is BTreePair, uses a non-const iterator (but only value can 438 | // be mutated) 439 | private: 440 | using nonconst_iterator_type = BTreeIterator; 441 | 442 | public: 443 | using iterator_type = BTreeIterator< 444 | std::conditional_t>; 445 | using const_iterator_type = BTreeIterator; 446 | using reverse_iterator_type = std::reverse_iterator; 447 | using const_reverse_iterator_type = 448 | std::reverse_iterator; 449 | 450 | private: 451 | [[no_unique_address]] Alloc alloc_; 452 | nodeptr_type root_; 453 | const_iterator_type begin_; 454 | 455 | protected: 456 | nodeptr_type make_node() { 457 | if constexpr (is_disk_) { 458 | auto buf = alloc_.allocate(1); 459 | Node *node = new (buf) Node(); 460 | return nodeptr_type(node, deleter_type(alloc_)); 461 | } else { 462 | return std::make_unique(); 463 | } 464 | } 465 | 466 | public: 467 | BTreeBase(const Alloc &alloc = Alloc{}) 468 | : alloc_{alloc}, root_(make_node()), begin_{root_.get(), 0} {} 469 | 470 | BTreeBase(std::initializer_list init, 471 | const Alloc &alloc = Alloc{}) 472 | : BTreeBase(alloc) { 473 | for (auto val : init) { 474 | insert(std::move(val)); 475 | } 476 | } 477 | 478 | BTreeBase(const BTreeBase &other) = delete; 479 | BTreeBase &operator=(const BTreeBase &other) = delete; 480 | BTreeBase(BTreeBase &&other) noexcept = default; 481 | BTreeBase &operator=(BTreeBase &&other) noexcept = default; 482 | 483 | void swap(BTreeBase &other) noexcept { 484 | std::swap(alloc_, other.alloc_); 485 | std::swap(root_, other.root_); 486 | std::swap(begin_, other.begin_); 487 | } 488 | 489 | bool verify(const Node *node) const { 490 | // invariant: node never null 491 | assert(node); 492 | 493 | // invariant: except root, t - 1 <= #(key) <= 2 * t - 1 494 | assert(!node->parent_ || 495 | (node->nkeys() >= Fanout - 1 && node->nkeys() <= 2 * Fanout - 1)); 496 | 497 | // invariant: keys are sorted 498 | assert(std::ranges::is_sorted(node->keys_.begin(), 499 | node->keys_.begin() + node->nkeys(), Comp{}, 
500 | Proj{})); 501 | 502 | // invariant: for internal nodes, t <= #(child) == (#(key) + 1)) <= 2 * t 503 | assert(!node->parent_ || node->is_leaf() || 504 | (std::ssize(node->children_) >= Fanout && 505 | std::ssize(node->children_) == node->nkeys() + 1 && 506 | std::ssize(node->children_) <= 2 * Fanout)); 507 | 508 | // index check 509 | assert(!node->parent_ || 510 | node == node->parent_->children_[node->index_].get()); 511 | 512 | // invariant: child_0 <= key_0 <= child_1 <= ... <= key_(N - 1) <= 513 | // child_N 514 | if (!node->is_leaf()) { 515 | auto num_keys = node->nkeys(); 516 | 517 | for (attr_t i = 0; i < node->nkeys(); ++i) { 518 | assert(node->children_[i]); 519 | assert(!Comp{}( 520 | Proj{}(node->keys_[i]), 521 | Proj{}( 522 | node->children_[i]->keys_[node->children_[i]->nkeys() - 1]))); 523 | assert(!Comp{}(Proj{}(node->children_[i + 1]->keys_[0]), 524 | Proj{}(node->keys_[i]))); 525 | // parent check 526 | assert(node->children_[i]->parent_ == node); 527 | // recursive check 528 | assert(verify(node->children_[i].get())); 529 | assert(node->height_ == node->children_[i]->height_ + 1); 530 | num_keys += node->children_[i]->size_; 531 | } 532 | // parent check 533 | assert(node->children_.back()->parent_ == node); 534 | assert(verify(node->children_.back().get())); 535 | assert(node->height_ == node->children_.back()->height_ + 1); 536 | num_keys += node->children_.back()->size_; 537 | assert(node->size_ == num_keys); 538 | } else { 539 | assert(node->size_ == node->nkeys()); 540 | assert(node->height_ == 0); 541 | } 542 | 543 | return true; 544 | } 545 | 546 | [[nodiscard]] bool verify() const { 547 | // Uncomment these lines for testing 548 | #ifdef _CONTROL_IN_TEST 549 | assert(begin_ == const_iterator_type(leftmost_leaf(root_.get()), 0)); 550 | assert(verify(root_.get())); 551 | #endif 552 | return true; 553 | } 554 | 555 | [[nodiscard]] iterator_type begin() noexcept { return begin_; } 556 | 557 | [[nodiscard]] const_iterator_type begin() 
const noexcept { 558 | return const_iterator_type(begin_); 559 | } 560 | 561 | [[nodiscard]] const_iterator_type cbegin() const noexcept { 562 | return const_iterator_type(begin_); 563 | } 564 | 565 | [[nodiscard]] iterator_type end() noexcept { 566 | return iterator_type(root_.get(), root_->nkeys()); 567 | } 568 | 569 | [[nodiscard]] const_iterator_type end() const noexcept { 570 | return const_iterator_type(root_.get(), root_->nkeys()); 571 | } 572 | 573 | [[nodiscard]] const_iterator_type cend() const noexcept { 574 | return const_iterator_type(root_.get(), root_->nkeys()); 575 | } 576 | 577 | [[nodiscard]] reverse_iterator_type rbegin() noexcept { 578 | return reverse_iterator_type(begin()); 579 | } 580 | 581 | [[nodiscard]] const_reverse_iterator_type rbegin() const noexcept { 582 | return const_reverse_iterator_type(begin()); 583 | } 584 | 585 | [[nodiscard]] const_reverse_iterator_type crbegin() const noexcept { 586 | return const_reverse_iterator_type(cbegin()); 587 | } 588 | 589 | [[nodiscard]] reverse_iterator_type rend() noexcept { 590 | return reverse_iterator_type(end()); 591 | } 592 | 593 | [[nodiscard]] const_reverse_iterator_type rend() const noexcept { 594 | return const_reverse_iterator_type(end()); 595 | } 596 | 597 | [[nodiscard]] const_reverse_iterator_type crend() const noexcept { 598 | return const_reverse_iterator_type(cend()); 599 | } 600 | 601 | [[nodiscard]] bool empty() const noexcept { return root_->size_ == 0; } 602 | 603 | [[nodiscard]] size_type size() const noexcept { 604 | return static_cast(root_->size_); 605 | } 606 | 607 | [[nodiscard]] attr_t height() const noexcept { return root_->height_; } 608 | 609 | protected: 610 | [[nodiscard]] Node *get_root() noexcept { return root_.get(); } 611 | 612 | [[nodiscard]] Node *get_root() const noexcept { return root_.get(); } 613 | 614 | public: 615 | void clear() { 616 | root_ = make_node(); 617 | begin_ = iterator_type(root_.get(), 0); 618 | } 619 | 620 | protected: 621 | static Node 
*rightmost_leaf(Node *curr) noexcept { 622 | while (curr && !curr->is_leaf()) { 623 | curr = curr->children_[std::ssize(curr->children_) - 1].get(); 624 | } 625 | return curr; 626 | } 627 | 628 | static const Node *rightmost_leaf(const Node *curr) noexcept { 629 | while (curr && !curr->is_leaf()) { 630 | curr = curr->children_[std::ssize(curr->children_) - 1].get(); 631 | } 632 | return curr; 633 | } 634 | 635 | static Node *leftmost_leaf(Node *curr) noexcept { 636 | while (curr && !curr->is_leaf()) { 637 | curr = curr->children_[0].get(); 638 | } 639 | return curr; 640 | } 641 | 642 | static const Node *leftmost_leaf(const Node *curr) noexcept { 643 | while (curr && !curr->is_leaf()) { 644 | curr = curr->children_[0].get(); 645 | } 646 | return curr; 647 | } 648 | 649 | void promote_root_if_necessary() { 650 | if (root_->empty()) { 651 | assert(std::ssize(root_->children_) == 1); 652 | root_ = std::move(root_->children_[0]); 653 | root_->index_ = 0; 654 | root_->parent_ = nullptr; 655 | } 656 | } 657 | 658 | void set_begin() { begin_ = iterator_type(leftmost_leaf(root_.get()), 0); } 659 | 660 | // node brings a key from parent 661 | // parent brings a key from right sibling 662 | // node brings a child from right sibling 663 | void left_rotate(Node *node) { 664 | auto parent = node->parent_; 665 | assert(node && parent && parent->children_[node->index_].get() == node && 666 | node->index_ + 1 < std::ssize(parent->children_) && 667 | parent->children_[node->index_ + 1]->can_take_key()); 668 | auto sibling = parent->children_[node->index_ + 1].get(); 669 | 670 | if constexpr (is_disk_) { 671 | node->keys_[node->num_keys_] = parent->keys_[node->index_]; 672 | node->num_keys_++; 673 | parent->keys_[node->index_] = sibling->keys_[0]; 674 | std::memmove(sibling->keys_.data(), sibling->keys_.data() + 1, 675 | (sibling->num_keys_ - 1) * sizeof(V)); 676 | sibling->num_keys_--; 677 | if constexpr (use_simd_) { 678 | sibling->keys_[sibling->num_keys_] = 
std::numeric_limits::max(); 679 | } 680 | } else { 681 | node->keys_.push_back(std::move(parent->keys_[node->index_])); 682 | parent->keys_[node->index_] = std::move(sibling->keys_.front()); 683 | std::shift_left(sibling->keys_.begin(), sibling->keys_.end(), 1); 684 | sibling->keys_.pop_back(); 685 | } 686 | 687 | node->size_++; 688 | sibling->size_--; 689 | 690 | if (!node->is_leaf()) { 691 | const auto orphan_size = sibling->children_.front()->size_; 692 | node->size_ += orphan_size; 693 | sibling->size_ -= orphan_size; 694 | 695 | sibling->children_.front()->parent_ = node; 696 | sibling->children_.front()->index_ = 697 | static_cast(std::ssize(node->children_)); 698 | node->children_.push_back(std::move(sibling->children_.front())); 699 | std::shift_left(sibling->children_.begin(), sibling->children_.end(), 1); 700 | sibling->children_.pop_back(); 701 | for (auto &&child : sibling->children_) { 702 | child->index_--; 703 | } 704 | } 705 | } 706 | 707 | // left_rotate() * n 708 | void left_rotate_n(Node *node, attr_t n) { 709 | assert(n >= 1); 710 | if (n == 1) { 711 | left_rotate(node); 712 | return; 713 | } 714 | 715 | auto parent = node->parent_; 716 | assert(node && parent && parent->children_[node->index_].get() == node && 717 | node->index_ + 1 < std::ssize(parent->children_)); 718 | auto sibling = parent->children_[node->index_ + 1].get(); 719 | assert(sibling->nkeys() >= (Fanout - 1) + n); 720 | 721 | if constexpr (is_disk_) { 722 | // brings one key from parent 723 | node->keys_[node->num_keys_] = parent->keys_[node->index_]; 724 | node->num_keys_++; 725 | // brings n - 1 keys from sibling 726 | std::memcpy(node->keys_.data() + node->num_keys_, sibling->keys_.data(), 727 | (n - 1) * sizeof(V)); 728 | // parent brings one key from sibling 729 | parent->keys_[node->index_] = sibling->keys_[n - 1]; 730 | std::memmove(sibling->keys_.data(), sibling->keys_.data() + n, 731 | (sibling->num_keys_ - n) * sizeof(V)); 732 | sibling->num_keys_ -= n; 733 | if 
constexpr (use_simd_) { 734 | for (attr_t k = 0; k < n; ++k) { 735 | sibling->keys_[sibling->num_keys_ + k] = 736 | std::numeric_limits::max(); 737 | } 738 | } 739 | } else { 740 | // brings one key from parent 741 | node->keys_.push_back(std::move(parent->keys_[node->index_])); 742 | // brings n - 1 keys from sibling 743 | std::ranges::move(sibling->keys_ | std::views::take(n - 1), 744 | std::back_inserter(node->keys_)); 745 | // parent brings one key from sibling 746 | parent->keys_[node->index_] = std::move(sibling->keys_[n - 1]); 747 | std::shift_left(sibling->keys_.begin(), sibling->keys_.end(), n); 748 | sibling->keys_.resize(sibling->nkeys() - n); 749 | } 750 | 751 | node->size_ += n; 752 | sibling->size_ -= n; 753 | 754 | if (!node->is_leaf()) { 755 | // brings n children from sibling 756 | attr_t orphan_size = 0; 757 | attr_t immigrant_index = static_cast(std::ssize(node->children_)); 758 | for (auto &&immigrant : sibling->children_ | std::views::take(n)) { 759 | immigrant->parent_ = node; 760 | immigrant->index_ = immigrant_index++; 761 | orphan_size += immigrant->size_; 762 | } 763 | node->size_ += orphan_size; 764 | sibling->size_ -= orphan_size; 765 | 766 | std::ranges::move(sibling->children_ | std::views::take(n), 767 | std::back_inserter(node->children_)); 768 | std::shift_left(sibling->children_.begin(), sibling->children_.end(), n); 769 | for (attr_t idx = 0; idx < n; ++idx) { 770 | sibling->children_.pop_back(); 771 | } 772 | attr_t sibling_index = 0; 773 | for (auto &&child : sibling->children_) { 774 | child->index_ = sibling_index++; 775 | } 776 | } 777 | } 778 | 779 | // node brings a key from parent 780 | // parent brings a key from left sibling 781 | // node brings a child from left sibling 782 | void right_rotate(Node *node) { 783 | auto parent = node->parent_; 784 | assert(node && parent && parent->children_[node->index_].get() == node && 785 | node->index_ - 1 >= 0 && 786 | parent->children_[node->index_ - 1]->can_take_key()); 787 | auto 
sibling = parent->children_[node->index_ - 1].get(); 788 | 789 | if constexpr (is_disk_) { 790 | std::memmove(node->keys_.data() + 1, node->keys_.data(), 791 | node->num_keys_ * sizeof(V)); 792 | node->num_keys_++; 793 | node->keys_[0] = parent->keys_[node->index_ - 1]; 794 | parent->keys_[node->index_ - 1] = sibling->keys_[sibling->num_keys_ - 1]; 795 | sibling->num_keys_--; 796 | if constexpr (use_simd_) { 797 | sibling->keys_[sibling->num_keys_] = std::numeric_limits::max(); 798 | } 799 | } else { 800 | node->keys_.insert(node->keys_.begin(), 801 | std::move(parent->keys_[node->index_ - 1])); 802 | parent->keys_[node->index_ - 1] = std::move(sibling->keys_.back()); 803 | sibling->keys_.pop_back(); 804 | } 805 | 806 | node->size_++; 807 | sibling->size_--; 808 | 809 | if (!node->is_leaf()) { 810 | const auto orphan_size = sibling->children_.back()->size_; 811 | node->size_ += orphan_size; 812 | sibling->size_ -= orphan_size; 813 | 814 | sibling->children_.back()->parent_ = node; 815 | sibling->children_.back()->index_ = 0; 816 | 817 | node->children_.insert(node->children_.begin(), 818 | std::move(sibling->children_.back())); 819 | sibling->children_.pop_back(); 820 | for (auto &&child : node->children_ | std::views::drop(1)) { 821 | child->index_++; 822 | } 823 | } 824 | } 825 | 826 | // right_rotate() * n 827 | void right_rotate_n(Node *node, attr_t n) { 828 | assert(n >= 1); 829 | if (n == 1) { 830 | right_rotate(node); 831 | return; 832 | } 833 | 834 | auto parent = node->parent_; 835 | assert(node && parent && parent->children_[node->index_].get() == node && 836 | node->index_ - 1 >= 0); 837 | auto sibling = parent->children_[node->index_ - 1].get(); 838 | assert(sibling->nkeys() >= (Fanout - 1) + n); 839 | 840 | if constexpr (is_disk_) { 841 | std::memcpy(node->keys_.data() + node->num_keys_, 842 | sibling->keys_.data() + (sibling->num_keys_ - n), 843 | (n - 1) * sizeof(V)); 844 | node->num_keys_ += (n - 1); 845 | node->keys_[node->num_keys_] = 
parent->keys_[node->index_ - 1]; 846 | node->num_keys_++; 847 | parent->keys_[node->index_ - 1] = sibling->keys_[sibling->num_keys_ - 1]; 848 | 849 | std::rotate( 850 | std::make_reverse_iterator(node->keys_.begin() + node->num_keys_), 851 | std::make_reverse_iterator(node->keys_.begin() + node->num_keys_ - n), 852 | node->keys_.rend()); 853 | sibling->num_keys_ -= n; 854 | if constexpr (use_simd_) { 855 | sibling->keys_[sibling->num_keys_] = std::numeric_limits::max(); 856 | } 857 | } else { 858 | // brings n - 1 keys from sibling 859 | std::ranges::move(sibling->keys_ | 860 | std::views::drop(sibling->nkeys() - n) | 861 | std::views::take(n - 1), 862 | std::back_inserter(node->keys_)); 863 | // brings one key from parent 864 | node->keys_.push_back(std::move(parent->keys_[node->index_ - 1])); 865 | // parent brings one key from sibling 866 | parent->keys_[node->index_ - 1] = std::move(sibling->keys_.back()); 867 | // right rotate n 868 | std::ranges::rotate(node->keys_ | std::views::reverse, 869 | node->keys_.rbegin() + n); 870 | sibling->keys_.resize(sibling->nkeys() - n); 871 | } 872 | 873 | node->size_ += n; 874 | sibling->size_ -= n; 875 | 876 | if (!node->is_leaf()) { 877 | // brings n children from sibling 878 | attr_t orphan_size = 0; 879 | attr_t immigrant_index = 0; 880 | for (auto &&immigrant : 881 | sibling->children_ | 882 | std::views::drop(std::ssize(sibling->children_) - n)) { 883 | immigrant->parent_ = node; 884 | immigrant->index_ = immigrant_index++; 885 | orphan_size += immigrant->size_; 886 | } 887 | node->size_ += orphan_size; 888 | sibling->size_ -= orphan_size; 889 | 890 | std::ranges::move( 891 | sibling->children_ | 892 | std::views::drop(std::ssize(sibling->children_) - n), 893 | std::back_inserter(node->children_)); 894 | std::ranges::rotate(node->children_ | std::views::reverse, 895 | node->children_.rbegin() + n); 896 | for (attr_t idx = 0; idx < n; ++idx) { 897 | sibling->children_.pop_back(); 898 | } 899 | attr_t child_index = n; 
900 | for (auto &&child : node->children_ | std::views::drop(n)) { 901 | child->index_ = child_index++; 902 | } 903 | } 904 | } 905 | 906 | auto get_lb(const K &key, const Node *x) const noexcept { 907 | if constexpr (use_simd_) { 908 | return get_lb_simd(key, x->keys_.data(), 909 | x->keys_.data() + 2 * Fanout); 910 | } else if constexpr (use_linsearch_) { 911 | auto lbcomp = [&key](const K &other) { return Comp{}(other, key); }; 912 | return std::distance( 913 | x->keys_.begin(), 914 | std::ranges::find_if_not( 915 | x->keys_.begin(), x->keys_.begin() + x->nkeys(), lbcomp, Proj{})); 916 | } else { 917 | return std::distance(x->keys_.begin(), 918 | std::ranges::lower_bound( 919 | x->keys_.begin(), x->keys_.begin() + x->nkeys(), 920 | key, Comp{}, Proj{})); 921 | } 922 | } 923 | 924 | auto get_ub(const K &key, const Node *x) const noexcept { 925 | if constexpr (use_simd_) { 926 | return get_ub_simd(key, x->keys_.data(), 927 | x->keys_.data() + 2 * Fanout); 928 | } else if constexpr (use_linsearch_) { 929 | auto ubcomp = [&key](const K &other) { return Comp{}(key, other); }; 930 | return std::distance(x->keys_.begin(), 931 | std::ranges::find_if(x->keys_.begin(), 932 | x->keys_.begin() + x->nkeys(), 933 | ubcomp, Proj{})); 934 | } else { 935 | return std::distance(x->keys_.begin(), 936 | std::ranges::upper_bound( 937 | x->keys_.begin(), x->keys_.begin() + x->nkeys(), 938 | key, Comp{}, Proj{})); 939 | } 940 | } 941 | 942 | const_iterator_type search(const K &key) const { 943 | auto x = root_.get(); 944 | while (x) { 945 | auto i = get_lb(key, x); 946 | if (i < x->nkeys() && key == Proj{}(x->keys_[i])) { // equal? 
// key found
        return const_iterator_type(x, static_cast(i));
      } else if (x->is_leaf()) { // no child, key is not in the tree
        return cend();
      } else { // search on child between range
        x = x->children_[i].get();
      }
    }
    return cend();
  }

  // Descends to the leaf where key's lower bound falls and returns an
  // iterator to slot get_lb(key, leaf) of that leaf.
  // climb: when true, climb() is called on the iterator before returning
  // (climb() is defined outside this view; presumably it moves a
  // past-the-last-slot position up to the next valid position - confirm).
  nonconst_iterator_type find_lower_bound(const K &key, bool climb = true) {
    auto x = root_.get();
    while (x) {
      auto i = get_lb(key, x);
      if (x->is_leaf()) {
        auto it = nonconst_iterator_type(x, static_cast(i));
        if (climb) {
          it.climb();
        }
        return it;
      } else {
        x = x->children_[i].get();
      }
    }
    // only reached if a null node is encountered
    return nonconst_iterator_type(end());
  }

  // const overload of find_lower_bound()
  const_iterator_type find_lower_bound(const K &key, bool climb = true) const {
    auto x = root_.get();
    while (x) {
      auto i = get_lb(key, x);
      if (x->is_leaf()) {
        auto it = const_iterator_type(x, static_cast(i));
        if (climb) {
          it.climb();
        }
        return it;
      } else {
        x = x->children_[i].get();
      }
    }
    return cend();
  }

  // Same descent as find_lower_bound(), but uses get_ub() at every level.
  nonconst_iterator_type find_upper_bound(const K &key, bool climb = true) {
    auto x = root_.get();
    while (x) {
      auto i = get_ub(key, x);
      if (x->is_leaf()) {
        auto it = nonconst_iterator_type(x, static_cast(i));
        if (climb) {
          it.climb();
        }
        return it;
      } else {
        x = x->children_[i].get();
      }
    }
    return nonconst_iterator_type(end());
  }

  // const overload of find_upper_bound()
  const_iterator_type find_upper_bound(const K &key, bool climb = true) const {
    auto x = root_.get();
    while (x) {
      auto i = get_ub(key, x);
      if (x->is_leaf()) {
        auto it = const_iterator_type(x, static_cast(i));
        if (climb) {
          it.climb();
        }
        return it;
      } else {
        x = x->children_[i].get();
      }
    }
    return cend();
  }

  // split child[i] to
// child[i], child[i + 1]
  // y must be full and its parent must not be full (asserted below). The key
  // at index Fanout - 1 of y moves up into the parent and a new right sibling
  // z takes everything to the right of it.
  void split_child(Node *y) {
    assert(y);
    auto i = y->index_;
    Node *x = y->parent_;
    assert(x && y == x->children_[i].get() && y->is_full() && !x->is_full());

    // split y's keys
    // y will keep its left Fanout - 1 keys
    // y->keys_[Fanout - 1] will be a key of y->parent_
    // the keys of y from index Fanout onward will be taken by y's right
    // sibling

    auto z = make_node(); // will be y's right sibling
    z->parent_ = x;
    z->index_ = i + 1;
    z->height_ = y->height_;

    // bring the keys from index Fanout onward from y
    if constexpr (is_disk_) {
      std::memcpy(z->keys_.data(), y->keys_.data() + Fanout,
                  (y->num_keys_ - Fanout) * sizeof(V));
      z->num_keys_ = y->num_keys_ - Fanout;
    } else {
      std::ranges::move(y->keys_ | std::views::drop(Fanout),
                        std::back_inserter(z->keys_));
    }
    // z_size accumulates the subtree size of z: its keys plus its children
    auto z_size = z->nkeys();
    if (!y->is_leaf()) {
      z->children_.reserve(2 * Fanout);
      // bring right half children from y
      std::ranges::move(y->children_ | std::views::drop(Fanout),
                        std::back_inserter(z->children_));
      for (auto &&child : z->children_) {
        child->parent_ = z.get();
        child->index_ -= Fanout;
        z_size += child->size_;
      }
      // drop the moved-from slots that are left at the end of y
      while (static_cast(std::ssize(y->children_)) > Fanout) {
        y->children_.pop_back();
      }
    }
    z->size_ = z_size;
    // y loses z's subtree and the one key that moves up
    y->size_ -= (z_size + 1);

    x->children_.insert(x->children_.begin() + i + 1, std::move(z));
    // children to the right of z moved one slot to the right
    for (auto &&child : x->children_ | std::views::drop(i + 2)) {
      child->index_++;
    }

    if constexpr (is_disk_) {
      // open slot i in the parent for the key that moves up
      std::memmove(x->keys_.data() + i + 1, x->keys_.data() + i,
                   (x->num_keys_ - i) * sizeof(V));
      x->num_keys_++;
      x->keys_[i] = y->keys_[Fanout - 1];
      y->num_keys_ = Fanout - 1;
      if constexpr (use_simd_) {
        // every slot y no longer uses is reset to the max sentinel
        for (attr_t k = Fanout - 1; k < 2 * Fanout; ++k) {
y->keys_[k] = std::numeric_limits::max();
        }
      }
    } else {
      x->keys_.insert(x->keys_.begin() + i, std::move(y->keys_[Fanout - 1]));
      y->keys_.resize(Fanout - 1);
    }
  }

  // merge child[i + 1] and key[i] into child[i]
  // y is child[i] of its parent x; afterwards x has one key and one child
  // fewer and the former right sibling of y no longer exists.
  void merge_child(Node *y) {
    assert(y);
    auto i = y->index_;
    Node *x = y->parent_;
    assert(x && y == x->children_[i].get() && !x->is_leaf() && i >= 0 &&
           i + 1 < std::ssize(x->children_));
    auto sibling = x->children_[i + 1].get();
    // the merged node (both key sets plus the separator) must fit in a node
    assert(y->nkeys() + sibling->nkeys() <= 2 * Fanout - 2);

    // number of elements that move from sibling's subtree into y
    auto immigrated_size = sibling->nkeys();

    if constexpr (is_disk_) {
      // separator key first, then all of sibling's keys
      y->keys_[y->num_keys_] = x->keys_[i];
      y->num_keys_++;
      std::memcpy(y->keys_.data() + y->num_keys_, sibling->keys_.data(),
                  sibling->num_keys_ * sizeof(V));
      y->num_keys_ += sibling->num_keys_;
    } else {
      y->keys_.push_back(std::move(x->keys_[i]));
      // bring keys of child[i + 1]
      std::ranges::move(sibling->keys_, std::back_inserter(y->keys_));
    }

    // bring children of child[i + 1]
    if (!y->is_leaf()) {
      attr_t immigrant_index = static_cast(std::ssize(y->children_));
      for (auto &&child : sibling->children_) {
        child->parent_ = y;
        child->index_ = immigrant_index++;
        immigrated_size += child->size_;
      }
      std::ranges::move(sibling->children_, std::back_inserter(y->children_));
    }
    // + 1 for the separator key that came down from x
    y->size_ += immigrated_size + 1;

    // shift children from i + 1 left by 1 (because child[i + 1] is merged)
    std::shift_left(x->children_.begin() + i + 1, x->children_.end(), 1);
    x->children_.pop_back();
    if constexpr (is_disk_) {
      std::memmove(x->keys_.data() + i, x->keys_.data() + i + 1,
                   (x->num_keys_ - (i + 1)) * sizeof(V));
      x->num_keys_--;
      if constexpr (use_simd_) {
        // vacated slot is reset to the max sentinel
        x->keys_[x->num_keys_] =
std::numeric_limits::max();
      }
    } else {
      // shift keys from i left by 1 (because key[i] is merged)
      std::shift_left(x->keys_.begin() + i, x->keys_.end(), 1);
      x->keys_.pop_back();
    }

    // children to the right of y moved one slot to the left
    for (auto &&child : x->children_ | std::views::drop(i + 1)) {
      child->index_--;
    }
  }

  // only used in join() when join() is called by split()
  // preinvariant: x is the leftmost of the root (left side)
  // or the rightmost (right side)

  // (left side) merge child[0], child[1] if necessary, and propagate to
  // possibly the root for right side it's child[n - 2], child[n - 1]
  //
  // Repairs the two outermost children of x: if together they fit in one node
  // they are merged; otherwise a child with fewer than Fanout - 1 keys
  // borrows the missing keys from its neighbour.
  void try_merge(Node *x, bool left_side) {
    assert(x && !x->is_leaf());
    // nothing to do with fewer than two children
    if (std::ssize(x->children_) < 2) {
      return;
    }
    if (left_side) {
      auto first = x->children_[0].get();
      auto second = x->children_[1].get();

      if (first->nkeys() + second->nkeys() <= 2 * Fanout - 2) {
        // just merge to one node
        merge_child(first);
      } else if (first->nkeys() < Fanout - 1) {
        // first borrows key from second
        auto deficit = (Fanout - 1 - first->nkeys());

        // this is mathematically true, otherwise
        // #(first.keys) + #(second.keys) < 2 * t - 2, so it should be merged
        // before
        assert(second->nkeys() > deficit + (Fanout - 1));
        left_rotate_n(first, deficit);
      }
    } else {
      // rfirst is the last child, rsecond the one before it
      auto rfirst = x->children_.back().get();
      auto rsecond = x->children_[std::ssize(x->children_) - 2].get();

      if (rfirst->nkeys() + rsecond->nkeys() <= 2 * Fanout - 2) {
        // just merge to one node
        merge_child(rsecond);
      } else if (rfirst->nkeys() < Fanout - 1) {
        // rfirst borrows key from rsecond
        auto deficit = (Fanout - 1 - rfirst->nkeys());

        assert(rsecond->nkeys() > deficit + (Fanout - 1));
        right_rotate_n(rfirst, deficit);
      }
else if (rsecond->nkeys() < Fanout - 1) {
        // rsecond borrows key from rfirst
        auto deficit = (Fanout - 1 - rsecond->nkeys());

        assert(rfirst->nkeys() > deficit + (Fanout - 1));
        left_rotate_n(rsecond, deficit);
      }
    }
  }

  // Inserts value at slot i of the non-full leaf node, updates the cached
  // begin iterator if the new element is the smallest, and increments size_
  // on every node from the leaf up to the root.
  // Returns an iterator to the inserted element.
  template
  iterator_type
  insert_leaf(Node *node, attr_t i,
              T &&value) requires std::is_same_v, V> {
    assert(node && node->is_leaf() && !node->is_full());
    // decided before the insertion: tree empty, or value orders before the
    // current first element
    bool update_begin = (empty() || Comp{}(Proj{}(value), Proj{}(*begin_)));

    if constexpr (is_disk_) {
      // shift slots [i, num_keys_) one to the right to open slot i
      std::memmove(node->keys_.data() + i + 1, node->keys_.data() + i,
                   (node->num_keys_ - i) * sizeof(V));
      node->keys_[i] = std::forward(value);
      node->num_keys_++;
    } else {
      node->keys_.insert(node->keys_.begin() + i, std::forward(value));
    }
    iterator_type iter(node, i);
    if (update_begin) {
      assert(node == leftmost_leaf(root_.get()) && i == 0);
      begin_ = iter;
    }

    // the subtree size of every ancestor grows by one
    auto curr = node;
    while (curr) {
      curr->size_++;
      curr = curr->parent_;
    }

    assert(verify());
    return iter;
  }

  // Insertion for trees that allow duplicates: descends using the upper bound
  // of key at every level, splitting any full child on the way down so that
  // the leaf reached is never full.
  template
  iterator_type insert_ub(T &&key) requires(
      AllowDup &&std::is_same_v, V>) {
    auto x = root_.get();
    while (true) {
      auto i = get_ub(Proj{}(key), x);
      if (x->is_leaf()) {
        return insert_leaf(x, static_cast(i), std::forward(key));
      } else {
        if (x->children_[i]->is_full()) {
          split_child(x->children_[i].get());
          // the split put a new key at x->keys_[i]; go right of it if key is
          // greater
          if (Comp{}(Proj{}(x->keys_[i]), Proj{}(key))) {
            ++i;
          }
        }
        x = x->children_[i].get();
      }
    }
  }

  // Insertion for trees without duplicates: like insert_ub() but descends by
  // lower bound and stops without inserting when an equal key is met.
  // Returns {iterator to the element, whether an insertion took place}.
  template
  std::pair
  insert_lb(T &&key) requires(!AllowDup &&
                              std::is_same_v, V>) {
    auto x = root_.get();
    while (true) {
      auto i = get_lb(Proj{}(key), x);
      if (i < x->nkeys() && Proj{}(key) ==
Proj{}(x->keys_[i])) {
        // key already present in this node
        return {iterator_type(x, static_cast(i)), false};
      } else if (x->is_leaf()) {
        return {insert_leaf(x, static_cast(i), std::forward(key)),
                true};
      } else {
        if (x->children_[i]->is_full()) {
          split_child(x->children_[i].get());
          // the key that the split moved up may be the one being inserted
          if (Proj{}(key) == Proj{}(x->keys_[i])) {
            return {iterator_type(x, static_cast(i)), false};
          } else if (Comp{}(Proj{}(x->keys_[i]), Proj{}(key))) {
            ++i;
          }
        }
        x = x->children_[i].get();
      }
    }
  }

  // Removes the key at slot i of leaf node, decrements size_ on every node
  // from the leaf up to the root and returns an iterator to the element that
  // followed the erased one (slot i after climb()).
  iterator_type erase_leaf(Node *node, attr_t i) {
    assert(node && i >= 0 && i < node->nkeys() && node->is_leaf() &&
           !node->has_minimal_keys());
    // the cached begin iterator must be refreshed if the first element goes
    bool update_begin = (begin_ == const_iterator_type(node, i));
    if constexpr (is_disk_) {
      // close the gap at slot i
      std::memmove(node->keys_.data() + i, node->keys_.data() + i + 1,
                   (node->num_keys_ - (i + 1)) * sizeof(V));
      node->num_keys_--;
      if constexpr (use_simd_) {
        // vacated slot is reset to the max sentinel
        node->keys_[node->num_keys_] = std::numeric_limits::max();
      }
    } else {
      std::shift_left(node->keys_.begin() + i, node->keys_.end(), 1);
      node->keys_.pop_back();
    }
    iterator_type iter(node, i);
    iter.climb();
    if (update_begin) {
      begin_ = iter;
    }
    // the subtree size of every ancestor shrinks by one
    auto curr = node;
    while (curr) {
      curr->size_--;
      curr = curr->parent_;
    }
    assert(verify());
    return iter;
  }

  // Erases key from the subtree rooted at x (no-duplicates trees only).
  // Returns the number of elements removed: 1 if key was found, 0 otherwise.
  // On the way down every node that is about to be entered is first given
  // more than the minimal number of keys (by rotation or merge), so the final
  // leaf removal never underflows.
  size_t erase_lb(Node *x, const K &key) requires(!AllowDup) {
    while (true) {
      auto i = get_lb(key, x);
      if (i < x->nkeys() && key == Proj{}(x->keys_[i])) {
        // key found
        assert(x->is_leaf() || i + 1 < std::ssize(x->children_));
        if (x->is_leaf()) {
          erase_leaf(x, static_cast(i));
          return 1;
        } else if (x->children_[i]->can_take_key()) {
          // swap key with pred
          nonconst_iterator_type iter(x, static_cast(i));
          auto pred =
std::prev(iter);
          assert(pred.node_ == rightmost_leaf(x->children_[i].get()));
          std::iter_swap(pred, iter);
          // search pred
          x = x->children_[i].get();
        } else if (x->children_[i + 1]->can_take_key()) {
          // swap key with succ
          nonconst_iterator_type iter(x, static_cast(i));
          auto succ = std::next(iter);
          assert(succ.node_ == leftmost_leaf(x->children_[i + 1].get()));
          std::iter_swap(succ, iter);
          // search succ
          x = x->children_[i + 1].get();
        } else {
          // neither neighbour child can give up a key: merge them around the
          // key and continue in the merged node
          auto next = x->children_[i].get();
          merge_child(next);
          promote_root_if_necessary();
          x = next;
        }
      } else if (x->is_leaf()) {
        // no child, key is not in the tree
        return 0;
      } else {
        auto next = x->children_[i].get();
        if (x->children_[i]->has_minimal_keys()) {
          // give the child one more key before descending into it: borrow
          // from the right sibling, else from the left sibling, else merge
          // with the right sibling, else merge into the left sibling
          if (i + 1 < std::ssize(x->children_) &&
              x->children_[i + 1]->can_take_key()) {
            left_rotate(next);
          } else if (i - 1 >= 0 && x->children_[i - 1]->can_take_key()) {
            right_rotate(next);
          } else if (i + 1 < std::ssize(x->children_)) {
            merge_child(next);
            promote_root_if_necessary();
          } else if (i - 1 >= 0) {
            next = x->children_[i - 1].get();
            merge_child(next);
            promote_root_if_necessary();
          }
        }
        x = next;
      }
    }
  }

  // Erases the element reached by following hints from the root.
  // hints is consumed from the back: each entry is the child index to take at
  // the current node, and the final entry is the key slot inside the last
  // node (this is the layout get_path_from_root() produces). value is only
  // used in an assertion. Returns what erase_leaf() returns for the removal.
  iterator_type erase_hint([[maybe_unused]] const V &value,
                           std::vector &hints) {
    auto x = root_.get();
    while (true) {
      auto i = hints.back();
      hints.pop_back();
      if (hints.empty()) {
        // key found
        assert(i < x->nkeys() && value == x->keys_[i]);
        assert(x->is_leaf() || i + 1 < std::ssize(x->children_));
        if (x->is_leaf()) {
          return erase_leaf(x, i);
        } else if (x->children_[i]->can_take_key()) {
          // swap key with pred
          nonconst_iterator_type iter(x, i);
          auto pred = std::prev(iter);
assert(pred.node_ == rightmost_leaf(x->children_[i].get()));
          std::iter_swap(pred, iter);
          // search pred
          x = x->children_[i].get();
          auto curr = x;
          assert(curr->index_ == i);
          // rebuild hints for the path from x to the predecessor slot: always
          // the last child, then the last key of the leaf
          while (!curr->is_leaf()) {
            hints.push_back(static_cast(std::ssize(curr->children_)) -
                            1);
            curr = curr->children_.back().get();
          }
          hints.push_back(curr->nkeys() - 1);
          // hints are consumed from the back, so the first step goes last
          std::ranges::reverse(hints);
        } else if (x->children_[i + 1]->can_take_key()) {
          // swap key with succ
          nonconst_iterator_type iter(x, i);
          auto succ = std::next(iter);
          assert(succ.node_ == leftmost_leaf(x->children_[i + 1].get()));
          std::iter_swap(succ, iter);
          // search succ
          x = x->children_[i + 1].get();
          auto curr = x;
          assert(curr->index_ == i + 1);
          // path to the successor slot: always child 0, then key 0 (all
          // entries are 0, so no reversal is needed)
          while (!curr->is_leaf()) {
            hints.push_back(0);
            curr = curr->children_.front().get();
          }
          hints.push_back(0);
        } else {
          auto next = x->children_[i].get();
          merge_child(next);
          promote_root_if_necessary();
          x = next;
          // i'th key of x is now t - 1'th key of x->children_[i]
          hints.push_back(Fanout - 1);
        }
      } else {
        assert(!hints.empty());
        auto next = x->children_[i].get();
        if (x->children_[i]->has_minimal_keys()) {
          // same rebalancing order as erase_lb(); the remaining hints are
          // adjusted whenever the contents of the child shift position
          if (i + 1 < std::ssize(x->children_) &&
              x->children_[i + 1]->can_take_key()) {
            left_rotate(x->children_[i].get());
          } else if (i - 1 >= 0 && x->children_[i - 1]->can_take_key()) {
            right_rotate(x->children_[i].get());
            // x->children_[i] stuffs are shifted right by 1
            hints.back() += 1;
          } else if (i + 1 < std::ssize(x->children_)) {
            merge_child(next);
            promote_root_if_necessary();
          } else if (i - 1 >= 0) {
            next = x->children_[i - 1].get();
            merge_child(next);
            promote_root_if_necessary();
            // x->children_[i] stuffs are shifted
// right by t
            hints.back() += Fanout;
          }
        }
        x = next;
      }
    }
  }

private:
  // ranges shorter than this are erased one element at a time in
  // erase_range()
  static constexpr attr_t bulk_erase_threshold = 30;

protected:
  // Erases [first, last) and returns the number of elements removed.
  // Erasing everything is a clear(); short ranges are erased element by
  // element; longer ranges are cut out by splitting the tree around the range
  // and joining the two outer parts.
  size_type erase_range(const_iterator_type first, const_iterator_type last) {
    if (first == cend()) {
      return 0;
    }
    if (first == begin_ && last == cend()) {
      auto cnt = size();
      clear();
      return cnt;
    }

    // ranks of both ends; their difference is the element count
    attr_t first_order = get_order(first);
    attr_t last_order = (last == cend()) ? root_->size_ : get_order(last);

    attr_t cnt = last_order - first_order;
    if (cnt < bulk_erase_threshold) {
      first.climb();
      for (attr_t i = 0; i < cnt; ++i) {
        first = erase(first);
      }
      return cnt;
    }

    // join() needs a separator key: the first erased key is used for that and
    // removed again right after the join
    K first_key = Proj{}(*first);

    auto [tree1, tree2] = split_to_two_trees(first, last);
    auto final_tree = join(std::move(tree1), first_key, std::move(tree2));
    final_tree.erase(final_tree.lower_bound(first_key));

    this->swap(final_tree);
    return cnt;
  }

  // Returns a copy of the element with 0-based rank idx, found by walking
  // down from the root using the stored subtree sizes. The caller is
  // responsible for idx being in range (kth() checks the upper bound).
  V get_kth(attr_t idx) const {
    auto x = root_.get();
    while (x) {
      if (x->is_leaf()) {
        assert(idx >= 0 && idx < x->nkeys());
        return x->keys_[idx];
      } else {
        assert(!x->children_.empty());
        attr_t i = 0;
        const auto n = x->nkeys();
        Node *next = nullptr;
        for (; i < n; ++i) {
          auto child_sz = x->children_[i]->size_;
          if (idx < child_sz) {
            // target lies inside child i
            next = x->children_[i].get();
            break;
          } else if (idx == child_sz) {
            // target is key i itself
            return x->keys_[i];
          } else {
            // skip child i and key i
            idx -= child_sz + 1;
          }
        }
        if (i == n) {
          // not found among the first n children: it is in the last child
          next = x->children_[n].get();
        }
        x = next;
      }
    }
    throw std::runtime_error("unreachable");
  }

  // Returns the 0-based rank of the position iter refers to.
  attr_t get_order(const_iterator_type iter) const {
    auto [node, idx]
= iter; 1509 | attr_t order = 0; 1510 | assert(node); 1511 | if (!node->is_leaf()) { 1512 | for (attr_t i = 0; i <= idx; ++i) { 1513 | order += node->children_[i]->size_; 1514 | } 1515 | } 1516 | order += idx; 1517 | while (node->parent_) { 1518 | for (attr_t i = 0; i < node->index_; ++i) { 1519 | order += node->parent_->children_[i]->size_; 1520 | } 1521 | order += node->index_; 1522 | node = node->parent_; 1523 | } 1524 | return order; 1525 | } 1526 | 1527 | public: 1528 | iterator_type find(const K &key) { return iterator_type(search(key)); } 1529 | 1530 | const_iterator_type find(const K &key) const { return search(key); } 1531 | 1532 | bool contains(const K &key) const { return search(key) != cend(); } 1533 | 1534 | iterator_type lower_bound(const K &key) { 1535 | return iterator_type(find_lower_bound(key)); 1536 | } 1537 | 1538 | const_iterator_type lower_bound(const K &key) const { 1539 | return const_iterator_type(find_lower_bound(key)); 1540 | } 1541 | 1542 | iterator_type upper_bound(const K &key) { 1543 | return iterator_type(find_upper_bound(key)); 1544 | } 1545 | 1546 | const_iterator_type upper_bound(const K &key) const { 1547 | return const_iterator_type(find_upper_bound(key)); 1548 | } 1549 | 1550 | std::ranges::subrange equal_range(const K &key) { 1551 | return {iterator_type(find_lower_bound(key)), 1552 | iterator_type(find_upper_bound(key))}; 1553 | } 1554 | 1555 | std::ranges::subrange equal_range(const K &key) const { 1556 | return {const_iterator_type(find_lower_bound(key)), 1557 | const_iterator_type(find_upper_bound(key))}; 1558 | } 1559 | 1560 | std::ranges::subrange enumerate(const K &a, 1561 | const K &b) const { 1562 | if (Comp{}(b, a)) { 1563 | throw std::invalid_argument("b < a in enumerate()"); 1564 | } 1565 | return {const_iterator_type(find_lower_bound(a)), 1566 | const_iterator_type(find_upper_bound(b))}; 1567 | } 1568 | 1569 | V kth(attr_t idx) const { 1570 | if (idx >= root_->size_) { 1571 | throw std::invalid_argument("in kth() 
k >= size()"); 1572 | } 1573 | return get_kth(idx); 1574 | } 1575 | 1576 | attr_t order(const_iterator_type iter) const { 1577 | if (iter == cend()) { 1578 | throw std::invalid_argument("attempt to get order in end()"); 1579 | } 1580 | return get_order(iter); 1581 | } 1582 | 1583 | attr_t count(const K &key) const requires(AllowDup) { 1584 | auto first = find_lower_bound(key); 1585 | auto last = find_upper_bound(key); 1586 | attr_t first_order = get_order(first); 1587 | attr_t last_order = (last == cend()) ? root_->size_ : get_order(last); 1588 | return last_order - first_order; 1589 | } 1590 | 1591 | protected: 1592 | template 1593 | std::conditional_t> 1594 | insert_value(T &&key) requires(std::is_same_v, V>) { 1595 | if (root_->is_full()) { 1596 | // if root is full then make it as a child of the new root 1597 | auto new_root = make_node(); 1598 | root_->parent_ = new_root.get(); 1599 | new_root->size_ = root_->size_; 1600 | new_root->height_ = root_->height_ + 1; 1601 | new_root->children_.reserve(Fanout * 2); 1602 | new_root->children_.push_back(std::move(root_)); 1603 | root_ = std::move(new_root); 1604 | // and split 1605 | split_child(root_->children_[0].get()); 1606 | } 1607 | if constexpr (AllowDup) { 1608 | return insert_ub(std::forward(key)); 1609 | } else { 1610 | return insert_lb(std::forward(key)); 1611 | } 1612 | } 1613 | 1614 | std::vector get_path_from_root(const_iterator_type iter) const { 1615 | auto node = iter.node_; 1616 | std::vector hints; 1617 | hints.push_back(iter.index_); 1618 | while (node && node->parent_) { 1619 | hints.push_back(node->index_); 1620 | node = node->parent_; 1621 | } 1622 | return hints; 1623 | } 1624 | 1625 | public: 1626 | std::conditional_t> 1627 | insert(const V &key) { 1628 | return insert_value(key); 1629 | } 1630 | 1631 | std::conditional_t> 1632 | insert(V &&key) { 1633 | return insert_value(std::move(key)); 1634 | } 1635 | 1636 | template 1637 | std::conditional_t> 1638 | emplace(Args &&...args) requires 
std::is_constructible_v { 1639 | V val{std::forward(args)...}; 1640 | return insert_value(std::move(val)); 1641 | } 1642 | 1643 | template 1644 | auto &operator[](T &&raw_key) requires(!is_set_ && !AllowDup) { 1645 | if (root_->is_full()) { 1646 | // if root is full then make it as a child of the new root 1647 | auto new_root = make_node(); 1648 | 1649 | root_->parent_ = new_root.get(); 1650 | new_root->size_ = root_->size_; 1651 | new_root->height_ = root_->height_ + 1; 1652 | new_root->children_.reserve(Fanout * 2); 1653 | new_root->children_.push_back(std::move(root_)); 1654 | root_ = std::move(new_root); 1655 | // and split 1656 | split_child(root_->children_[0].get()); 1657 | } 1658 | 1659 | K key{std::forward(raw_key)}; 1660 | auto x = root_.get(); 1661 | while (true) { 1662 | auto i = get_lb(key, x); 1663 | if (i < x->nkeys() && key == Proj{}(x->keys_[i])) { 1664 | return iterator_type(x, static_cast(i))->second; 1665 | } else if (x->is_leaf()) { 1666 | V val{std::move(key), {}}; 1667 | return insert_leaf(x, static_cast(i), std::move(val))->second; 1668 | } else { 1669 | if (x->children_[i]->is_full()) { 1670 | split_child(x->children_[i].get()); 1671 | if (key == Proj{}(x->keys_[i])) { 1672 | return iterator_type(x, static_cast(i))->second; 1673 | } else if (Comp{}(Proj{}(x->keys_[i]), key)) { 1674 | ++i; 1675 | } 1676 | } 1677 | x = x->children_[i].get(); 1678 | } 1679 | } 1680 | } 1681 | 1682 | template 1683 | requires std::is_constructible_v> 1684 | size_type insert_range(Iter first, Iter last) { 1685 | auto [min_elem, max_elem] = 1686 | std::ranges::minmax_element(first, last, Comp{}, Proj{}); 1687 | auto lb = find_lower_bound(*min_elem); 1688 | auto ub = find_upper_bound(*max_elem); 1689 | if (lb != ub) { 1690 | size_type sz = 0; 1691 | for (; first != last; ++first) { 1692 | if constexpr (AllowDup) { 1693 | insert(*first); 1694 | sz++; 1695 | } else { 1696 | auto [_, inserted] = insert(*first); 1697 | if (inserted) { 1698 | sz++; 1699 | } 1700 | } 
1701 | } 1702 | return sz; 1703 | } else { 1704 | BTreeBase tree_mid{alloc_}; 1705 | for (; first != last; ++first) { 1706 | tree_mid.insert(*first); 1707 | } 1708 | auto sz = tree_mid.size(); 1709 | auto [tree_left, tree_right] = split_to_two_trees(lb, ub); 1710 | auto tree_leftmid = join(std::move(tree_left), std::move(tree_mid)); 1711 | auto final_tree = join(std::move(tree_leftmid), std::move(tree_right)); 1712 | this->swap(final_tree); 1713 | return sz; 1714 | } 1715 | } 1716 | 1717 | template 1718 | requires std::is_constructible_v> 1719 | size_type insert_range(R &&r) { 1720 | return insert_range(r.begin(), r.end()); 1721 | } 1722 | 1723 | const_iterator_type erase(const_iterator_type iter) { 1724 | if (iter == cend()) { 1725 | throw std::invalid_argument("attempt to erase cend()"); 1726 | } 1727 | std::vector hints = get_path_from_root(iter); 1728 | V value(std::move(*iter)); 1729 | return erase_hint(value, hints); 1730 | } 1731 | 1732 | size_type erase(const K &key) { 1733 | if constexpr (AllowDup) { 1734 | return erase_range(const_iterator_type(find_lower_bound(key, false)), 1735 | const_iterator_type(find_upper_bound(key, false))); 1736 | } else { 1737 | return erase_lb(root_.get(), key); 1738 | } 1739 | } 1740 | 1741 | size_type erase_range(const K &a, const K &b) { 1742 | if (Comp{}(b, a)) { 1743 | throw std::invalid_argument("b < a in erase_range()"); 1744 | } 1745 | return erase_range(const_iterator_type(find_lower_bound(a)), 1746 | const_iterator_type(find_upper_bound(b))); 1747 | } 1748 | 1749 | template size_type erase_if(Pred pred) { 1750 | auto old_size = size(); 1751 | auto it = begin_; 1752 | for (; it != end();) { 1753 | if (pred(*it)) { 1754 | it = erase(it); 1755 | } else { 1756 | ++it; 1757 | } 1758 | } 1759 | return old_size - size(); 1760 | } 1761 | 1762 | private: 1763 | // serialization and deserialization 1764 | static constexpr std::uint64_t begin_code = 0x6567696e; // 'begin' 1765 | static constexpr std::uint64_t end_code = 
0x656e64; // 'end' 1766 | 1767 | // for tree, we write a root height 1768 | 1769 | // for each node, we only read/write two information: 1770 | // 1. number of keys (attr_t, int32) 1771 | // 2. byte stream for key data (sizeof(V) * nkeys()) 1772 | 1773 | // all other information can be inferred during tree traversal 1774 | 1775 | // number of max bytes for serializing/deserializing a single node 1776 | static constexpr std::size_t keydata_size = sizeof(V) * disk_max_nkeys; 1777 | 1778 | // maximum possible height for B-Tree 1779 | // if height exceeds this value, this means that serialization/deserialization 1780 | // size will exceed 16TB, much more likely a user mistake or a malicious 1781 | // attack 1782 | static constexpr std::size_t max_possible_height = 1783 | (44UL - std::bit_width(static_cast(2 * Fanout))) / 1784 | std::bit_width(keydata_size); 1785 | 1786 | public: 1787 | friend std::istream &operator>>(std::istream &is, 1788 | BTreeBase &tree) requires(is_disk_) { 1789 | std::uint64_t tree_code = 0; 1790 | if (!is.read(reinterpret_cast(&tree_code), sizeof(std::uint64_t))) { 1791 | std::cerr << "Tree deserialization: begin code parse error\n"; 1792 | return is; 1793 | } 1794 | if (tree_code != begin_code) { 1795 | std::cerr << "Tree deserialization: begin code is invalid\n"; 1796 | is.clear(std::ios_base::failbit); 1797 | return is; 1798 | } 1799 | 1800 | attr_t tree_height = 0; 1801 | if (!is.read(reinterpret_cast(&tree_height), sizeof(attr_t))) { 1802 | std::cerr << "Tree deserialization: tree height parse error\n"; 1803 | return is; 1804 | } 1805 | if (static_cast(tree_height) > max_possible_height) { 1806 | std::cerr << "Tree deserialization: height is invalid\n"; 1807 | is.clear(std::ios_base::failbit); 1808 | return is; 1809 | } 1810 | 1811 | auto node = tree.root_.get(); 1812 | assert(node); 1813 | 1814 | if (!tree.deserialize_node(is, node, 0, tree_height)) { 1815 | return is; 1816 | } 1817 | if (!is.read(reinterpret_cast(&tree_code), 
sizeof(std::uint64_t))) { 1818 | std::cerr << "Tree deserialization: end code parse error\n"; 1819 | tree.clear(); 1820 | return is; 1821 | } 1822 | if (tree_code != end_code) { 1823 | std::cerr << "Tree deserialization: end code is invalid\n"; 1824 | tree.clear(); 1825 | is.clear(std::ios_base::failbit); 1826 | return is; 1827 | } 1828 | tree.set_begin(); 1829 | assert(tree.verify()); 1830 | return is; 1831 | } 1832 | 1833 | protected: 1834 | // preorder DFS traversal 1835 | bool deserialize_node(std::istream &is, Node *node, attr_t node_index, 1836 | attr_t node_height) requires(is_disk_) { 1837 | assert(node); 1838 | node->index_ = node_index; 1839 | node->height_ = node_height; 1840 | if (!is.read(reinterpret_cast(&node->num_keys_), sizeof(attr_t))) { 1841 | std::cerr << "Tree deserialization: nkeys parse error\n"; 1842 | return false; 1843 | } 1844 | if (node->num_keys_ >= 2 * Fanout || 1845 | (node != root_.get() && node->num_keys_ < Fanout - 1) || 1846 | node->num_keys_ < 0) { 1847 | std::cerr << "Tree deserialization: nkeys is invalid\n"; 1848 | is.clear(std::ios_base::failbit); 1849 | return false; 1850 | } 1851 | if (!is.read(reinterpret_cast(node->keys_.data()), 1852 | static_cast(node->num_keys_) * sizeof(V))) { 1853 | std::cerr << "Tree deserialization: key data read error\n"; 1854 | return false; 1855 | } 1856 | node->size_ = node->num_keys_; 1857 | if (node_height > 0) { 1858 | for (attr_t i = 0; i <= node->num_keys_; ++i) { 1859 | node->children_.push_back(make_node()); 1860 | node->children_[i]->parent_ = node; 1861 | if (!deserialize_node(is, node->children_[i].get(), i, 1862 | node_height - 1)) { 1863 | return false; 1864 | } 1865 | } 1866 | } 1867 | if (node->parent_) { 1868 | node->parent_->size_ += node->size_; 1869 | } 1870 | return true; 1871 | } 1872 | 1873 | public: 1874 | friend std::ostream &operator<<(std::ostream &os, 1875 | const BTreeBase &tree) requires(is_disk_) { 1876 | std::uint64_t tree_code = begin_code; 1877 | if 
(!os.write(reinterpret_cast(&tree_code), 1878 | sizeof(std::uint64_t))) { 1879 | std::cerr << "Tree serialization: begin code write error\n"; 1880 | return os; 1881 | } 1882 | 1883 | attr_t tree_height = tree.height(); 1884 | if (!os.write(reinterpret_cast(&tree_height), sizeof(attr_t))) { 1885 | std::cerr << "Tree serialization: tree height write error\n"; 1886 | return os; 1887 | } 1888 | 1889 | auto node = tree.root_.get(); 1890 | assert(node); 1891 | 1892 | if (!tree.serialize_node(os, node)) { 1893 | return os; 1894 | } 1895 | tree_code = end_code; 1896 | if (!os.write(reinterpret_cast(&tree_code), 1897 | sizeof(std::uint64_t))) { 1898 | std::cerr << "Tree serialization: end code write error\n"; 1899 | return os; 1900 | } 1901 | return os; 1902 | } 1903 | 1904 | protected: 1905 | // preorder DFS traversal 1906 | bool serialize_node(std::ostream &os, const Node *node) const 1907 | requires(is_disk_) { 1908 | assert(node); 1909 | if (!os.write(reinterpret_cast(&node->num_keys_), 1910 | sizeof(attr_t))) { 1911 | std::cerr << "Tree serialization: nkeys write error\n"; 1912 | return false; 1913 | } 1914 | if (!os.write(reinterpret_cast(node->keys_.data()), 1915 | static_cast(node->num_keys_) * sizeof(V))) { 1916 | std::cerr << "Tree serialization: key data write error\n"; 1917 | return false; 1918 | } 1919 | if (node->height_ > 0) { 1920 | for (attr_t i = 0; i <= node->num_keys_; ++i) { 1921 | if (!serialize_node(os, node->children_[i].get())) { 1922 | return false; 1923 | } 1924 | } 1925 | } 1926 | return true; 1927 | } 1928 | 1929 | public: 1930 | template class AllocTemplate_> 1932 | friend struct join_helper; 1933 | 1934 | protected: 1935 | std::pair 1936 | split_to_two_trees(const_iterator_type iter_lb, const_iterator_type iter_ub) { 1937 | BTreeBase tree_left(alloc_); 1938 | BTreeBase tree_right(alloc_); 1939 | iter_lb.dig(); 1940 | 1941 | auto lindices = get_path_from_root(iter_lb); 1942 | auto xl = iter_lb.node_; 1943 | std::ranges::reverse(lindices); 1944 
| auto rindices = get_path_from_root(iter_ub); 1945 | auto xr = iter_ub.node_; 1946 | std::ranges::reverse(rindices); 1947 | 1948 | while (!lindices.empty()) { 1949 | auto il = lindices.back(); 1950 | lindices.pop_back(); 1951 | 1952 | auto lroot = tree_left.root_.get(); 1953 | 1954 | if (xl->is_leaf()) { 1955 | assert(lroot->size_ == 0); 1956 | 1957 | if (il > 0) { 1958 | if constexpr (is_disk_) { 1959 | std::memcpy(lroot->keys_.data(), xl->keys_.data(), il * sizeof(V)); 1960 | lroot->num_keys_ += il; 1961 | } else { 1962 | // send left i keys to lroot 1963 | std::ranges::move(xl->keys_ | std::views::take(il), 1964 | std::back_inserter(lroot->keys_)); 1965 | } 1966 | lroot->size_ += il; 1967 | } 1968 | 1969 | xl = xl->parent_; 1970 | } else { 1971 | if (il > 0) { 1972 | BTreeBase supertree_left(alloc_); 1973 | auto slroot = supertree_left.root_.get(); 1974 | // sltree takes left i - 1 keys, i children 1975 | // middle key is key[i - 1] 1976 | 1977 | assert(slroot->size_ == 0); 1978 | 1979 | if constexpr (is_disk_) { 1980 | std::memcpy(slroot->keys_.data(), xl->keys_.data(), 1981 | (il - 1) * sizeof(V)); 1982 | slroot->num_keys_ += (il - 1); 1983 | } else { 1984 | std::ranges::move(xl->keys_ | std::views::take(il - 1), 1985 | std::back_inserter(slroot->keys_)); 1986 | } 1987 | slroot->size_ += (il - 1); 1988 | 1989 | slroot->children_.reserve(Fanout * 2); 1990 | 1991 | std::ranges::move(xl->children_ | std::views::take(il), 1992 | std::back_inserter(slroot->children_)); 1993 | slroot->height_ = slroot->children_[0]->height_ + 1; 1994 | for (auto &&sl_child : slroot->children_) { 1995 | sl_child->parent_ = slroot; 1996 | slroot->size_ += sl_child->size_; 1997 | } 1998 | 1999 | supertree_left.promote_root_if_necessary(); 2000 | supertree_left.set_begin(); 2001 | 2002 | BTreeBase new_tree_left = 2003 | join(std::move(supertree_left), std::move(xl->keys_[il - 1]), std::move(tree_left)); 2004 | tree_left = std::move(new_tree_left); 2005 | } 2006 | 2007 | xl = 
xl->parent_; 2008 | } 2009 | } 2010 | while (!rindices.empty()) { 2011 | auto ir = rindices.back(); 2012 | rindices.pop_back(); 2013 | 2014 | auto rroot = tree_right.root_.get(); 2015 | 2016 | if (xr->is_leaf()) { 2017 | assert(rroot->size_ == 0); 2018 | 2019 | if (ir < xr->nkeys()) { 2020 | auto immigrants = xr->nkeys() - ir; 2021 | if constexpr (is_disk_) { 2022 | std::memcpy(rroot->keys_.data(), xr->keys_.data() + ir, 2023 | immigrants * sizeof(V)); 2024 | rroot->num_keys_ += immigrants; 2025 | } else { 2026 | // send right n - (i + 1) keys to rroot 2027 | std::ranges::move(xr->keys_ | std::views::drop(ir), 2028 | std::back_inserter(rroot->keys_)); 2029 | } 2030 | rroot->size_ += immigrants; 2031 | } 2032 | 2033 | xr = xr->parent_; 2034 | } else { 2035 | 2036 | if (ir + 1 < std::ssize(xr->children_)) { 2037 | BTreeBase supertree_right(alloc_); 2038 | auto srroot = supertree_right.root_.get(); 2039 | // srtree takes right n - (i + 1) keys, n - (i + 1) children 2040 | // middle key is key[i] 2041 | 2042 | assert(srroot->size_ == 0); 2043 | 2044 | auto immigrants = xr->nkeys() - (ir + 1); 2045 | if constexpr (is_disk_) { 2046 | std::memcpy(srroot->keys_.data(), xr->keys_.data() + (ir + 1), 2047 | immigrants * sizeof(V)); 2048 | srroot->num_keys_ += immigrants; 2049 | } else { 2050 | std::ranges::move(xr->keys_ | std::views::drop(ir + 1), 2051 | std::back_inserter(srroot->keys_)); 2052 | } 2053 | srroot->size_ += immigrants; 2054 | 2055 | srroot->children_.reserve(Fanout * 2); 2056 | 2057 | std::ranges::move(xr->children_ | std::views::drop(ir + 1), 2058 | std::back_inserter(srroot->children_)); 2059 | srroot->height_ = srroot->children_[0]->height_ + 1; 2060 | attr_t sr_index = 0; 2061 | for (auto &&sr_child : srroot->children_) { 2062 | sr_child->parent_ = srroot; 2063 | sr_child->index_ = sr_index++; 2064 | srroot->size_ += sr_child->size_; 2065 | } 2066 | 2067 | supertree_right.promote_root_if_necessary(); 2068 | supertree_right.set_begin(); 2069 | 2070 | 
BTreeBase new_tree_right = 2071 | join(std::move(tree_right), std::move(xr->keys_[ir]), 2072 | std::move(supertree_right)); 2073 | tree_right = std::move(new_tree_right); 2074 | } 2075 | 2076 | xr = xr->parent_; 2077 | } 2078 | } 2079 | assert(!xl && !xr && lindices.empty() && rindices.empty()); 2080 | assert(tree_left.verify()); 2081 | assert(tree_right.verify()); 2082 | clear(); 2083 | return {std::move(tree_left), std::move(tree_right)}; 2084 | } 2085 | 2086 | public: 2087 | template class AllocTemplate_, typename T> 2089 | friend struct split_helper; 2090 | }; 2091 | 2092 | template class AllocTemplate> 2094 | struct join_helper { 2095 | private: 2096 | BTreeBase result_; 2097 | 2098 | using Tree = BTreeBase; 2099 | using Node = typename Tree::node_type; 2100 | using Proj = typename Tree::Proj; 2101 | static constexpr bool is_disk_ = Tree::is_disk_; 2102 | 2103 | public: 2104 | join_helper(BTreeBase &&tree_left, 2105 | BTreeBase &&tree_right) { 2106 | if (tree_left.empty()) { 2107 | result_ = std::move(tree_right); 2108 | } else if (tree_right.empty()) { 2109 | result_ = std::move(tree_left); 2110 | } else { 2111 | auto it = tree_right.begin(); 2112 | V mid_value = *it; 2113 | tree_right.erase(it); 2114 | result_ = join(std::move(tree_left), std::move(mid_value), 2115 | std::move(tree_right)); 2116 | } 2117 | } 2118 | 2119 | template 2120 | requires std::is_constructible_v> 2121 | join_helper(BTreeBase &&tree_left, 2122 | T_ &&raw_value, 2123 | BTreeBase &&tree_right) { 2124 | 2125 | V mid_value{std::forward(raw_value)}; 2126 | if ((!tree_left.empty() && 2127 | Comp{}(Proj{}(mid_value), Proj{}(*tree_left.crbegin()))) || 2128 | (!tree_right.empty() && 2129 | Comp{}(Proj{}(*tree_right.cbegin()), Proj{}(mid_value)))) { 2130 | throw std::invalid_argument("Join() key order is invalid\n"); 2131 | } 2132 | if (tree_left.alloc_ != tree_right.alloc_) { 2133 | throw std::invalid_argument("Join() two allocators are different\n"); 2134 | } 2135 | 2136 | auto height_left = 
tree_left.root_->height_; 2137 | auto height_right = tree_right.root_->height_; 2138 | auto size_left = tree_left.root_->size_; 2139 | auto size_right = tree_right.root_->size_; 2140 | 2141 | if (height_left >= height_right) { 2142 | Tree new_tree = std::move(tree_left); 2143 | attr_t curr_height = height_left; 2144 | Node *curr = new_tree.root_.get(); 2145 | if (new_tree.root_->is_full()) { 2146 | // if root is full then make it as a child of the new root 2147 | auto new_root = new_tree.make_node(); 2148 | new_tree.root_->index_ = 0; 2149 | new_tree.root_->parent_ = new_root.get(); 2150 | new_root->size_ = new_tree.root_->size_; 2151 | new_root->height_ = new_tree.root_->height_ + 1; 2152 | new_root->children_.reserve(Fanout * 2); 2153 | new_root->children_.push_back(std::move(new_tree.root_)); 2154 | new_tree.root_ = std::move(new_root); 2155 | // and split 2156 | new_tree.split_child(new_tree.root_->children_[0].get()); 2157 | curr = new_tree.root_->children_[1].get(); 2158 | } 2159 | assert(curr->height_ == height_left); 2160 | 2161 | while (curr && curr_height > height_right) { 2162 | assert(!curr->is_leaf()); 2163 | curr_height--; 2164 | 2165 | if (curr->children_.back()->is_full()) { 2166 | new_tree.split_child(curr->children_.back().get()); 2167 | } 2168 | curr = curr->children_.back().get(); 2169 | } 2170 | assert(curr_height == height_right); 2171 | auto parent = curr->parent_; 2172 | if (!parent) { 2173 | // tree_left was empty or height of two trees were the same 2174 | auto new_root = tree_left.make_node(); 2175 | new_root->height_ = new_tree.root_->height_ + 1; 2176 | 2177 | if constexpr (is_disk_) { 2178 | new_root->keys_[new_root->num_keys_] = mid_value; 2179 | new_root->num_keys_++; 2180 | } else { 2181 | new_root->keys_.push_back(std::move(mid_value)); 2182 | } 2183 | 2184 | new_root->children_.reserve(Fanout * 2); 2185 | 2186 | new_tree.root_->parent_ = new_root.get(); 2187 | new_tree.root_->index_ = 0; 2188 | 
new_root->children_.push_back(std::move(new_tree.root_)); 2189 | 2190 | tree_right.root_->parent_ = new_root.get(); 2191 | tree_right.root_->index_ = 1; 2192 | new_root->children_.push_back(std::move(tree_right.root_)); 2193 | 2194 | new_tree.root_ = std::move(new_root); 2195 | new_tree.try_merge(new_tree.root_.get(), false); 2196 | new_tree.promote_root_if_necessary(); 2197 | new_tree.root_->size_ = size_left + size_right + 1; 2198 | } else { 2199 | if constexpr (is_disk_) { 2200 | parent->keys_[parent->num_keys_] = mid_value; 2201 | parent->num_keys_++; 2202 | } else { 2203 | parent->keys_.push_back(std::move(mid_value)); 2204 | } 2205 | 2206 | tree_right.root_->parent_ = parent; 2207 | tree_right.root_->index_ = 2208 | static_cast(std::ssize(parent->children_)); 2209 | parent->children_.push_back(std::move(tree_right.root_)); 2210 | 2211 | while (parent) { 2212 | parent->size_ += (size_right + 1); 2213 | new_tree.try_merge(parent, false); 2214 | parent = parent->parent_; 2215 | } 2216 | new_tree.promote_root_if_necessary(); 2217 | } 2218 | assert(new_tree.root_->size_ == size_left + size_right + 1); 2219 | assert(new_tree.verify()); 2220 | result_ = std::move(new_tree); 2221 | } else { 2222 | Tree new_tree = std::move(tree_right); 2223 | attr_t curr_height = height_right; 2224 | Node *curr = new_tree.root_.get(); 2225 | if (new_tree.root_->is_full()) { 2226 | // if root is full then make it as a child of the new root 2227 | auto new_root = new_tree.make_node(); 2228 | new_tree.root_->index_ = 0; 2229 | new_tree.root_->parent_ = new_root.get(); 2230 | new_root->size_ = new_tree.root_->size_; 2231 | new_root->height_ = new_tree.root_->height_ + 1; 2232 | new_root->children_.reserve(Fanout * 2); 2233 | new_root->children_.push_back(std::move(new_tree.root_)); 2234 | new_tree.root_ = std::move(new_root); 2235 | // and split 2236 | new_tree.split_child(new_tree.root_->children_[0].get()); 2237 | curr = new_tree.root_->children_[0].get(); 2238 | } 2239 | 
assert(curr->height_ == height_right); 2240 | 2241 | while (curr && curr_height > height_left) { 2242 | assert(!curr->is_leaf()); 2243 | curr_height--; 2244 | 2245 | if (curr->children_.front()->is_full()) { 2246 | new_tree.split_child(curr->children_[0].get()); 2247 | } 2248 | curr = curr->children_.front().get(); 2249 | } 2250 | assert(curr_height == height_left); 2251 | auto parent = curr->parent_; 2252 | assert(parent); 2253 | if constexpr (is_disk_) { 2254 | std::memmove(parent->keys_.data() + 1, parent->keys_.data(), 2255 | parent->num_keys_ * sizeof(V)); 2256 | parent->keys_[0] = mid_value; 2257 | parent->num_keys_++; 2258 | } else { 2259 | parent->keys_.insert(parent->keys_.begin(), std::move(mid_value)); 2260 | } 2261 | 2262 | auto new_begin = tree_left.begin(); 2263 | tree_left.root_->parent_ = parent; 2264 | tree_left.root_->index_ = 0; 2265 | parent->children_.insert(parent->children_.begin(), 2266 | std::move(tree_left.root_)); 2267 | for (auto &&child : parent->children_ | std::views::drop(1)) { 2268 | child->index_++; 2269 | } 2270 | while (parent) { 2271 | parent->size_ += (size_left + 1); 2272 | new_tree.try_merge(parent, true); 2273 | parent = parent->parent_; 2274 | } 2275 | new_tree.promote_root_if_necessary(); 2276 | new_tree.begin_ = new_begin; 2277 | assert(new_tree.root_->size_ == size_left + size_right + 1); 2278 | assert(new_tree.verify()); 2279 | result_ = std::move(new_tree); 2280 | } 2281 | } 2282 | BTreeBase&& result() { return std::move(result_); } 2283 | }; 2284 | template class AllocTemplate, typename T> 2286 | struct split_helper { 2287 | private: 2288 | std::pair, 2289 | BTreeBase> result_; 2290 | public: 2291 | using Tree = BTreeBase; 2292 | 2293 | split_helper(BTreeBase &&tree, 2294 | T &&raw_key) 2295 | requires(std::is_constructible_v>) { 2296 | if (tree.empty()) { 2297 | Tree tree_left(tree.alloc_); 2298 | Tree tree_right(tree.alloc_); 2299 | result_ = {std::move(tree_left), std::move(tree_right)}; 2300 | } else { 2301 | K 
mid_key{std::forward(raw_key)}; 2302 | result_ = tree.split_to_two_trees(tree.find_lower_bound(mid_key, false), 2303 | tree.find_upper_bound(mid_key, false)); 2304 | } 2305 | } 2306 | split_helper(BTreeBase &&tree, 2307 | T &&raw_key1, 2308 | T &&raw_key2) 2309 | requires(std::is_constructible_v>) { 2310 | if (tree.empty()) { 2311 | Tree tree_left(tree.alloc_); 2312 | Tree tree_right(tree.alloc_); 2313 | result_ = {std::move(tree_left), std::move(tree_right)}; 2314 | } else { 2315 | K key1{std::forward(raw_key1)}; 2316 | K key2{std::forward(raw_key2)}; 2317 | if (Comp{}(key2, key1)) { 2318 | throw std::invalid_argument("split() key order is invalid\n"); 2319 | } 2320 | result_ = tree.split_to_two_trees(tree.find_lower_bound(key1, false), 2321 | tree.find_upper_bound(key2, false)); 2322 | } 2323 | } 2324 | std::pair, 2325 | BTreeBase> && 2326 | result() { return std::move(result_); } 2327 | 2328 | }; 2329 | template class AllocTemplate = std::allocator> 2331 | using BTreeSet = BTreeBase; 2332 | 2333 | template class AllocTemplate = std::allocator> 2335 | using BTreeMultiSet = BTreeBase; 2336 | 2337 | template class AllocTemplate = std::allocator> 2340 | using BTreeMap = BTreeBase, t, Comp, false, AllocTemplate>; 2341 | 2342 | template class AllocTemplate = std::allocator> 2345 | using BTreeMultiMap = 2346 | BTreeBase, t, Comp, true, AllocTemplate>; 2347 | 2348 | template class AllocTemplate> 2350 | BTreeBase join(BTreeBase &&tree_left, 2351 | BTreeBase &&tree_right) { 2352 | return join_helper(std::move(tree_left), std::move(tree_right)).result(); 2353 | } 2354 | 2355 | template class AllocTemplate, typename T_> 2357 | BTreeBase join(BTreeBase &&tree_left, 2358 | T_ &&raw_value, 2359 | BTreeBase &&tree_right) { 2360 | return join_helper(std::move(tree_left), std::move(raw_value), std::move(tree_right)).result(); 2361 | } 2362 | 2363 | template class AllocTemplate, typename T> 2365 | std::pair, 2366 | BTreeBase> 2367 | split(BTreeBase &&tree, 2368 | T &&raw_key) { 
2369 | return split_helper(std::move(tree), std::move(raw_key)).result(); 2370 | } 2371 | 2372 | template class AllocTemplate, typename T> 2374 | std::pair, 2375 | BTreeBase> 2376 | split(BTreeBase &&tree, 2377 | T &&raw_key1, 2378 | T &&raw_key2) { 2379 | return split_helper(std::move(tree), std::move(raw_key1), std::move(raw_key2)).result(); 2380 | } 2381 | } // namespace frozenca 2382 | 2383 | #endif //__FC_BTREE_H__ 2384 | --------------------------------------------------------------------------------