├── RangeMinimumQuery ├── IntTags.h ├── LinearRMQOperators.h ├── TableArray.h ├── Demo.cpp ├── SparseTableRMQ.h ├── Source.cpp ├── CartesianTree.h └── LinearRMQ.h ├── SkewHeap.hs ├── PairingHeap.hs ├── TrieTraversal.h ├── FibonacciHeapFormat.h ├── TrieTest.h ├── SkewHeap.h ├── TrieTraits.h ├── Splay.hs ├── static_hashset.h ├── CompressedTrie.h ├── binomial_heap_meta.cxx ├── 2-3-4-Tree.hs ├── d-ary_heap.h ├── WaveletTree.hs ├── Trie.h ├── binary_heap_static.h ├── README.md ├── FibonacciHeap.h ├── Treap.hs ├── DList.h ├── pairing_heap_static.h ├── suffix_array.h ├── binomial_heap_meta.h ├── binomial_heap.h ├── pairing_heap.h └── x_fast_trie.h /RangeMinimumQuery/IntTags.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "TaggedInt.h" 3 | 4 | // Not a scoped enum - will increase clutter (also see comment in TaggedInt.h) 5 | enum Tag { 6 | ValueIdx, // This is an index of a value 7 | RMQIdx, // This is a 01RMQ-local index 8 | Depth, // This is a cartesian tree depth 9 | LevelIdx, // This is an index of a level 10 | BV, // This is a bitvector 11 | Chunk, // This is a chunk-local index 12 | ChunkIdx, // This is an index of a chunk 13 | ChunkSize, // This is the size of a chunk (should be const) 14 | }; 15 | -------------------------------------------------------------------------------- /RangeMinimumQuery/LinearRMQOperators.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "IntTags.h" 3 | 4 | #ifndef DISABLE_TAGGED_INTS 5 | // Offset an RMQ index of a chunk beginning with a chunk-local index 6 | Int operator+(const Int& chunkStart, const Int& idx) { 7 | return Int(int(chunkStart) + int(idx)); 8 | } 9 | // Get the chunk index for a given RMQ index 10 | Int operator/(const Int& idx, const Int& chunk) { 11 | return Int(int(idx) / int(chunk)); 12 | } 13 | // Get the chunk-local index for a given RMQ index 14 | Int operator%(const Int& idx, const Int& 
chunk) { 15 | return Int(int(idx) % int(chunk)); 16 | } 17 | // Get the index of a chunk's beginning by converting "back" 18 | Int operator*(const Int& idx, const Int& chunk) { 19 | return Int(int(idx) * int(chunk)); 20 | } 21 | // By exception, chunk-local indices can be compared to a chunk size 22 | bool operator==(const Int& idx, const Int& chunk) { 23 | return (int(idx) == int(chunk)); 24 | } 25 | auto operator<=>(const Int& idx, const Int& chunk) { 26 | return (int(idx) <=> int(chunk)); 27 | } 28 | // By exception, the chunk size can be converted to a chunk index, but not vice versa (!) 29 | Int toIndex(const Int& chunk) { return Int(int(chunk)); } // Note: this cast to int is dangerous (!) 30 | #else 31 | #define toIndex(x) (x) 32 | auto toIndex = [](auto x) { return x; }; // \x -> x 33 | #endif // !DISABLE_TAGGED_INTS 34 | -------------------------------------------------------------------------------- /RangeMinimumQuery/TableArray.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "TaggedArray.h" 3 | 4 | // An array of flattened upper-triangular tables, residing in contiguous memory. 5 | // Each table's entries represent the results of a given function for all i,j 6 | // s.t. 0<=i 8 | class TableArray { 9 | DynamicArray values; // All tables' values 10 | Idx n, tableSize; // Table rows & total value count in a table (precomputed from n) 11 | TableIdx numTables; // Table count 12 | // Returns the actual index in a table's values, where the result for [i;j) is stored. 
13 | Idx getIndex(const Idx i, const Idx j) const { 14 | return (i * n + j - 1 - (i * (i + 1) / 2)); 15 | } 16 | public: 17 | TableArray() : values{}, n{ 0 }, tableSize{ 0 }, numTables{ 0 } {} 18 | TableArray(const Idx n, const TableIdx numTables) 19 | : n(n), tableSize(n * (n + 1) / 2), numTables(numTables) 20 | { 21 | values = DynamicArray(int(numTables) * tableSize); 22 | } 23 | // Returns the results for range [i;j) in table idx 24 | T& at(const TableIdx idx, const Idx i, const Idx j) { 25 | vassert(0 <= idx && idx < numTables); 26 | vassert(0 <= i && i < j && j <= n); 27 | return values[int(idx) * tableSize + getIndex(i, j)]; 28 | } 29 | // Returns the results for range [i;j) in table idx 30 | const T& at(const TableIdx idx, const Idx i, const Idx j) const { 31 | vassert(0 <= idx && idx < numTables); 32 | vassert(0 <= i && i < j && j <= n); 33 | return values[int(idx) * tableSize + getIndex(i,j)]; 34 | } 35 | }; 36 | -------------------------------------------------------------------------------- /SkewHeap.hs: -------------------------------------------------------------------------------- 1 | module SkewHeap 2 | ( SkewHeap, insert, (<>), merge, getMin, 3 | extractMin, empty, size, 4 | fromList, toList, sort 5 | ) where 6 | import Prelude hiding ((<>)) 7 | import Data.List (unfoldr) 8 | 9 | -- No need for comments - the code is self-documenting. 10 | -- The basic operations such as insert, merge and extractMin have O(lgn) 11 | -- amortized complexity. toList, fromList and sort are obviously O(nlgn). 
12 | data SkewHeap a = Empty | Node a (SkewHeap a) (SkewHeap a) 13 | 14 | -- The characteristic operation - a skew merge 15 | (<>) :: Ord a => SkewHeap a -> SkewHeap a -> SkewHeap a 16 | sh1 <> Empty = sh1 17 | Empty <> sh2 = sh2 18 | sh1@(Node v1 l1 r1) <> sh2@(Node v2 l2 r2) 19 | | v1 < v2 = Node v1 (sh2 <> r1) l1 20 | | otherwise = Node v2 (sh1 <> r2) l2 21 | 22 | -- All other operations are then reduced to a skew merge 23 | insert :: Ord a => a -> SkewHeap a -> SkewHeap a 24 | insert x sh = singleton x <> sh 25 | where singleton x = Node x Empty Empty 26 | 27 | merge :: Ord a => SkewHeap a -> SkewHeap a -> SkewHeap a 28 | merge = (<>) 29 | 30 | extractMin :: Ord a => SkewHeap a -> Maybe (a, SkewHeap a) 31 | extractMin Empty = Nothing 32 | extractMin (Node val left right) = Just (val, left <> right) 33 | 34 | -- Other basic operations 35 | getMin :: SkewHeap a -> Maybe a 36 | getMin Empty = Nothing 37 | getMin (Node val _ _) = Just val 38 | 39 | empty :: SkewHeap a -> Bool 40 | empty Empty = True 41 | empty _ = False 42 | 43 | -- Unfortunately O(nlgn), not O(1): it counts the elements by draining the heap through toList 44 | size :: Ord a => SkewHeap a -> Int 45 | size = length . toList 46 | 47 | -- The beauty of folding & unfolding 48 | fromList :: Ord a => [a] -> SkewHeap a 49 | fromList = foldr insert Empty 50 | 51 | toList :: Ord a => SkewHeap a -> [a] 52 | toList = unfoldr extractMin 53 | 54 | sort :: Ord a => [a] -> [a] 55 | sort = toList . 
fromList 56 | -------------------------------------------------------------------------------- /RangeMinimumQuery/Demo.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include // std::unique_ptr 6 | #include // std::ranges::min_element 7 | 8 | template 9 | struct Node { 10 | T value; 11 | std::unique_ptr left, right; 12 | }; 13 | 14 | template 15 | std::unique_ptr> build(std::span values) { 16 | if (values.empty()) { 17 | return nullptr; 18 | } else { // We can add a values.size() == 1 case here if we want 19 | const auto it = std::ranges::min_element(values); 20 | const size_t pos = (it - values.begin()); 21 | return std::make_unique>( 22 | *it, 23 | build(values.subspan(0, pos)), 24 | build(values.subspan(pos + 1)) 25 | ); 26 | } 27 | } 28 | 29 | // Note: when storing arrays of indices, no need to waste space for 64-bit integers 30 | struct Node2 { 31 | int left, right; 32 | }; 33 | 34 | template 35 | int fill(std::span values, std::span nodes, const int firstIdx) { 36 | if (values.empty()) { 37 | return -1; 38 | } else { // We can add a values.size() == 1 case here if we want 39 | const auto it = std::ranges::min_element(values); 40 | const int pos = int(it - values.begin()); 41 | nodes[pos].left = fill(values.subspan(0, pos), nodes.subspan(0, pos), firstIdx); 42 | nodes[pos].right = fill(values.subspan(pos + 1), nodes.subspan(pos + 1), firstIdx + pos + 1); 43 | return firstIdx + pos; // (!) 
44 | } 45 | } 46 | 47 | int main1() { 48 | std::vector values = { 3,4,2,1,5 }; 49 | fmt::print(" Values: {}\nMinimum: ", values); 50 | 51 | std::unique_ptr> root = build(std::span{ values }); 52 | fmt::print("{} ", root->value); 53 | 54 | std::vector nodes(values.size()); 55 | const int rootIdx = fill(std::span{ values }, nodes, 0); 56 | fmt::print("{}\n", values[rootIdx]); 57 | 58 | for (const auto& [l, r] : nodes) { 59 | fmt::print("({},{}) ", l, r); 60 | } 61 | fmt::print("\n"); 62 | 63 | return 0; 64 | } -------------------------------------------------------------------------------- /PairingHeap.hs: -------------------------------------------------------------------------------- 1 | module PairingHeap 2 | ( PairingHeap, insert, (<>), merge, getMin, 3 | extractMin, empty, size, 4 | fromList, toList, sort 5 | ) where 6 | 7 | -- No need for comments - the code is self-documenting. 8 | -- The basic operations such as insert, merge and findMin have O(1) 9 | -- amortized time complexity, and extractMin - O(lgn) amortized. 10 | -- toList, fromList and sort are obviously O(nlgn). 
11 | data PairingHeap a = Empty | Node a [PairingHeap a] 12 | 13 | instance (Ord a) => Eq (PairingHeap a) where 14 | ph1 == ph2 = (toList ph1) == (toList ph2) 15 | 16 | instance (Show a) => Show (PairingHeap a) where 17 | show Empty = "PairingHeap{}" 18 | show (Node val _) = "PairingHeap{" ++ show val ++ "..}" 19 | 20 | insert :: Ord a => a -> PairingHeap a -> PairingHeap a 21 | insert x ph = singleton x <> ph 22 | where singleton x = Node x [] 23 | 24 | (<>) :: Ord a => PairingHeap a -> PairingHeap a -> PairingHeap a 25 | (<>) = merge 26 | 27 | merge :: Ord a => PairingHeap a -> PairingHeap a -> PairingHeap a 28 | merge ph1 Empty = ph1 29 | merge Empty ph2 = ph2 30 | merge ph1@(Node v1 ch1) ph2@(Node v2 ch2) 31 | | v1 < v2 = Node v1 (ph2:ch1) 32 | | otherwise = Node v2 (ph1:ch2) 33 | 34 | getMin :: PairingHeap a -> Maybe a 35 | getMin Empty = Nothing 36 | getMin (Node val _) = Just val 37 | 38 | extractMin :: Ord a => PairingHeap a -> Maybe (a, PairingHeap a) 39 | extractMin Empty = Nothing 40 | extractMin (Node val children) = Just (val, makePairs children) 41 | where makePairs [] = Empty 42 | makePairs [x] = x 43 | makePairs (x:y:xs) = (x<>y) <> makePairs xs 44 | 45 | empty :: PairingHeap a -> Bool 46 | empty Empty = True 47 | empty _ = False 48 | 49 | -- unfortunately O(nlgn)... 50 | size :: Ord a => PairingHeap a -> Int 51 | size = length . toList 52 | 53 | fromList :: Ord a => [a] -> PairingHeap a 54 | fromList = foldl (flip insert) Empty 55 | -- fromList = foldr insert Empty 56 | 57 | toList :: Ord a => PairingHeap a -> [a] 58 | toList ph = case extractMin ph of Nothing -> [] 59 | Just (val, ph1) -> val : toList ph1 60 | 61 | sort :: Ord a => [a] -> [a] 62 | sort = toList . 
fromList 63 | -------------------------------------------------------------------------------- /TrieTraversal.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include // preferably from github/andreasbuhr 3 | #include "TrieTraits.h" 4 | 5 | template 6 | class TrieTraversal { 7 | using Traits = TrieTraits; 8 | public: 9 | // Traverses down the trie, checking whether a value is present 10 | bool contains(this const auto& self, T value) noexcept(Traits::noexceptConsume) { 11 | auto curr = self.getRootPtr(); 12 | while (!Traits::consumed(value)) { 13 | const size_t idx = Traits::advance(value); 14 | if (!self.hasChild(curr, idx)) { 15 | return false; 16 | } 17 | curr = self.getChild(curr, idx); 18 | } 19 | return self.hasValue(curr); 20 | } 21 | // Traverses the trie depth-first & returns each value found, keeping the path to the current node as the coroutine state. 22 | cppcoro::generator values(this const auto& self) noexcept(Traits::noexceptStack) { 23 | struct VisitState { 24 | decltype(self.getRootPtr()) root; // (sub)tree root 25 | size_t nextIdx; // Index of next subtree of root to be visited 26 | }; 27 | // Stack of not-completely-visited subtrees (each with current visit progress) 28 | using Stack = Traits::template Stack; 29 | Stack path = {}; 30 | path.push({ self.getRootPtr(), 0 }); 31 | // The value, "built" from the path from the root to the current node. 
32 | typename Traits::U temp = {}; 33 | while (true) { 34 | auto& [curr, nextIdx] = path.top(); 35 | // Return the value only on the first node visit 36 | if (nextIdx == 0 && self.hasValue(curr)) { 37 | co_yield Traits::fromTemporary(temp); 38 | } 39 | // Jump to next non-empty subtree 40 | while (nextIdx < Traits::numPointers && !self.hasChild(curr, nextIdx)) { 41 | ++nextIdx; 42 | } 43 | if (nextIdx == Traits::numPointers) { 44 | path.pop(); 45 | if (path.empty()) { 46 | break; // We've popped the root, nothing more to do 47 | } 48 | Traits::pop(temp, path.size() - 1); // The stack top's depth 49 | } else { // Push the next subtree to the stack 50 | Traits::push(temp, nextIdx, path.size() - 1); // The stack top's depth 51 | path.push({ self.getChild(curr, nextIdx), 0 }); 52 | ++nextIdx; // For when we return to the current depth back again - continue from the next subtree 53 | } 54 | } 55 | } 56 | }; 57 | -------------------------------------------------------------------------------- /FibonacciHeapFormat.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "FibonacciHeap.h" 4 | 5 | // Fibonacci heaps can be formatted either horizontally (by default) 6 | // or vertically to accent the hierarchy, using the {:v} modifier: 7 | // [1] 8 | // [1] [2 [3] [4* [5]]] [6 [7]] <-> [2 [3] 9 | // [4* [5]]] 10 | // [6 [7]] 11 | template 12 | struct fmt::formatter> { 13 | bool horizontal = true; 14 | 15 | template 16 | constexpr auto parse(ParseContext& ctx) { 17 | auto it = ctx.begin(), end = ctx.end(); 18 | if (it != end && *it == 'v') { 19 | horizontal = false; 20 | ++it; 21 | } 22 | if (it != end && *it != '}') { throw fmt::format_error("invalid format"); } 23 | return it; 24 | } 25 | template 26 | auto format(const FibonacciHeap& fh, FormatContext& ctx) { 27 | if (horizontal) { 28 | [&](this auto&& self, const DList>& nodes, const bool b) -> void { 29 | bool first = true; 30 | for (const auto& n : 
nodes) { 31 | if (b || !first) { *ctx.out()++ = ' '; } 32 | first = false; 33 | fmt::format_to(ctx.out(), "[{}", n.val); 34 | if (n.marked) { *ctx.out()++ = '*'; } 35 | self(n.subtrees, true); 36 | *ctx.out()++ = ']'; 37 | } 38 | }(fh.roots, false); // Immediately invoked recursive lambda, lol 39 | } else { 40 | [&](this auto&& self, const DList>& nodes, const size_t pad) -> void { 41 | bool first = true; 42 | for (const auto& n : nodes) { 43 | if (!first) { *ctx.out()++ = '\n'; } 44 | fmt::format_to(ctx.out(), "{:>{}}{}", '[', (first ? 0 : pad) + 1, n.val); 45 | if (n.marked) { *ctx.out()++ = '*'; } 46 | const size_t newPad = fmt::formatted_size("{}", n.val); 47 | if (!n.subtrees.empty()) { 48 | *ctx.out()++ = ' '; 49 | self(n.subtrees, pad + newPad + 2 + n.marked); // for the '[', ' ', and possibly '*' already printed 50 | } 51 | *ctx.out()++ = ']'; 52 | first = false; 53 | } 54 | }(fh.roots, 0); // Another one, lol 55 | } 56 | return ctx.out(); 57 | } 58 | }; 59 | -------------------------------------------------------------------------------- /TrieTest.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "Trie.h" 4 | #include "CompressedTrie.h" 5 | 6 | template 7 | void testTrie(); 8 | 9 | template <> 10 | void testTrie() { 11 | Trie t; 12 | int numInserted = 0; 13 | for (const char* str : { "andi", "", "and", "andy", "baba" }) { 14 | if (t.insert(str)) { 15 | ++numInserted; 16 | } 17 | } 18 | fmt::print("numInserted={}\ncount={}\n", numInserted, t.size()); 19 | for (const char* str : t.values()) { 20 | fmt::print("\"{}\"\n", str); 21 | } 22 | for (const char* str : { "", "andi", "andrey", "ba", "c" }) { 23 | fmt::print("\"{}\" : {}\n", str, t.contains(str)); 24 | } 25 | int numRemoved = 0; 26 | for (const char* str : { "andi", "", "and", "andy", "baba" }) { 27 | if (t.remove(str)) { 28 | ++numRemoved; 29 | } 30 | } 31 | fmt::print("numRemoved={}\ncount={}\n", numRemoved, t.size()); 32 | 
for (const char* str : t.values()) { 33 | fmt::print("\"{}\"\n", str); 34 | } 35 | fmt::print("\n"); 36 | } 37 | 38 | template <> 39 | void testTrie() { 40 | Trie t; 41 | int numInserted = 0; 42 | for (const uint64_t value : { 68, 20, 35, 32, 14, 0, 12, 20, 300, 0, 301, 420/*, -1, -2*/ }) { 43 | if (t.insert(value)) { 44 | ++numInserted; 45 | } 46 | } 47 | fmt::print("numInserted={}\n", numInserted); 48 | fmt::print("count={} maxBits={}\n", t.size(), t.maxBits()); 49 | for (const uint64_t value : t.values()) { 50 | // Print arg0 in binary, padding up to arg1 with zeroes; then print arg0 normally 51 | fmt::print("{0:0{1}b} ({0})\n", value, t.maxBits()); 52 | } 53 | for (const uint64_t value : { 0, 5, 12, 20, 28, 68, 420 }) { 54 | fmt::print("{} : {}\n", value, t.contains(value)); 55 | } 56 | int numRemoved = 0; 57 | for (const uint64_t value : { 68, 20, 35, 32, 14, 0, 12, 20, 300, 0, 301, 420/*, -1, -2*/ }) { 58 | if (t.remove(value)) { 59 | ++numRemoved; 60 | } 61 | } 62 | fmt::print("numRemoved={}\n", numRemoved); 63 | fmt::print("count={} maxBits={}\n", t.size(), t.maxBits()); 64 | for (const uint64_t value : t.values()) { 65 | // Print arg0 in binary, padding up to arg1 with zeroes; then print arg0 normally 66 | fmt::print("{0:0{1}b} ({0})\n", value, t.maxBits()); 67 | } 68 | fmt::print("\n"); 69 | } 70 | 71 | template 72 | void testTrieCompress(); 73 | 74 | template <> 75 | void testTrieCompress() { 76 | Trie t{ "and", "ax", "", "bot", "v", "vw" }; 77 | CompressedTrie ct{ t }; 78 | auto v1 = t.values(); 79 | auto v2 = ct.values(); 80 | for (auto it1 = v1.begin(), it2 = v2.begin(); it1 != v1.end() && it2 != v2.end(); ++it1, ++it2) { 81 | fmt::print("\"{}\" \"{}\"\n", *it1, *it2); 82 | } 83 | for (const char* str : { "", "and", "andi", "ax", "ay", "bo", "cc", "v", "vw" }) { 84 | fmt::print("\"{}\" : {} {}\n", str, t.contains(str), ct.contains(str)); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /SkewHeap.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include // std::less 4 | #include // std::exchange 5 | 6 | template 7 | struct SNode { 8 | T value; 9 | int left, right; // Children node indices inside the same buffer *this resides in. 10 | template 11 | SNode(Args&&... args) : value{ std::forward(args)... }, left{ -1 }, right{ -1 } {} 12 | }; 13 | 14 | // Skew heap, supporting only inserting a value or replacing the min with a bigger (!) one. 15 | template > 16 | class SkewHeap { 17 | std::vector> allNodes; 18 | int rootIdx; 19 | [[no_unique_address]] Compare cmp; 20 | 21 | // O(lgn) amortized complexity, actually log_phi_n = 1.44*lgn approx. 22 | int skewMerge(const int left, const int right) { 23 | if (left == -1) { 24 | return right; 25 | } else if (right == -1) { 26 | return left; 27 | } else { 28 | auto& lhs = allNodes[left]; 29 | auto& rhs = allNodes[right]; 30 | if (cmp(lhs.value, rhs.value)) { 31 | const int res = skewMerge(lhs.right, right); 32 | lhs.right = lhs.left; 33 | lhs.left = res; 34 | return left; 35 | } else { 36 | const int res = skewMerge(left, rhs.right); 37 | rhs.right = rhs.left; 38 | rhs.left = res; 39 | return right; 40 | } 41 | } 42 | } 43 | public: 44 | // Operates on a preallocated buffer (although resizing it is possible) 45 | SkewHeap(std::vector>&& buff, const Compare& cmp = Compare{}) 46 | : allNodes(std::move(buff)), rootIdx{ -1 }, cmp{ cmp } {} 47 | SkewHeap(const SkewHeap&) = delete; 48 | SkewHeap& operator=(const SkewHeap&) = delete; 49 | SkewHeap(SkewHeap&&) = delete; 50 | SkewHeap& operator=(SkewHeap&&) = delete; 51 | 52 | // Inserting a value - creates a singleton heap and merges with the current heap 53 | template 54 | void emplace(Args&&... args) { 55 | allNodes.emplace_back(std::forward(args)...); 56 | const int newNode = int(allNodes.size()) - 1; 57 | rootIdx = (rootIdx == -1 ? 
newNode : skewMerge(rootIdx, newNode)); 58 | } 59 | 60 | // Attempts replacing the smallest element with one that's larger 61 | // than it & returns true on success (false otherwise, including on an empty heap). Note: no reallocations. 62 | bool tryReplaceTop(const T& newTop) { if (rootIdx == -1) { return false; } // Empty heap: there is no top to replace, and allNodes[-1] would be out of bounds 63 | auto& root = allNodes[rootIdx]; 64 | if (!cmp(newTop, root.value)) { 65 | root.value = newTop; 66 | // note: try bubbling down instead 67 | const int rest = skewMerge(std::exchange(root.left, -1), std::exchange(root.right, -1)); 68 | rootIdx = skewMerge(rootIdx, rest); 69 | return true; 70 | } else { 71 | return false; 72 | } 73 | } 74 | 75 | // Returns ownership of the nodes vector & resets the heap to an empty one. 76 | [[nodiscard]] std::vector> dump() { 77 | rootIdx = -1; 78 | return std::exchange(allNodes, {}); 79 | } 80 | }; 81 | -------------------------------------------------------------------------------- /TrieTraits.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include // uint64_t, size_t 3 | #include // std::numeric_limits::max 4 | #include 5 | #include 6 | #include "StaticStack.h" // from SnippySnippets repo 7 | #include "vassert.h" // from SnippySnippets repo 8 | 9 | // Each type to be used in a Trie should have a specialization of this class, similar to const char* and uint64_t 10 | template 11 | class TrieTraits; 12 | 13 | // Example specialization for constructing string tries 14 | template <> 15 | class TrieTraits { 16 | public: 17 | // # of pointers at each trie level 18 | static constexpr size_t numPointers = std::numeric_limits::max() + 1; 19 | // "Consumes" a char from the value, advances the value & returns the index of said char in the pointer array. NOTE(review): where char is signed, a negative char converts to a huge size_t here - presumably only 7-bit ASCII input is expected; confirm 20 | static size_t advance(const char*& str) { return *(str++); } 21 | // Whether a value has been completely consumed 22 | static bool consumed(const char* str) { return (*str == '\0'); } 23 | // Whether consuming a value is noexcept, to aid in Trie noexcept specification 24 | static constexpr bool 
noexceptConsume = true; 25 | 26 | // A single, temporary value of this type is maintained & used in a stack-like manner during trie in-order traversal. 27 | // Will be zero-initialized before usage. 28 | using U = std::string; 29 | // Obtains an actual value from the temporary, corresponding to current traversal progress 30 | static const char* fromTemporary(const U& str) { return str.c_str(); } 31 | // When descending a trie level, "appends" to the temporary value (given the pointer index & node depth) 32 | static void push(U& str, const size_t idx, const size_t depth) { 33 | vassert(str.size() == depth); 34 | str.push_back(char(idx)); 35 | } 36 | // When ascending a trie level, "removes" from the temporary value (again given the popped node depth) 37 | static void pop(U& str, const size_t depth) { 38 | str.pop_back(); 39 | vassert(str.size() == depth); 40 | } 41 | // A container with the ability to fit as many values as the # of nodes in the longest root-leaf path in trie. 42 | // For strings, this is unlimited (see other specializations for counter-example) 43 | template 44 | using Stack = std::stack; 45 | // Whether stack operation is noexcept, to aid in Trie noexcept specification 46 | static constexpr bool noexceptStack = false; 47 | }; 48 | 49 | // Integers are represented as strings of numBits-sized chunks, starting from the lowest-significance bits. 50 | // This means in-order trie traversal will return values in a weird, inverse-lexicographical ordering. 51 | // On the other hand, limited string length removes the need for dynamically allocating trie paths during traversal/deletion. 52 | template <> // to-do: find a way to specialize for all unsigned integers 53 | class TrieTraits { 54 | static constexpr size_t numBits = 4; // # of bits, consumed at each trie level. Change to 1 for a radix trie. 55 | static constexpr size_t maxDepth = (64 / numBits) + (64 % numBits != 0); // Maximum node depth in tree. Root depth is 0. 
56 | public: 57 | static constexpr size_t numPointers = (1ULL << numBits); 58 | static bool consumed(const uint64_t& value) { return (value == 0); } 59 | static size_t advance(uint64_t& value) { size_t res = value % numPointers; value >>= numBits; return res; } 60 | static constexpr bool noexceptConsume = true; 61 | 62 | using U = uint64_t; 63 | static uint64_t fromTemporary(const U& value) { return value; } 64 | static void push(U& value, const size_t idx, const size_t depth) { value += (idx << (numBits * depth)); } 65 | static void pop(U& value, const size_t depth) { value &= ~((numPointers - 1) << (numBits * depth)); } 66 | 67 | template 68 | using Stack = StaticStack; // Trie path length limit (+1 for root node) 69 | static constexpr bool noexceptStack = true; 70 | }; 71 | -------------------------------------------------------------------------------- /Splay.hs: -------------------------------------------------------------------------------- 1 | module Splay (Splay,insert,lookup,fromList) where 2 | import Prelude hiding (lookup) 3 | 4 | data Splay a = Empty | Node a (Splay a) (Splay a) 5 | -- Pretty-printing 6 | instance Show a => Show (Splay a) where 7 | show t = "\n" ++ show' 0 t 8 | where show' _ Empty = "" 9 | show' d (Node x l r) = show' (d+1) r ++ replicate (2*d) ' ' ++ show x ++ "\n" ++ show' (d+1) l 10 | 11 | -- A list of directions encodes a path from the root to another node. 12 | data Direction = L | R 13 | 14 | -- A breadcrumb contains everything required to reconstruct a tree after a 15 | -- step down - the direction we took, the root and the other subtree. 16 | data Crumb a = Crumb Direction a (Splay a) 17 | 18 | -- Standard tree rotations 19 | rotate :: Direction -> Splay a -> Splay a 20 | rotate L (Node x a (Node y b c)) = (Node y (Node x a b) c) 21 | rotate R (Node y (Node x a b) c) = (Node x a (Node y b c)) 22 | 23 | -- The first pass: descend the tree in search of val, while recording how to 24 | -- reconstruct it when going back upwards. 
Either creates a new node 25 | -- if val is not found, or stops at the last node on the path to it. 26 | find :: Ord a => Bool -> a -> (Splay a, [Crumb a]) -> (Splay a, [Crumb a]) 27 | find True val (Empty, cs) = (Node val Empty Empty, cs) -- val not found, add a new node 28 | find False val p@(Node x Empty _, _) | val < x = p -- val not found, stop searching 29 | find False val p@(Node x _ Empty, _) | val > x = p -- val not found, stop searching 30 | find _ val p@(Node x _ _, _) | val == x = p -- found val somewhere in tree 31 | find b val (Node x l r, cs) 32 | | val < x = find b val (l, Crumb L x r : cs) 33 | | val > x = find b val (r, Crumb R x l : cs) 34 | 35 | -- The second, most crucial pass: given a subtree with root x and its path to 36 | -- the root, reconstruct the entire tree using the breadcrumbs and simultaneously splay x up. 37 | splay :: (Splay a, [Crumb a]) -> (Splay a, [Crumb a]) 38 | splay (t, []) = (t, []) 39 | -- Zig 40 | splay (t, [Crumb L x r]) = (rotate R $ Node x t r, []) 41 | splay (t, [Crumb R x l]) = (rotate L $ Node x l t, []) 42 | -- Zig-zig 43 | splay (t, (Crumb L x r : Crumb L x' r' : cs)) = splay (rotate R $ rotate R (Node x' (Node x t r) r'), cs) 44 | splay (t, (Crumb R x l : Crumb R x' l' : cs)) = splay (rotate L $ rotate L (Node x' l' (Node x l t)), cs) 45 | -- Zig-zag 46 | splay (t, (Crumb L x r : Crumb R x' l : cs)) = splay (rotate L $ Node x' l (rotate R $ Node x t r), cs) 47 | splay (t, (Crumb R x l : Crumb L x' r : cs)) = splay (rotate R $ Node x' (rotate L $ Node x l t) r, cs) 48 | 49 | -- Inserting, the standard two-pass operation. 50 | insert :: Ord a => a -> Splay a -> Splay a 51 | insert val t = fst . splay $ find True val (t,[]) 52 | 53 | -- Looking up a value will splay either it if found, 54 | -- or the last value on the search path if not found. 55 | lookup :: Ord a => a -> Splay a -> Splay a 56 | lookup _ Empty = Empty 57 | lookup val t = fst . 
splay $ find False val (t,[]) 58 | 59 | -- Split a tree into two by a given value 60 | split :: Ord a => a -> Splay a -> (Splay a, Splay a) 61 | split _ Empty = (Empty, Empty) 62 | split val t 63 | | x <= val = (Node x l Empty, r) 64 | | otherwise = (l, Node x Empty r) 65 | where (Node x l r) = lookup val t 66 | 67 | -- Join two trees, given that all values in the first are less than the values in the second 68 | join :: Ord a => Splay a -> Splay a -> Splay a 69 | join t1 Empty = t1 70 | join Empty t2 = t2 71 | join t1 t2 = Node x l t2 72 | where (Node x l Empty) = fst . splay $ findMax (t1, []) 73 | findMax :: Ord a => (Splay a, [Crumb a]) -> (Splay a, [Crumb a]) 74 | findMax p@(Node _ _ Empty, _) = p 75 | findMax (Node x l r, cs) = findMax (r, Crumb R x l : cs) 76 | 77 | -- Removing a value, the splay tree way - look it up, splaying it 78 | -- in the process, and on success join the two subtrees. 79 | delete :: Ord a => a -> Splay a -> Splay a 80 | delete _ Empty = Empty 81 | delete val t 82 | | x == val = join l r 83 | | otherwise = (Node x l r) 84 | where (Node x l r) = lookup val t 85 | 86 | -- Build a tree from the set (!) of values in a list 87 | fromList :: Ord a => [a] -> Splay a 88 | fromList = foldl (flip insert) Empty 89 | -------------------------------------------------------------------------------- /static_hashset.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | // Hashing a static set of n values in 5 | // O(n) storage with no collisions (!). 6 | class static_hashset 7 | { 8 | // All values should be in the range [0..p-1] and p must be a 9 | // prime number. Otherwise, the no-collision guarantee is waived. 
10 | static const int p = 1'009; 11 | // c0 is the constant for the first-level hashing with n buckets 12 | const int c0, n; 13 | // For each bucket there is another 'c' used to hash within the bucket 14 | std::vector c; 15 | // The n buckets are concatenated into a single vector. For any n, 16 | // this vector will have no more than 3n-2 elements => O(n) total storage 17 | std::vector table; 18 | // The offsets, at which the buckets are placed inside the main vector. 19 | // The size of a bucket can be deduced from the offsets of that bucket and the next one. 20 | std::vector offsets; 21 | 22 | // The simple hashing function: c is a precalculated constant, 23 | // m is the bucket size, and x is the element being hashed. 24 | static int h(int c, int m, int x) { return (((c*x) % p) % m); } 25 | 26 | // Selects an appropriate constant c, such that hashing values 27 | // into m buckets gives fewer than maxColls collisions. 28 | int selectc(const std::vector& values, const int m, const int maxColls) 29 | { 30 | const int n = int(values.size()); 31 | // TO-DO: try c in a more randomized order (!) 32 | for (int c = 1; c < p; ++c) 33 | { 34 | int colls = 0; 35 | for (int i = 0; colls < maxColls && i < n; ++i) 36 | for (int j = i + 1; colls < maxColls && j < n; ++j) 37 | if (h(c, m, values[i]) == h(c, m, values[j])) 38 | ++colls; // no more cycling after we hit maxColls 39 | // if we did hit maxColls, continue to the next c 40 | if (colls < maxColls) 41 | return c; 42 | } 43 | return 42; // this should be unreachable 44 | } 45 | 46 | // Empty, "helper" constructor, which receives the number of values 47 | // as a separate argument and only initializes the member data. 48 | static_hashset(const std::vector& values, int n) 49 | : c0{ selectc(values, n, n) } // c0 is expected to be found on the second try 50 | , n{ n } 51 | , c(n, -1) // careful with uniform initialization for certain vectors... 
52 | , table(3 * n - 2, -1) 53 | , offsets(n + 1, -1) 54 | {} 55 | public: 56 | // Do not construct with an empty vector! 57 | static_hashset(const std::vector& values) 58 | : static_hashset(values, int(values.size())) 59 | { 60 | std::vector> buckets(n); 61 | for (int x : values) 62 | buckets[h(c0, n, x)].push_back(x); 63 | // Now each element of buckets contains the values, for 64 | // which the first-level hash function makes collisions. 65 | int currOff = 0; 66 | // This would almost certainly be inlined 67 | auto sq = [](int x) { return x*x; }; 68 | for (int i = 0; i < n; ++i) 69 | { 70 | offsets[i] = currOff; 71 | // We can hash n values into n^2 buckets with 72 | // no collisions, given an appropriate c. 73 | const int wi2 = sq(int(buckets[i].size())); 74 | if (wi2 == 0) 75 | continue; 76 | // No guarantee how much time this would take... 77 | c[i] = selectc(buckets[i], wi2, 1); 78 | int* bucket = table.data() + currOff; 79 | for (int x : buckets[i]) 80 | bucket[h(c[i], wi2, x)] = x; 81 | currOff += wi2; // the offset for the next bucket 82 | } 83 | offsets.back() = currOff; 84 | } 85 | 86 | bool find(int x) const 87 | { if (x < 0) return false; // Stored values are non-negative by contract; a negative x may hash to a negative index or match the -1 empty-slot marker 88 | // Bucket index from the first-level hash function 89 | const int i = h(c0, n, x); 90 | // Size of the bucket - note that it may be empty (!) 91 | const int wi2 = offsets[i + 1] - offsets[i]; 92 | return (wi2 && table[offsets[i] + h(c[i], wi2, x)] == x); 93 | } 94 | }; 95 | -------------------------------------------------------------------------------- /CompressedTrie.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "Trie.h" 3 | #include "TrieTraversal.h" 4 | #include "StaticBitset.h" // from SnippySnippets repo 5 | #include "vassert.h" 6 | #include // std::conditional_t 7 | #include 8 | #include 9 | #include // std::bit_ceil 10 | 11 | // A compressed, static representation of a Trie. 
All original Trie nodes are 12 | // indexed consecutively in a breadth-first manner, starting from 0 for the root. 13 | // This guarantees that all children of a node will have consecutive indices, 14 | // so that for each node we only need to keep the index of the first child; 15 | // and the info which child pointers are non-null can be compressed in a bitset. 16 | template 17 | requires (TrieTraits::numPointers <= 128) // No large enough bitset! 18 | class CompressedTrie : public TrieTraversal { 19 | using Traits = TrieTraits; 20 | using Bitset = StaticBitset; 21 | using Index = typename Bitset::iterator::value_type; 22 | 23 | // For each node, a bitset indicating the position of non-null child pointers 24 | std::vector bitsets; 25 | // For each node, the index of its first child node + 1 bit indicating whether the node has a value 26 | std::vector firstChild; 27 | // # of values in tree 28 | size_t count; 29 | // Max # of bits of all values in tree. Used for unsigned integers only 30 | int maxBits_; 31 | public: 32 | // Standard big 6. Every constructor initializes both count and maxBits_ (maxBits() would otherwise read an indeterminate value); initializers follow the member declaration order. 33 | CompressedTrie() : count{ 0 }, maxBits_{ 0 } {} 34 | CompressedTrie(const CompressedTrie&) = delete; 35 | CompressedTrie& operator=(const CompressedTrie&) = delete; 36 | CompressedTrie(CompressedTrie&& other) noexcept 37 | : bitsets{ std::move(other.bitsets) } 38 | , firstChild{ std::move(other.firstChild) } 39 | , count{ std::exchange(other.count, 0) } 40 | , maxBits_{ std::exchange(other.maxBits_, 0) } {} 41 | CompressedTrie& operator=(CompressedTrie&& other) noexcept { 42 | bitsets = std::move(other.bitsets); 43 | firstChild = std::move(other.firstChild); 44 | maxBits_ = std::exchange(other.maxBits_, 0); 45 | count = std::exchange(other.count, 0); 46 | return *this; 47 | } 48 | ~CompressedTrie() { clear(); } 49 | // Convenience ctor 50 | CompressedTrie(std::initializer_list il) : CompressedTrie{ Trie{il} } {} 51 | // (!) 
52 | [[nodiscard]] CompressedTrie(const Trie& t) : count{ t.count }, maxBits_{ t.maxBits_ } { 53 | using Node = typename Trie::Node; 54 | std::queue q; 55 | q.push(Node::encode(&t.root, t.rootHasValue)); 56 | unsigned nextIdx = 1; 57 | while (!q.empty()) { 58 | const uintptr_t node = q.front(); 59 | const Node* ptr = Node::decode(node); 60 | q.pop(); 61 | firstChild.push_back(nextIdx << 1 | unsigned(node & 1)); 62 | Bitset curr{}; 63 | for (size_t i = 0; i < Traits::numPointers; ++i) { 64 | if (ptr->ptrs[i]) { 65 | curr.add(Index(i)); 66 | q.push(ptr->ptrs[i]); 67 | ++nextIdx; 68 | } 69 | } 70 | bitsets.push_back(curr); 71 | vassert(bitsets.size() == firstChild.size()); 72 | } 73 | } 74 | // Maximum bit_width of all values ever inserted in the trie. Obviously applicable for unsigned integers only 75 | uint64_t maxBits() const noexcept requires std::unsigned_integral { return maxBits_; } 76 | size_t size() const noexcept { return count; } 77 | bool empty() const noexcept { return (size() == 0); } 78 | void clear() noexcept { 79 | std::exchange(bitsets, {}); // Force deallocations 80 | std::exchange(firstChild, {}); 81 | count = 0; maxBits_ = 0; // also reset the bit-width statistic, the same way Trie::clear() does 82 | } 83 | private: 84 | // Typedefs & member functions, required by the TrieTraversal methods 85 | using pointer = unsigned; // Nodes are uniquely identified by index 86 | pointer getRootPtr() const { return 0; } 87 | bool hasValue(const pointer p) const { return bool(firstChild[p] & 1); } 88 | bool hasChild(const pointer p, size_t idx) const { return bitsets[p].contains(Index(idx)); } 89 | pointer getChild(const pointer p, size_t idx) const { 90 | return (firstChild[p] >> 1) + int(bitsets[p].rank(Index(idx))); 91 | } 92 | // (!) 
93 | friend TrieTraversal; 94 | }; 95 | -------------------------------------------------------------------------------- /binomial_heap_meta.cxx: -------------------------------------------------------------------------------- 1 | #include // for visualization purposes only 2 | #include // std::min 3 | #include 4 | #include 5 | 6 | // Circle uses libstdc++-10.3, where some C++20 libraries are still missing 7 | // Barebones replacement for std::span 8 | template 9 | struct Span { 10 | const int* ptr; 11 | Span(const int* ptr) : ptr{ptr} {} 12 | Span(const std::array& data) : ptr{ &data[0] } {} 13 | template requires (Off + N1 <= N) 14 | Span sub() const { return { ptr + Off }; } 15 | const int& operator[](size_t idx) const { assert(idx < N); return ptr[idx]; } 16 | static constexpr unsigned size() { return N; } 17 | }; 18 | // More replacements: some functions - thank god GCC's builtins are constexpr 19 | constexpr bool PowerOf2(unsigned n) { return (n > 0 && !(n&(n-1))); } 20 | constexpr int Log2(unsigned n) { return __builtin_ctz(n); } // works for powers of 2 only! 21 | static_assert(Log2(8) == 3 && PowerOf2(16)); 22 | 23 | template 24 | struct BinomialTree { 25 | int value = 0; 26 | // Subtrees of all lesser ranks, in decreasing orders. 27 | [[no_unique_address]] BinomialTree ...ts; 28 | 29 | // Note: getting .nontype_args...[0] in a pack expansion does not work (?) It's not a nested expansion, so why not? 30 | static constexpr unsigned rank() { return Rank; } 31 | bool valid() const { return (... && (value <= ts.value && ts.valid())); } 32 | }; 33 | 34 | template 35 | BinomialTree merge(const BinomialTree& left, const BinomialTree& right) { 36 | if (left.value < right.value) { 37 | return { left.value, right, left. ...ts ...}; 38 | } else { 39 | return { right.value, left, right. 
...ts ...}; 40 | } 41 | } 42 | 43 | template requires (PowerOf2(N)) 44 | BinomialTree makeTree(Span values) { 45 | static constexpr unsigned Rank = Log2(N); 46 | if constexpr (Rank == 0) { 47 | return { values[0] }; 48 | } else { 49 | return merge(makeTree(values.template sub<0,N/2>()), 50 | makeTree(values.template sub())); 51 | } 52 | } 53 | 54 | template 55 | class BinomialHeap { 56 | // Holders for the trees' ranks and sizes 57 | static constexpr std::array Ranks = { int...(32).filter(N&(1u<<_0))... }; 58 | static constexpr std::array Sizes = { (1u< ...ts; 60 | public: 61 | // Splits the input array into chunks the size of each tree 62 | // and directly initializes the subtrees from these chunks. 63 | BinomialHeap(Span values) 64 | : ts{ makeTree(values.template sub()) }... {} 65 | int getMin() const { return (... std::min ts.value); } // lol 66 | bool valid() const { return (... && ts.valid()); } 67 | }; 68 | 69 | // Stream output, nothing special 70 | template 71 | std::ostream& operator<<(std::ostream& os, const BinomialTree& t) { 72 | os << '[' << t.value; 73 | os << ' ' << t. ...ts ...; 74 | return (os << ']'); 75 | } 76 | template 77 | std::ostream& operator<<(std::ostream& os, const BinomialHeap& h) { 78 | os << h. ...ts << ' ' ...; 79 | return os; 80 | } 81 | 82 | int main() { 83 | const Span values = std::array{ 2,4,3,8,1,7,6,5 }; 84 | const auto t = makeTree(values); 85 | constexpr unsigned R = decltype(t)::rank(); 86 | std::cout << "Ranks of BinomialTree<" << R << ">'s subtrees:"; 87 | std::cout << ' ' << t.ts.rank() ...; 88 | std::cout << '\n'; 89 | std::cout << "BinomialTree<" << R << ">: " << t << " (valid: " << std::boolalpha << t.valid() << ")\n"; 90 | static_assert(sizeof(t) == (sizeof(t.value) << R)); // A tree of rank R has 2^R values. 
91 | 92 | const Span values1 = std::array{ 5,13,9,1,7,11,6,3,4,10,2,12,8 }; 93 | constexpr unsigned N = values1.size(); 94 | const BinomialHeap h{ values1 }; 95 | std::cout << "Ranks of BinomialHeap<" << N << ">'s trees:"; 96 | for (int R : h.Ranks) { std::cout << ' ' << R; } 97 | std::cout << '\n'; 98 | std::cout << "BinomialHeap<" << N << ">: " << h 99 | << " (min=" << h.getMin() << "; valid: " << h.valid() << ")\n"; 100 | static_assert(sizeof(h) == sizeof(t.value) * N); 101 | } 102 | -------------------------------------------------------------------------------- /2-3-4-Tree.hs: -------------------------------------------------------------------------------- 1 | import Prelude hiding (init) 2 | 3 | data Tree a = L2 a | L3 a a | L4 a a a 4 | | N2 a (Tree a) (Tree a) 5 | | N3 a a (Tree a) (Tree a) (Tree a) 6 | | N4 a a a (Tree a) (Tree a) (Tree a) (Tree a) 7 | 8 | -- Initialize a tree from a single value - there are no empty trees :) 9 | init :: a -> Tree a 10 | init = L2 11 | 12 | -- Pretty-printing designed to accent the same depth of the leaves 13 | -- and show which values are in the same internal 3-node or 4-node. 14 | instance Show a => Show (Tree a) where 15 | show t = "\n" ++ show' 0 t 16 | where show' pad (L2 x) = replicate pad ' ' ++ show x ++ "\n" 17 | show' pad (L3 x y) = concatMap (\v -> replicate pad ' ' ++ show v ++ "\n") [x,y] 18 | show' pad (L4 x y z) = concatMap (\v -> replicate pad ' ' ++ show v ++ "\n") [x,y,z] 19 | show' pad (N2 x a b) = show' (pad + newPad) a 20 | ++ replicate pad ' ' ++ show x ++ " \n" 21 | ++ show' (pad + newPad) b 22 | where newPad = succ . length $ show x -- add 1 to compensate for the explicit ' ' or '|' in the other cases 23 | show' pad (N3 x y a b c) = (unlines . map (\l -> replicate pad ' ' ++ " " ++ l) . lines $ show' newPad a) 24 | ++ replicate pad ' ' ++ show x ++ " \n" 25 | ++ (unlines . map (\l -> replicate pad ' ' ++ "|" ++ l) . 
lines $ show' newPad b) 26 | ++ replicate pad ' ' ++ show y ++ " \n" 27 | ++ (unlines . map (\l -> replicate pad ' ' ++ " " ++ l) . lines $ show' newPad c) 28 | where newPad = maximum $ map (length . show) [x,y] 29 | show' pad (N4 x y z a b c d) = (unlines . map (\l -> replicate pad ' ' ++ " " ++ l) . lines $ show' newPad a) 30 | ++ replicate pad ' ' ++ show x ++ " \n" 31 | ++ (unlines . map (\l -> replicate pad ' ' ++ "|" ++ l) . lines $ show' newPad b) 32 | ++ replicate pad ' ' ++ show y ++ " \n" 33 | ++ (unlines . map (\l -> replicate pad ' ' ++ "|" ++ l) . lines $ show' newPad c) 34 | ++ replicate pad ' ' ++ show z ++ " \n" 35 | ++ (unlines . map (\l -> replicate pad ' ' ++ " " ++ l) . lines $ show' newPad d) 36 | where newPad = maximum $ map (length . show) [x,y,z] 37 | 38 | -- Searches for a value in a 2-3-4 tree 39 | search :: Ord a => a -> Tree a -> Bool 40 | search val (L2 x) = val `elem` [x] 41 | search val (L3 x y) = val `elem` [x,y] 42 | search val (L4 x y z) = val `elem` [x,y,z] 43 | search val (N2 x a b) 44 | | val < x = search val a 45 | | otherwise = val == x || search val b 46 | search val (N3 x y a b c) 47 | | val < y = search val (N2 x a b) -- lol 48 | | otherwise = val == y || search val c 49 | search val (N4 x y z a b c d) 50 | | val < z = search val (N3 x y a b c) 51 | | otherwise = val == z || search val d 52 | 53 | -- Inserts a value into a tree 54 | insert :: Ord a => a -> Tree a -> Tree a 55 | insert val = fst . 
insert' val 56 | where -- Returns the new tree + whether the root has been split 57 | insert' :: Ord a => a -> Tree a -> (Tree a, Bool) 58 | insert' val (L2 x) 59 | | val < x = (L3 val x, False) 60 | | otherwise = (L3 x val, False) 61 | insert' val (L3 x y) 62 | | val < x = (L4 val x y, False) 63 | | val < y = (L4 x val y, False) 64 | | otherwise = (L4 x y val, False) 65 | insert' val (L4 x y z) = (fst $ insert' val (N2 y (L2 x) (L2 z)), True) -- Split & try again 66 | insert' val (N2 x a b) 67 | | val < x = case insert' val a of (N2 x' a' b', True) -> (N3 x' x a' b' b, False) -- Child was split, adopt middle value 68 | (t, False) -> (N2 x t b, False) 69 | | otherwise = case insert' val b of (N2 x' a' b', True) -> (N3 x x' a a' b', False) -- Child was split, adopt middle value 70 | (t, False) -> (N2 x a t, False) 71 | insert' val (N3 x y a b c) 72 | | val < x = case insert' val a of (N2 x' a' b', True) -> (N4 x' x y a' b' b c, False) -- Child was split, adopt middle value 73 | (t, False) -> (N3 x y t b c, False) 74 | | val < y = case insert' val b of (N2 x' a' b', True) -> (N4 x x' y a a' b' c, False) -- Child was split, adopt middle value 75 | (t, False) -> (N3 x y a t c, False) 76 | | otherwise = case insert' val c of (N2 x' a' b', True) -> (N4 x y x' a b a' b', False) -- Child was split, adopt middle value 77 | (t, False) -> (N3 x y a b t, False) 78 | insert' val (N4 x y z a b c d) = (fst $ insert' val (N2 y (N2 x a b) (N2 z c d)), True) -- Preemptively split & try again 79 | 80 | -- Adds each value from a non-empty list into a tree, 81 | -- in the given order (affecting the shape of the result). 
82 | fromList :: Ord a => [a] -> Tree a 83 | fromList (x:xs) = foldl (flip insert) (init x) xs 84 | -------------------------------------------------------------------------------- /d-ary_heap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include // std::bit_floor 4 | #include // size_t 5 | #include // std::input_iterator 6 | #include // std::min 7 | #include // std::less 8 | #include // std::swap 9 | #include "vassert.h" 10 | 11 | template> 12 | requires (D >= 2) // Tree degree cannot be 1 or 0... 13 | class DHeap { 14 | std::vector data; 15 | Compare comp; 16 | 17 | // Parent/children index manipulation. 18 | static size_t parentIdx(const size_t idx) { return (idx - 1) / D; } 19 | static size_t firstChildIdx(const size_t idx) { return D*idx + 1; } 20 | static size_t lastChildIdx(const size_t idx) { return D*idx + D; } 21 | // Find the index of the smallest child (w.r.t. comp) for a given node; on ties the leftmost such child is returned 22 | size_t minChildIdx(const size_t idx) const { 23 | size_t res = firstChildIdx(idx); 24 | vassert(res < data.size()); // given node should have at least one child 25 | if constexpr (D == 2) { 26 | // Only one other child to check for existence & optimality 27 | if (res + 1 < data.size() && comp(data[res + 1], data[res])) { 28 | return res + 1; 29 | } else { 30 | return res; 31 | } 32 | } else { 33 | // index of the rightmost existing child - an inclusive bound, hence the i <= r below 34 | const size_t r = std::min(lastChildIdx(idx), data.size() - 1); 35 | for (size_t i = res + 1; i <= r; i++) { 36 | if (comp(data[i], data[res])) { 37 | res = i; 38 | } 39 | } 40 | return res; 41 | } 42 | } 43 | // Bubble up a value at the last index of the array 44 | void bubbleUp() { 45 | using std::swap; 46 | size_t idx = data.size() - 1; 47 | while (idx) { 48 | size_t pIdx = parentIdx(idx); 49 | if (!comp(data[idx], data[pIdx])) { 50 | return; 51 | } 52 | swap(data[idx], data[pIdx]); 53 | idx = pIdx; 54 | } 55 | } 56 | // Bubble down a value at a given index 57 | void 
bubbleDown(size_t idx = 0) { 58 | using std::swap; 59 | while (firstChildIdx(idx) < data.size()) { // is leaf <=> no children 60 | size_t minIdx = minChildIdx(idx); 61 | if (comp(data[minIdx], data[idx])) { 62 | swap(data[minIdx], data[idx]); 63 | idx = minIdx; 64 | } else { 65 | return; 66 | } 67 | } 68 | } 69 | 70 | // Finding the index of the first leaf is basically the same 71 | // as finding the largest k such that the sum d^0+d^1+...+d^k < n. 72 | static size_t findFirstLeaf(const size_t n) { 73 | if constexpr (D == 2) { 74 | return (std::bit_floor(n) - 1); 75 | } else { 76 | size_t sum = 1, prevSum = 0, nextV = D; 77 | while (sum < n) { 78 | prevSum = sum; 79 | sum += nextV; 80 | nextV *= D; 81 | } 82 | return prevSum; 83 | } 84 | } 85 | 86 | // Constructs a valid heap from a possibly invalid (f.e. currently constructed) 87 | // one by calling bubbleDown for all non-leaf indices. This takes O(n) total time. 88 | void bubbleDownNonChildren() { 89 | const size_t n = size(); 90 | if (n < 2) { 91 | return; 92 | } 93 | const size_t firstLeaf = findFirstLeaf(n); 94 | for (size_t i = firstLeaf - 1; i < firstLeaf; i--) { // we need i>=0, but stupid unsigned ints... 95 | bubbleDown(i); 96 | } 97 | } 98 | public: 99 | template 100 | DHeap(InputIt from, InputIt to, const Compare& comp = Compare{}) 101 | : data{ from, to }, comp{ comp } 102 | { 103 | bubbleDownNonChildren(); 104 | } 105 | 106 | // Standard interface 107 | const T& top() const { return data.front(); } 108 | bool empty() const { return data.empty(); } 109 | size_t size() const { return data.size(); } 110 | 111 | // Insert a single value into the heap 112 | void push(const T& val) { emplace(val); } 113 | void push(T&& val) { emplace(std::move(val)); } 114 | template 115 | void emplace(Args&&... 
args) { 116 | data.emplace_back(std::forward(args)...); 117 | bubbleUp(); 118 | } 119 | 120 | // Remove the smallest value from the heap 121 | // Note: doesn't return it, similar to std::priority_queue - use top() beforehand. 122 | void pop() { 123 | using std::swap; 124 | swap(data.front(), data.back()); 125 | data.pop_back(); 126 | bubbleDown(); 127 | } 128 | 129 | // Attempts replacing the smallest element with one that's larger 130 | // than it & returns true on success. Note: no reallocations. 131 | bool tryReplaceTop(const T& newTop) { 132 | if (!comp(newTop, data.front())) { 133 | data.front() = newTop; 134 | bubbleDown(); 135 | return true; 136 | } else { 137 | return false; 138 | } 139 | } 140 | }; 141 | 142 | // Some aliases for ease-of-use 143 | template > 144 | using BinaryHeap = DHeap; 145 | 146 | template > 147 | using QuadHeap = DHeap; 148 | 149 | template > 150 | using OctHeap = DHeap; -------------------------------------------------------------------------------- /RangeMinimumQuery/SparseTableRMQ.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include // std::ranges::min_element 4 | #include // std::less 5 | #include 6 | #include "TaggedArray.h" 7 | #include "vassert.h" 8 | #include "IntTags.h" 9 | 10 | // Returns floor(log2(n)), or the index of the highest set bit. 11 | // Undefined for negative n 12 | template 13 | constexpr T logn(const T n) { 14 | vassert(n >= 0); 15 | // Bit operations are defined on unsigned ints only, good thing this is a no-op 16 | using U = std::make_unsigned_t; 17 | return (n == 0 ? 0 : std::bit_width(U(n)) - 1); 18 | } 19 | static_assert(logn(7) == 2 && logn(8) == 3 && logn(9) == 3); 20 | 21 | // Data structure, supporting fast range minimum queries (RMQ-s) on a static set of values. 22 | // Built in O(nlgn) time, uses O(nlgn) additional space & answers queries in O(1) time. 
23 | // Can (almost precisely) answer range minimum queries without needing to keep a copy of the values. 24 | template > 25 | class SparseTableRMQ { 26 | // The i-th level contains the indices of the minima in all subranges in values of length 2^(i+1). 27 | // These indices are stored in the subrange [offsets[i],offsets[i+1]) in the array. 28 | // This makes the total # of levels floor(lgn). Total memory is n(lgn-2)+lgn, more precisely. 29 | DynamicArray indices; 30 | DynamicArray> offsets; 31 | Comp comp; 32 | 33 | Idx minByVal(const ArrayView& values, const Idx i, const Idx j) const { 34 | return (comp(values[i], values[j]) ? i : j); 35 | } 36 | ArrayView getLevel(const Int lvl) { 37 | return { &indices[offsets[lvl]], (offsets[lvl + 1] - offsets[lvl]) }; 38 | } 39 | ArrayView getLevel(const Int lvl) const { 40 | return { &indices[offsets[lvl]], (offsets[lvl + 1] - offsets[lvl]) }; 41 | } 42 | public: 43 | SparseTableRMQ() = default; 44 | template 45 | requires std::is_same_v> 46 | SparseTableRMQ(const Range& values_, const Comp& comp = Comp{}) : comp{comp} { 47 | const ArrayView values{ values_ }; 48 | const Idx n = values.size(); 49 | // The exact number of levels in the table 50 | const Int numLevels = Int(logn(int(n))); // Note: this cast to int is dangerous (!) 51 | // First calculate the offsets for each level 52 | offsets = DynamicArray>(numLevels + 1); 53 | offsets[Int(0)] = Idx(0); 54 | for (Int lvl{ 0 }; lvl < numLevels; ++lvl) { 55 | const Idx r = (Idx(1) << int(lvl)); // the previous power of 2 56 | const Idx lvlSize = n - 2*r + 1; // exact level size 57 | offsets[lvl + 1] = offsets[lvl] + lvlSize; 58 | } 59 | // We now know the total indices count 60 | indices = DynamicArray(offsets[numLevels]); 61 | if (!(Int(0) < numLevels)) { return; } // Fewer than 2 values => no levels at all: offsets has a single element, so getLevel(0) below would read offsets[1] past its end. Otherwise, the first level is trivial... 62 | ArrayView lvl0 = getLevel(Int(0)); 63 | for (Idx i{ 0 }; i < n - 1; ++i) { 64 | lvl0[i] = minByVal(values, i, i+1); 65 | } 66 | // ... 
then we build up 67 | for (Int lvl{ 1 }; lvl < numLevels; ++lvl) { 68 | const Idx r = (Idx(1) << int(lvl)); // the previous power of 2 69 | const Idx lvlSize = n - 2*r + 1; // exact level size 70 | ArrayView< Idx, Idx> curr = getLevel(lvl); 71 | ArrayView prev = getLevel(lvl - 1); 72 | for (Idx i{ 0 }; i < lvlSize; ++i) { 73 | curr[i] = minByVal(values, prev[i], prev[i + r]); 74 | } 75 | } 76 | } 77 | 78 | // RMQ can be "answered" with uncertainty if the original set of values isn't 79 | // available, by returning two indices such that one of them is guaranteed 80 | // to be the answer (which the caller can then compare in O(1) to check). 81 | // O(1) time complexity. The queried range is the half-open [i,j), so it must be non-empty (i < j). 82 | struct Pair { Idx a, b; }; 83 | Pair argmin(const Idx i, const Idx j) const { 84 | vassert(i < j); 85 | if (j == i + 1) { 86 | return { i, i }; 87 | } 88 | const int lvl = logn(int(j - i)); // Note: this cast to int is dangerous (!) 89 | const int n = (1 << lvl); // Biggest power of 2, <= (j-i) 90 | vassert(n <= (j - i) && 2*n > (j - i)); 91 | ArrayView level = getLevel(Int(lvl) - 1); 92 | // Note - sometimes n == j-i, i.e. the range size is a power of 2 and these are the same indices 93 | return { level[i], level[j - n] }; 94 | } 95 | 96 | // Answers an RMQ query precisely, given the original set of values. 97 | // O(1) time complexity. 98 | Idx argmin(const ArrayView& values, const Idx i, const Idx j) const { 99 | const auto [a, b] = argmin(i, j); 100 | return minByVal(values, a, b); 101 | } 102 | 103 | // Answers an RMQ query precisely, given the original set of values. 104 | // O(1) time complexity. 
105 | const T& min(const ArrayView& values, const Idx i, const Idx j) const { 106 | return values[argmin(values, i, j)]; 107 | } 108 | 109 | // Same as argmin, but naive and O(n) - to be used for testing only 110 | const Idx argmin_slow(const ArrayView& values, const Idx i, const Idx j) const { 111 | return *std::ranges::min_element(std::views::iota(i, j), comp, [&](const Idx idx) { return values[idx]; }); 112 | } 113 | // Same as min, but naive and O(n) - to be used for testing only 114 | const T& min_slow(const ArrayView& values, const Idx i, const Idx j) const { 115 | return values[argmin_slow(values, i, j)]; 116 | } 117 | }; 118 | -------------------------------------------------------------------------------- /WaveletTree.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TypeFamilies #-} 2 | {-# LANGUAGE FlexibleContexts #-} 3 | {-# LANGUAGE ConstrainedClassMethods #-} 4 | 5 | module WaveletTree (WaveletTree,Sequence,wavelet,(!),rank,select,test) where 6 | import Data.List (partition,sort,nub) 7 | import Data.Hashable (Hashable) -- this & HashMap require package unordered-containers 8 | import Data.Function (on) 9 | import Data.BitVector (BitVector,BV,(#),nil,fromBool,showBin,(!.),most,fromBits) -- requires package bv 10 | import qualified Data.BitVector as BV (foldl) 11 | import Data.HashMap.Lazy (HashMap,empty,singleton,union) 12 | import qualified Data.HashMap.Lazy as HM ((!),null) 13 | 14 | {------ Sequence class ------} 15 | {- A sequence should support the following three operations: 16 | Access: the element at a given position in the sequence 17 | Rank: The number of occurences of a given element among the first i in the sequence 18 | Select: The position of the i-th occurence of a given element in the sequence -} 19 | class Sequence c where 20 | type ElemType c :: * 21 | (!) 
:: c -> Int -> ElemType c 22 | rank, select :: Eq (ElemType c) => ElemType c -> Int -> c -> Int 23 | 24 | -- Example implementation of the sequence operations for lists 25 | instance Sequence [a] where 26 | type ElemType [a] = a 27 | (!) = (!!) 28 | rank c i lst = length . filter (==c) $ take i lst 29 | select c i (x:xs) 30 | | c == x = if i == 1 then 0 else 1 + select c (i-1) xs 31 | | otherwise = 1 + select c i xs 32 | 33 | instance Sequence BV where -- BitVector is actually a synonym 34 | type ElemType BV = Bool 35 | (!) = (!.) 36 | rank False i bv = i - (rank True i bv) 37 | rank True 0 _ = 0 38 | rank True i bitmap = BV.foldl (\c b -> if b then c+1 else c) 0 $ most i bitmap 39 | select b i bv = select' i 0 40 | where select' i curr 41 | | bv !. curr == b = if i == 1 then curr else select' (i-1) (curr+1) 42 | | otherwise = select' i (curr+1) 43 | 44 | {------ Huffman encoding section ------} 45 | data HTree a = HLeaf a | (HTree a) :^: (HTree a) 46 | data HPair a = HPair { tree :: HTree a, weight :: Int } 47 | instance Eq (HPair a) where (==) = (==) `on` weight 48 | instance Ord (HPair a) where (<=) = (<=) `on` weight 49 | 50 | huffman :: Eq a => [a] -> HTree a 51 | huffman str = tree . head . mergeTrees $ histogram 52 | where histogram = sort [ HPair (HLeaf c) (length $ filter (==c) str) | c<-nub str ] 53 | mergeTrees [p] = [p] 54 | mergeTrees ((HPair t1 w1):(HPair t2 w2):ps) = mergeTrees $ insert (HPair (t1:^:t2) (w1+w2)) ps 55 | where insert p ps = let (a,b) = span ( HTree a -> HashMap a BitVector 58 | codes (HLeaf c) = empty 59 | codes t = codes' t nil 60 | where codes' (HLeaf c) code = singleton c code 61 | codes' (t1:^:t2) code = codes' t1 (code # fromBool False) `union` 62 | codes' t2 (code # fromBool True) 63 | 64 | codeOf :: (Eq a, Hashable a) => a -> HashMap a BitVector -> BitVector 65 | codeOf = flip $ (HM.!) 
-- to-do: maybe remove this & use lookup for safety 66 | 67 | {------ Wavelet Tree ------} 68 | data WTree a = Leaf a | Node BitVector (WTree a) (WTree a) 69 | data WaveletTree a = WaveletTree (WTree a) (HashMap a BitVector) 70 | 71 | instance Show a => Show (WaveletTree a) where 72 | show (WaveletTree t _) = show' 0 t 73 | where show' pad (Leaf x) = replicate pad ' ' ++ show x 74 | show' pad (Node bitmap left right) = replicate pad ' ' 75 | ++ showBin bitmap 76 | ++ "\n" ++ show' (pad+2) left 77 | ++ "\n" ++ show' (pad+2) right 78 | 79 | wavelet :: (Eq a, Hashable a) => [a] -> WaveletTree a 80 | wavelet xs 81 | | HM.null huffCodes = WaveletTree (Leaf $ xs!!0) empty 82 | | otherwise = WaveletTree (wavelet' huffTree 0 xs) huffCodes 83 | where huffTree = huffman xs 84 | huffCodes = codes huffTree 85 | wavelet' (HLeaf x) _ _= Leaf x 86 | wavelet' (hleft :^: hright) d xs = Node bitmap left right 87 | where (ys,zs) = partition (not.(!.d).(`codeOf` huffCodes)) xs 88 | bitmap = fromBits $ map ((!.d).(`codeOf` huffCodes)) xs 89 | left = wavelet' hleft (d+1) ys 90 | right = wavelet' hright (d+1) zs 91 | 92 | instance Hashable a => Sequence (WaveletTree a) where 93 | type ElemType (WaveletTree a) = a 94 | (WaveletTree t _) ! i = t ! i 95 | where (Leaf x) ! _ = x 96 | (Node bitmap left right) ! i 97 | | bitmap !. i = right ! (rank True i bitmap) 98 | | otherwise = left ! (rank False i bitmap) 99 | 100 | rank c i (WaveletTree w codes) = rank' i w 0 101 | where code = codeOf c codes 102 | rank' i (Leaf _) _ = i -- the leaf should now contain the symbol c 103 | rank' i (Node bitmap left right) d 104 | | code !. d = rank' (rank True i bitmap) right (d+1) 105 | | otherwise = rank' (rank False i bitmap) left (d+1) 106 | 107 | select c i (WaveletTree w codes) = select' (i-1) w 0 108 | where code = codeOf c codes 109 | select' i (Leaf _) _ = i -- the leaf should now contain the symbol c 110 | select' i (Node bitmap left right) d 111 | | code !. 
d = let j = select' i right (d+1) in select True (j+1) bitmap 112 | | otherwise = let j = select' i left (d+1) in select False (j+1) bitmap 113 | 114 | test :: IO () 115 | test = do 116 | let str = "abracadabra" 117 | (n,w) = (length str, wavelet str) 118 | str' = (w!)<$>[0..n-1] 119 | chars = sort . nub $ str 120 | print str'; print $ str==str' -- this check should always hold (!) 121 | mapM_ (print.(\c -> map (\i->rank c i w) [1..n])) chars 122 | mapM_ (print.(\c -> map (\i->select c i w) [1..length$filter(==c)str])) chars 123 | -------------------------------------------------------------------------------- /Trie.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "TrieTraits.h" 3 | #include "TrieTraversal.h" 4 | 5 | #include // std::unsigned_integral 6 | #include // uintptr_t, size_t 7 | #include 8 | #include // std::bit_width, used for formatted output only 9 | 10 | // Forward declaration 11 | template 12 | requires (TrieTraits::numPointers <= 128) 13 | class CompressedTrie; 14 | 15 | // A trie node with N child nodes. Note that it doesn't depend on trie value type. 
16 | template 17 | requires (N >= 2 && N <= 256) 18 | struct Node { 19 | // An array of pointers, in whose less-significant bit is encoded whether the pointed-to node contains a value 20 | std::array ptrs; 21 | Node() : ptrs{ 0 } {} 22 | // Helper pointer encoding/decoding functions 23 | static Node* decode(const uintptr_t ptr) { return (Node*)(ptr & ~uintptr_t(1)); } 24 | static uintptr_t encode( Node* ptr, size_t hasValue) { return (uintptr_t(ptr) | hasValue); } 25 | static uintptr_t encode(const Node* ptr, size_t hasValue) { return (uintptr_t(ptr) | hasValue); } 26 | // Recursively free a subtree's memory 27 | void freeMem() noexcept { 28 | for (uintptr_t ptr : ptrs) { 29 | if (ptr) { 30 | Node::decode(ptr)->freeMem(); 31 | } 32 | } 33 | delete this; 34 | } 35 | }; 36 | 37 | // A generic trie class, supporting all types with complete TrieTraits specializations 38 | template 39 | class Trie : public TrieTraversal { 40 | using Traits = TrieTraits; 41 | using Node = Node; 42 | 43 | Node root; // A trie always has a root node, so no need for a dynamic allocation 44 | size_t count = 0; // # of values in tree. 45 | int maxBits_ = 0; // Max # of bits of all values in tree. Used for unsigned integers only. 
46 | bool rootHasValue = 0; // Whether root node contains a value 47 | public: 48 | Trie() = default; 49 | Trie(const Trie&) = delete; 50 | Trie& operator=(const Trie&) = delete; 51 | Trie(Trie&& other) noexcept 52 | : root{ std::exchange(other.root, {}) } 53 | , count{ std::exchange(other.count, 0) } 54 | , maxBits_{ std::exchange(other.maxBits_, 0) } 55 | , rootHasValue{ std::exchange(other.rootHasValue, false) } {} 56 | Trie& operator=(Trie&& other) noexcept { 57 | if (this != &other) { clear(); } root = std::exchange(other.root, {}); // clear() frees the nodes this trie owned so far - overwriting root without it leaked them. It is skipped on self-move, which the exchanges handle correctly on their own 58 | count = std::exchange(other.count, 0); 59 | maxBits_ = std::exchange(other.maxBits_, 0); 60 | rootHasValue = std::exchange(other.rootHasValue, false); 61 | return *this; 62 | } 63 | ~Trie() { clear(); } 64 | // Convenience ctor 65 | Trie(std::initializer_list il) { 66 | for (const T& value : il) { insert(value); } 67 | } 68 | // Attempts to insert a value, returns false iff already inserted. 69 | bool insert(T value) { 70 | if constexpr (std::unsigned_integral) { 71 | maxBits_ = std::max(maxBits_, int(std::bit_width(value))); 72 | } 73 | // Traverse the existing path as far as possible 74 | uintptr_t toRoot = Node::encode(&root, rootHasValue); // Dummy variable to bootstrap traversal 75 | uintptr_t* curr = &toRoot; 76 | while (*curr && !Traits::consumed(value)) { 77 | Node* ptr = Node::decode(*curr); 78 | curr = &ptr->ptrs[Traits::advance(value)]; 79 | } 80 | // If the entire path is traversed, a prefix has been inserted - mark the last node & nothing more to do. 81 | const bool inserted = !(*curr) || !((*curr) & 1); 82 | count += inserted; 83 | if (*curr) { 84 | (curr == &toRoot ? 
rootHasValue = true : *curr |= 1); 85 | } else while (!*curr) { // Otherwise, build the remainder of the path 86 | const bool consumed = Traits::consumed(value); 87 | Node* newNode = new Node; 88 | *curr = Node::encode(newNode, consumed); 89 | if (!consumed) { 90 | curr = &newNode->ptrs[Traits::advance(value)]; 91 | } 92 | } 93 | return inserted; 94 | } 95 | // Attempts to remove a value, returns true iff found. 96 | // Note: cannot update maxBits_ in an efficient way, so it just doesn't touch it. 97 | bool remove(T value) noexcept(Traits::noexceptConsume && Traits::noexceptStack) { 98 | using Stack = Traits::template Stack; 99 | Stack path = {}; 100 | uintptr_t toRoot = Node::encode(&root, rootHasValue); // Dummy variable to bootstrap traversal 101 | uintptr_t* curr = &toRoot; 102 | // Traverse down the path, saving it in a stack 103 | while (*curr && !Traits::consumed(value)) { 104 | path.push(curr); 105 | Node* ptr = Node::decode(*curr); 106 | curr = &ptr->ptrs[Traits::advance(value)]; 107 | } 108 | if (!*curr || !((*curr) & 1)) { // Value not found, nothing to do 109 | return false; 110 | } 111 | --count; 112 | (curr == &toRoot ? rootHasValue = false : *curr &= ~uintptr_t(1)); 113 | path.push(curr); // One last push - this is the first node to be deleted 114 | // Small, helper function 115 | auto hasChildren = [](const uintptr_t root) { 116 | for (auto ptr : Node::decode(root)->ptrs) if (ptr) { return true; } 117 | return false; 118 | }; 119 | // If the node found was a leaf, remove its entire branch - i.e. either up to a node with >1 children, with a value, or the root. 120 | while (path.size() > 1 && !((*path.top()) & 1) && !hasChildren(*path.top())) { 121 | delete Node::decode(*path.top()); 122 | *path.top() = 0; // Tell the parent node it now has one child less 123 | path.pop(); 124 | } 125 | return true; 126 | } 127 | // Maximum bit_width of all values ever inserted in the trie. 
Obviously applicable for unsigned integers only 128 | uint64_t maxBits() const noexcept requires std::unsigned_integral { return maxBits_; } 129 | size_t size() const noexcept { return count; } 130 | bool empty() const noexcept { return (size() == 0); } 131 | 132 | // Clears all contents & resets to an empty trie 133 | void clear() noexcept { 134 | // Reminder not to delete &root - it's not dynamically allocated 135 | count = 0; 136 | maxBits_ = 0; 137 | rootHasValue = false; 138 | for (uintptr_t& ptr : root.ptrs) { 139 | if (ptr) { 140 | Node::decode(ptr)->freeMem(); 141 | ptr = 0; 142 | } 143 | } 144 | } 145 | private: 146 | // Typedefs & member functions, required by the TrieTraversal methods 147 | using pointer = uintptr_t; 148 | pointer getRootPtr() const { return Node::encode(&root, rootHasValue); } 149 | static bool hasValue(const pointer p) { return bool(p & 1); } 150 | static bool hasChild(const pointer p, size_t idx) { return bool(Node::decode(p)->ptrs[idx]); } 151 | static pointer getChild(const pointer p, size_t idx) { return Node::decode(p)->ptrs[idx]; } 152 | // (!) 153 | friend TrieTraversal; 154 | friend CompressedTrie; 155 | }; 156 | -------------------------------------------------------------------------------- /binary_heap_static.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include // std::numeric_limits::max 4 | #include // size_t 5 | #include // std::min, std::swap 6 | #include // std::less 7 | #include // std::is_nothrow_swappable 8 | #include // std::pair 9 | 10 | template > 11 | class BinaryHeapStatic { 12 | // Unfortunately false for std::less for trivial types... 
13 | static constexpr bool nothrow_comp 14 | = noexcept(std::declval()(std::declval(), std::declval())); 15 | static constexpr bool nothrow_comp_and_swap 16 | = std::is_nothrow_swappable_v && nothrow_comp; 17 | using vertex = size_t; 18 | struct pair { T value; vertex v; }; 19 | 20 | std::vector data; 21 | std::vector indices; 22 | Compare comp; 23 | size_t count; 24 | 25 | static size_t parentIdx(size_t idx) noexcept { return (idx - 1) / 2; } 26 | static size_t leftChildIdx(size_t idx) noexcept { return 2*idx + 1; } 27 | static size_t rightChildIdx(size_t idx) noexcept { return 2*idx + 2; } 28 | size_t minChildIdx(size_t idx) const noexcept(nothrow_comp); 29 | 30 | void bubbleUp(size_t idx) noexcept(nothrow_comp_and_swap); 31 | void bubbleDown(size_t idx) noexcept(nothrow_comp_and_swap); 32 | 33 | // Swap two values in the heap, keeping the two-way referencing 34 | void swapNodes(const size_t u, const size_t v); 35 | public: 36 | // Reserves space for a given count of nodes (vertices) and prepares 37 | // the heap for Dijkstra's algorithm by inserting a value of infinity 38 | // for every node except the starting one, which has value zero. 39 | // If numVertices == 0, no memory is allocated, but operations 40 | // before the next call to reset() may lead to undefined behaviour. 41 | // Important postcondition: for every vertex v its value is data[indices[v]]. 
42 | // Requirement: comp(zero, infinity) == true && comp(zero, _) == true && comp(infinity, _) == false 43 | BinaryHeapStatic( 44 | const size_t numVertices, 45 | vertex start, 46 | const T& zero = T{ 0 }, 47 | const T& infinity = std::numeric_limits::max(), 48 | const Compare& comp = Compare{}) 49 | { 50 | reset(numVertices, start, zero, infinity, comp); 51 | } 52 | 53 | // Standard operation; undefined behaviour for empty heaps 54 | const T& peek() const noexcept { return data[0].value; } 55 | 56 | // Also standard 57 | std::pair extractMin(); 58 | 59 | // The most complex operation for this type of heap, using the two-way referencing 60 | bool decreaseKey(const vertex, const T&) noexcept(nothrow_comp); 61 | 62 | // We can also keep track of the exact vertices currently in the heap 63 | bool contains(vertex) const noexcept; 64 | 65 | // More standard methods 66 | size_t size() const noexcept { return count; } 67 | bool empty() const noexcept { return (size() == 0); } 68 | 69 | // Free all memory (!) and reinitialize for an updated number of vertices. 70 | // See the comment for the ctor. 
71 | void reset( 72 | const size_t numVertices, 73 | vertex start, 74 | const T& zero = T{ 0 }, 75 | const T& infinity = std::numeric_limits::max(), 76 | const Compare& comp = Compare{}); 77 | }; 78 | 79 | template 80 | size_t BinaryHeapStatic::minChildIdx(size_t idx) const noexcept(nothrow_comp) { 81 | // Invariant: idx has at least one child 82 | const size_t left = leftChildIdx(idx); 83 | const size_t right = rightChildIdx(idx); 84 | if (right >= size() || comp(data[left].value, data[right].value)) { 85 | return left; 86 | } else { 87 | return right; 88 | } 89 | } 90 | 91 | template 92 | void BinaryHeapStatic::bubbleUp(size_t idx) noexcept(nothrow_comp_and_swap) { 93 | while (idx) { 94 | size_t pIdx = parentIdx(idx); 95 | if (!comp(data[idx].value, data[pIdx].value)) { 96 | return; 97 | } 98 | swapNodes(idx, pIdx); 99 | idx = pIdx; 100 | } 101 | } 102 | 103 | template 104 | void BinaryHeapStatic::bubbleDown(size_t idx) noexcept(nothrow_comp_and_swap) { 105 | while (leftChildIdx(idx) < size()) { // is leaf <=> no children 106 | size_t minIdx = minChildIdx(idx); 107 | if (comp(data[minIdx].value, data[idx].value)) { 108 | swapNodes(idx, minIdx); 109 | idx = minIdx; 110 | } else { 111 | return; 112 | } 113 | } 114 | } 115 | 116 | template 117 | void BinaryHeapStatic::swapNodes(const size_t idx1, const size_t idx2) { 118 | using std::swap; 119 | swap(data[idx1], data[idx2]); 120 | swap(indices[data[idx1].v], indices[data[idx2].v]); 121 | } 122 | 123 | template 124 | auto BinaryHeapStatic::extractMin() -> std::pair { 125 | // Push the root node to the end of the array 126 | swapNodes(0, --count); 127 | // Shrinking the vectors may be slow (implementation-specific) => just don't do it 128 | bubbleDown(0); 129 | return { data[count].v, data[count].value }; 130 | } 131 | 132 | template 133 | bool BinaryHeapStatic::decreaseKey(const vertex v, const T& newKey) noexcept(nothrow_comp) { 134 | if (!comp(newKey, data[indices[v]].value)) { 135 | return false; 136 | } 137 | 
data[indices[v]].value = newKey; 138 | bubbleUp(indices[v]); 139 | return true; 140 | } 141 | 142 | template 143 | bool BinaryHeapStatic::contains(vertex u) const noexcept { 144 | return (indices[u] < count); 145 | } 146 | 147 | template 148 | void BinaryHeapStatic::reset(const size_t numVertices, 149 | vertex start, const T& zero, const T& infinity, const Compare& comp) 150 | { 151 | this->comp = comp; 152 | count = numVertices; 153 | data.clear(); 154 | indices.clear(); 155 | if (numVertices == 0) { 156 | return; 157 | } 158 | // Initialize the vector as if all vertices have value = infinity 159 | data.assign(numVertices, pair{ infinity,0 }); 160 | indices.assign(numVertices, 0); 161 | for (size_t i = 0; i < numVertices; ++i) { 162 | indices[i] = i; 163 | data[i].v = i; 164 | } 165 | // Manually "place" the starting vertex at the top of the heap 166 | data[start].value = zero; 167 | if (numVertices < 2) { 168 | return; 169 | } 170 | using std::swap; 171 | swap(data[start], data[0]); 172 | swap(indices[start], indices[0]); 173 | } 174 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AdvancedDataStructures 2 | A collection of exotic & advanced data structures in C++ and Haskell. 3 | 4 | * **Binary heap (static)**: A modification to the classic data structure, supporting `decreaseKey` and optimized specifically for Prim's and Dijkstra's algorithms: the total number of values is predetermined and no values are added/removed after initialization. Uses two "pointers" per node. Time complexities are O(lgn) for `extractMin` and `decreaseKey` and O(n) for construction/initialization (a trivial operation). 5 | * **Binomial heap (meta)**: A variant of the classic heap data structure, supporting insertions in O(1) amortized time and O(lgn) for all other operations. 
The heaps & supporting structures carry their size in their type, which makes it impossible (!) to construct an invalid binomial heap or binomial tree - it is verified _at compile time_. It also serves as a correctness proof for the heap operations, as well as their helper functions. 6 | * **d-ary heap**: a generalisation of the binary heap, where every node has *d* children instead of 2. While having basically the same structure and idea as the binary heap, it has better cache behaviour and performs `decreaseKey` quicker, making it a slightly better choice for most purposes. The **4-heap**, specifically, stands out as a good performer. The aforementioned `decreaseKey` is still messy to implement, though, which is why it's missing (for now). All heap modification operations now run in O(log_dn) time in the worst case. 7 | * **FixedEytzingerMap**: a static cache-friendly STL-compatible associative container that packs all keys in an array, similarly to a `flat_map`. Instead of a sorted order for lookup via binary search, it uses the Eytzinger layout to drastically reduce cache misses for faster lookup. Insertion/deletion is slow & cumbersome, which is why the container is static - only the values can be modified, the keys not. Adapted from Michael Kazakov's [implementation](https://github.com/mikekazakov/eytzinger). 8 | * **PairingHeap**: a rather simple, fast and efficient multi-way heap, regarded as a simplified Fibonacci heap. Theoretically slower, but in practice much simpler and therefore more efficient than Fibonacci & Binomial heaps, this is most often the best choice for classical algorithms such as Prim's and Dijkstra's. Has a user-friendly interface with `decreaseKey` functionality. Time complexities for `insert`, `extractMin` and `decreaseKey` are O(1), O(lgn) and O(lgn) *amortized*, respectively. 
The [`pairing_heap_static`](https://github.com/Andreshk/AdvancedDataStructures/blob/master/pairing_heap_static.h) is a variant of this structure, optimized precisely for use in the aforementioned algorithms (similarly to [`binary_heap_static`](https://github.com/Andreshk/AdvancedDataStructures/blob/master/binary_heap_static.h)): the total number of values is predetermined, memory is allocated once for all nodes and also deallocated once (instead of per-node), all nodes are contained inside this contiguous space and no values are added/removed after initialization. [See also.](https://twitter.com/arntzenius/status/1050069364454682627) 9 | * **PersistentVector**: a persistent tree with a large branching factor (usually 32) that supports random access, `push_back` & `pop_back` in O(log₃₂n) time (practically O(1)) via path-copying. All operations & internal structure sharing are thread-safe and memory-safe. Includes optimizations like _transient_ (modifying) operations on rvalues, and keeping the last leaf node (the one most operations affect) outside of the tree structure. This means that 97% of the time `push_back` and `pop_back` will be done in real O(1) time with _at most_ 1 extra allocation. 10 | * **RangeMinimumQuery**: an asymptotically optimal _and_ practical solution to the problem of preprocessing a static array in order to efficiently query for the minimum of any subarray (or its position). Preprocessing is done in O(n) time and O(n) extra memory via a reduction RMQ->LCA->RMQ01, and the queries are then answered in O(1) - with all constants being very low. Uses the tagged integer utilities from [this](https://github.com/Andreshk/SnippySnippets) repo. Tests included. 11 | * **SkewHeap**: a self-adjusting, leftist heap where every operation is done via skew heap merging. Although there are no structural constraints, it has been shown that all heap modification operations work in O(lgn) *amortized* time.
The [C++](https://github.com/Andreshk/AdvancedDataStructures/blob/master/SkewHeap.h) version is optimized to use a preallocated buffer for node storage, has reduced memory usage & only supports inserting values and replacing the minimum with a bigger value. 12 | * **Splay tree**: a self-adjusting binary search tree, in which every insertion or lookup of a value rotates it up (_splays_ it) to the tree's root. This way recently accessed elements are quick to access again, making the structure suitable for some kind of cache. Due to careful rotations during splaying, the tree is kept relatively balanced, giving O(lgn) *amortized* time for insertion, lookup and removal. 13 | * **Static hashset**: hashing a static set of values in O(n) storage with *no collisions*. The values hashed are required to be in \[0;p) for a prime number p (can be chosen according to the use case). The total space used is for approx. 5n elements. The only meaningful operation supported is querying whether or not a value is in the set and is performed in, of course, O(1) time. 14 | * **Suffix array**: an array of all the suffixes of a given string, in sorted order. Five algorithms are presented: the naive sorting (O(n²lgn)), Manber-Myers's O(nlgn) prefix doubling algorithm, the linear recursive DC3/Skew algorithm by Kärkkäinen & Sanders, and two implementations of the Induced-Sorting linear algorithm by Ge Nong, Sen Zhang & Wai Hong Chan. 15 | * **Treap**: **tr**ee + h**eap** = treap. A binary search tree where every node has a randomly generated "priority". The values in the nodes form a binary search tree, while the priorities in the nodes form a binary heap at the same time. Relies on rotations for moving values (with their respective priorities) up and down the structure, while preserving the BST invariant. Therefore the tree is roughly well balanced and all modifying operations have O(lgn) *expected* time complexity. Suitable for general-purpose use, such as sets, maps, multisets etc. 
16 | * **Trie**: _to-do_ 17 | * **X-fast trie**: a static data structure, supporting searching for a value's predecessor/successor in a given, fixed set of values in just O(**lglgm**) time (where `m` is the maximum value in the set). Constructed in O(nlgm) time and takes O(n) space. The name comes from the trie, constructed during initialization. Due to being static (i.e. no addition/removal of values after construction) the "pointers" from each node in the trie are replaced by indices, used during the search operations, and as a result the finished structure does not actually contain a trie. In order to hit O(lglgm) as a hard, non-amortized bound, it relies on being able to hash perfectly a given, fixed set of integers (currently not supported). 18 | 19 | To-do: 20 | * fix existing structures 21 | * add more structures, such as: ~~Treap and SkewHeap in C++~~, ~~Persistent Vector~~, Tiered Vector, (Indexable) SkipList, dense hashset, ~~perfect hashset~~, perfect dynamic hashset, ~~Binomial Heap~~, ~~Splay trees~~, B-trees, Tries (~~string~~, ~~Radix~~ & Patricia _and_ made persistent), Rope, Suffix Tree, ~~Suffix Array~~, ~~k-d tree,~~ ~~Fibonacci heap~~ and many more... 22 | -------------------------------------------------------------------------------- /FibonacciHeap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include // std::exchange 4 | #include 5 | #include "DList.h" 6 | 7 | // All Fibonacci heap nodes reside in some doubly-linked circular list, 8 | // so they don't need explicit prev/next pointers. 
9 | template 10 | struct FNode { 11 | T val; 12 | bool marked = false; 13 | DList::iterator parent; 14 | DList subtrees; 15 | FNode(const T& val) : val{ val } {} 16 | }; 17 | 18 | template > 19 | class FibonacciHeap { 20 | DList> roots; 21 | int numValues = 0; 22 | [[msvc::no_unique_address]] Comp comp = {}; 23 | public: 24 | FibonacciHeap() = default; 25 | FibonacciHeap(FibonacciHeap&& other) noexcept 26 | : roots{ std::move(other.roots) }, numValues{ std::exchange(other.numValues, 0) }, comp{ std::move(other.comp) } {} 27 | FibonacciHeap& operator=(FibonacciHeap&& other) noexcept { 28 | roots = std::move(other.roots); 29 | numValues = std::exchange(other.numValues, 0); 30 | comp = std::move(other.comp); 31 | return *this; 32 | } 33 | ~FibonacciHeap() { numValues = 0; } 34 | 35 | // A heap "iterator" is just a const-wrapper around a DList iterator. 36 | // There is no iteration possible - use only for dereferencing & decreaseKey() 37 | class iterator { 38 | DList>::iterator it; 39 | friend class FibonacciHeap; // Only the heap can construct iterators to itself 40 | iterator(DList>::iterator it) : it{ it } {} 41 | public: 42 | iterator() = default; 43 | const T& operator*() const { return it->val; } 44 | const T* operator->() const { return &it->val; } 45 | operator bool() const { return bool(it); } 46 | bool operator==(const iterator&) const = default; 47 | }; 48 | 49 | // Inserts a value into the heap. O(1) amortized time 50 | iterator insert(const T& val) { 51 | auto it = roots.insert(FNode{ val }); 52 | const bool newMin = !roots.empty() && comp(val, roots.front()->val); 53 | if (newMin) { // The minimum should be reachable in O(1) 54 | roots.rotate(it); 55 | } 56 | ++numValues; 57 | return { it }; 58 | } 59 | // Removes & returns the minimum value in the heap. O(lgn) amortized time, O(n) worst-case (!) 60 | T extractMin() noexcept { 61 | using handle = DList>::handle; 62 | // 44 is the max degree such that F(d+2) < INT_MAX. 
63 | // For UINT_MAX it's 45, and 91 for UINT64_MAX 64 | std::array trees = {}; 65 | auto root = roots.front(); 66 | // Orphan the subtrees before making them roots 67 | for (auto& t : root->subtrees) { t.parent = {}; } 68 | roots.append(std::move(root->subtrees)); 69 | T res = std::move(root->val); 70 | roots.remove(root); 71 | // This is called "consolidation" in the papers & textbooks 72 | while (!roots.empty()) { 73 | handle curr = roots.extract(roots.front()); 74 | int deg = curr->subtrees.size(); 75 | // If there's a tree with the same degree, merge with the current. 76 | // This is the same as merging trees into a binomial heap :) 77 | // Each merge reduces the potential by one, making up for the actual time spent. 78 | while (trees[deg]) { 79 | if (comp(curr->val, trees[deg]->val)) { 80 | trees[deg]->parent = curr.toIter(); 81 | trees[deg]->marked = false; 82 | curr->subtrees.insert(std::move(trees[deg])); 83 | } else { 84 | curr->parent = trees[deg].toIter(); 85 | curr->marked = false; 86 | trees[deg]->subtrees.insert(std::move(curr)); 87 | curr = std::move(trees[deg]); 88 | } 89 | ++deg; 90 | assert(curr->subtrees.size() == deg); 91 | } 92 | trees[deg] = std::move(curr); 93 | } 94 | // Add all different-degree trees to the roots list, 95 | // simultaneously finding the one with the minimum value. 96 | assert(!root); // Will be reused for the new minimum 97 | for (handle& hnd : trees) { 98 | if (hnd) { 99 | if (!root || comp(hnd->val, root->val)) { 100 | root = hnd.toIter(); 101 | } 102 | roots.insert(std::move(hnd)); 103 | } 104 | } 105 | // Rotate the new roots list to the minimum value 106 | roots.rotate(root); 107 | --numValues; 108 | return res; 109 | } 110 | // Decreases the value in the heap, pointed by the given iterator. 
O(1) amortized time 111 | bool decreaseKey(iterator it, const T& newVal) { 112 | if (!comp(newVal, *it)) { // This is not a decrease 113 | return false; 114 | } 115 | auto curr = it.it; 116 | curr->val = newVal; 117 | bool toRemove = curr->parent && comp(newVal, curr->parent->val); 118 | bool newMin = comp(newVal, roots.front()->val); 119 | // The first iteration may cut a node and the last (may also be the first) may mark one, 120 | // but each iteration between them will unmark & cut a node, decreasing the potential by 1. 121 | // This decrease makes up for the actual time spent :) 122 | while (toRemove) { 123 | // Cut the current node & add it to the roots list 124 | curr->marked = false; // See CUT(H,x,y) in CLRS 125 | auto parent = std::exchange(curr->parent, {}); 126 | roots.insert(parent->subtrees.extract(curr)); 127 | // The first cut node (with the new value) is the newly decreased key and may become the new minimum, 128 | // but the remaining are non-roots and not changed, so will not become minimums when cut. 129 | if (newMin) { 130 | roots.rotate(curr); 131 | newMin = false; 132 | } 133 | // Advance to the parent & mark it for removal if it's not a root 134 | curr = parent; 135 | toRemove = curr->parent && std::exchange(curr->marked, true); 136 | } 137 | return true; 138 | } 139 | // Returns the minimum value in the heap 140 | const T& peekMin() const noexcept { 141 | assert(!empty()); 142 | return roots.front()->val; 143 | } 144 | // Checks whether the heap is empty 145 | bool empty() const noexcept { 146 | assert(roots.empty() == (numValues == 0)); 147 | return roots.empty(); 148 | } 149 | // Returns the # of values in the heap 150 | int size() const noexcept { return numValues; } 151 | // Merge another heap into the current. 
O(1) time (total potential does not increase) 152 | void merge(FibonacciHeap&& other) noexcept { 153 | assert(this != &other); 154 | if (other.empty()) { 155 | return; 156 | } else if (empty()) { 157 | *this = std::move(other); 158 | return; 159 | } else if (comp(peekMin(), other.peekMin())) { 160 | roots.append(std::move(other.roots)); 161 | } else { 162 | other.roots.append(std::move(roots)); 163 | roots = std::move(other.roots); 164 | } 165 | numValues += std::exchange(other.numValues, 0); 166 | } 167 | // Checks the heap property of each tree & whether every node has the correct parent pointer. 168 | // Obviously O(n), use for testing only. 169 | bool validate() const noexcept { 170 | return [&](this auto&& self, const DList>& nodes, const DList>::loop_iterator parent) -> bool { 171 | // This is std::ranges::all 172 | for (auto it = nodes.begin(); it != nodes.end(); ++it) { 173 | if ((parent && comp(it->val, parent->val)) || it->parent != parent || !self(it->subtrees, it)) { 174 | return false; 175 | } 176 | } 177 | return true; 178 | }(roots, {}); 179 | } 180 | 181 | // Pretty-printing, see FibonacciHeapFormat.h 182 | friend struct fmt::formatter>; 183 | }; 184 | -------------------------------------------------------------------------------- /Treap.hs: -------------------------------------------------------------------------------- 1 | module Treap (mkTreap, empty, valid, insert, remove, search, 2 | merge, split, height, size, fromList, toList, sort, sortIO) where 3 | import System.Random (StdGen, mkStdGen, random, newStdGen) 4 | import qualified System.Random as Random (split) 5 | 6 | -- In order to keep the Treap, a randomized data structure, pure, every Treap 7 | -- carries with itself a random number generator, provided on Treap creation. 8 | -- All complexities are O(lgn) expected, O(n) worst-case except when noted otherwise. 

-- The underlying binary tree: a node holds a value, its priority and two subtrees.
data Tree a = Empty | Node a Double (Tree a) (Tree a)

-- Thin wrapper around the tree that also carries a random number generator;
-- the exported functions unwrap it and do their work on the tree.
data Treap a = Treap (Tree a) StdGen

instance Show a => Show (Treap a) where
    show (Treap Empty _) = "Empty treap."
    show (Treap tree _)  = render 0 tree
      where
        -- One line per node ("root -> left, right"), children indented by two more columns.
        render :: Show b => Int -> Tree b -> String
        render _ Empty = ""
        render indent node@(Node _ _ l r) = concat
            [ replicate indent ' ', label node, " -> ", label l, ", ", label r, "\n"
            , render (indent + 2) l
            , render (indent + 2) r
            ]
        -- Value and priority at the root of a subtree, or "#" for an empty one.
        label :: Show b => Tree b -> String
        label Empty = "#"
        label (Node v p _ _) = show v ++ " {" ++ show p ++ "}"

-- An empty treap whose generator is built from the given seed
mkTreap :: Int -> Treap a
mkTreap = Treap Empty . mkStdGen

-- True exactly when the wrapped tree has no nodes
empty :: Treap a -> Bool
empty (Treap t _) = case t of
    Empty -> True
    _     -> False

-- Leftmost and rightmost value of a tree; both are undefined for Empty
getMin, getMax :: Tree a -> a
getMin (Node v _ l _) = case l of
    Empty -> v
    _     -> getMin l
getMax (Node v _ _ r) = case r of
    Empty -> v
    _     -> getMax r

-- Treap validity check:
-- - every value lies within the bounds inherited from its ancestors (BST order)
-- - every priority is at least its parent's priority (min-heap order)
-- The bounds for the root are never forced when the tree is empty.
valid :: (Eq a, Ord a) => Treap a -> Bool
valid (Treap t _) = go t (getMin t) (getMax t) 0.0
  where
    go :: Ord b => Tree b -> b -> b -> Double -> Bool
    go Empty _ _ _ = True
    go (Node v p l r) lo hi parentPr = and
        [ p >= parentPr, lo <= v, v <= hi
        , go l lo v p
        , go r v hi p
        ]


-- Left and right, or counter-clockwise and clockwise tree rotations.
-- Note: does not violate BST invariant.
rotateLeft, rotateRight :: Tree a -> Tree a
-- The right child becomes the root; the old root becomes its left child.
rotateLeft (Node v p l (Node rv rp rl rr)) = Node rv rp (Node v p l rl) rr
-- The left child becomes the root; the old root becomes its right child.
rotateRight (Node v p (Node lv lp ll lr) r) = Node lv lp ll (Node v p lr r)

-- Restores the heap order of the priorities at the root of the given tree
-- by rotating the child with the smaller priority above it.
-- With the flag set (deletion) the old root keeps sinking recursively until it
-- is a leaf, which is then cut off. Without it (back-tracking after insertion)
-- a single local rotation is made at most.
-- Complexity: O(lgn) expected when sinking, O(1) otherwise
rebalance :: Bool -> Tree a -> Tree a
rebalance sinking node@(Node _ _ Empty Empty)
  | sinking   = Empty {- cut this leaf -}
  | otherwise = node
rebalance sinking node@(Node _ prio l r)
  | prio <= lp && prio <= rp = node -- Heap property restored
  | lp < rp   = if sinking then sinkRight (rotateRight node) else rotateRight node
  | lp >= rp  = if sinking then sinkLeft (rotateLeft node) else rotateLeft node
  where
    lp = prioOf l
    rp = prioOf r
    -- After a right rotation the old root is the right child, after a left one the left child.
    -- The patterns are lazy, so the rotated tree is not forced any earlier than necessary.
    sinkRight ~(Node v p a b) = Node v p a (rebalance True b)
    sinkLeft  ~(Node v p a b) = Node v p (rebalance True a) b
    -- An empty subtree counts as having priority 2.0
    prioOf :: Tree b -> Double
    prioOf Empty = 2.0
    prioOf (Node _ p _ _) = p

-- Helper function for insertion, also used cleverly by split & merge.
insert' :: (Eq a, Ord a) => a -> Double -> Tree a -> Tree a
insert' x newpr Empty = Node x newpr Empty Empty
insert' x newpr node@(Node val pr l r)
  | x < val         = fixup (Node val pr (insert' x newpr l) r)
  | x > val         = fixup (Node val pr l (insert' x newpr r))
  -- The value is already present: only the special priority -1.0 replaces the node
  | newpr == (-1.0) = fixup (Node x newpr l r)
  | otherwise       = node
  where fixup = rebalance False -- local repair of the heap order on the way back up

-- Value insertion - descend as in a BST with a freshly drawn priority;
-- the treap keeps the advanced generator.
insert :: (Eq a, Ord a) => a -> Treap a -> Treap a
insert x (Treap t gen) =
    let (prio, gen') = random gen
    in Treap (insert' x prio t) gen'

-- Value deletion - the node found is replaced by a placeholder with the "heavy"
-- priority 2.0, which rebalance sinks to a leaf and cuts. The generator is unchanged.
remove :: (Eq a, Ord a) => a -> Treap a -> Treap a
remove x (Treap t gen) = Treap (go t) gen
  where
    go Empty = Empty
    go (Node val pr l r)
      | x < val   = Node val pr (go l) r
      | x > val   = Node val pr l (go r)
      | otherwise = rebalance True (Node undefined 2.0 l r)

-- Value lookup, descending exactly like in a standard BST
search :: (Eq a, Ord a) => a -> Treap a -> Bool
search x (Treap t _) = go t
  where
    go Empty = False
    go (Node val _ l r)
      | x < val  = go l
      | x > val  = go r
      | x == val = True

-- Merging of two treaps requires the maximum value in the left treap
-- be smaller than the minimum value in the right treap. This is done
-- by adding a dummy value, which adopts the two treaps, and sinking
-- this dummy value to a leaf and cutting it.
-- Yields Nothing when either treap is empty or the value ranges are not strictly
-- ordered; otherwise the merged treap, which keeps the first treap's generator.
merge :: (Eq a, Ord a) => Treap a -> Treap a -> Maybe (Treap a)
merge (Treap Empty _) _ = Nothing
merge _ (Treap Empty _) = Nothing
merge (Treap lo gen) (Treap hi _) =
    if getMax lo >= getMin hi
        then Nothing
        else Just (Treap (rebalance True (Node undefined 2.0 lo hi)) gen) -- (!)

-- Splitting a treap in two, such that one of them contains all
-- values less than a given value x and the other all bigger values.
-- x is inserted with priority -1.0 and the two subtrees of the resulting
-- root are returned, each with one half of the split generator.
-- Note: x will not be present in any of the two resulting trees.
split :: (Eq a, Ord a) => a -> Treap a -> (Treap a, Treap a)
split x (Treap t gen) =
    let Node _ _ smaller bigger = insert' x (-1.0) t
        (genL, genR) = Random.split gen
    in (Treap smaller genL, Treap bigger genR)

-- Treap height, unfortunately O(n)
height :: Treap a -> Int
height (Treap t _) = go t
  where
    go :: Tree b -> Int
    go Empty = 0
    go (Node _ _ l r) = 1 + max (go l) (go r)

-- Contained value count, also in O(n)
size :: Treap a -> Int
size (Treap t _) = go t
  where
    go :: Tree b -> Int
    go Empty = 0
    go (Node _ _ l r) = 1 + go l + go r

-- Constructing a treap from a generator and a list of values, one insert per value
fromList :: (Eq a, Ord a) => StdGen -> [a] -> Treap a
fromList gen xs = foldr insert (Treap Empty gen) xs

-- Sorted list from all the values in a treap.
-- "toList t1 ++ toList t2 == (toList $ merge t1 t2)"
-- holds true iff merging is allowed, whereas
-- "let (l,r) = split x t in toList l ++ toList r == toList t"
-- is true only when x was not present in t beforehand.
161 | toList :: Treap a -> [a] 162 | toList (Treap t _) = toList' t 163 | where toList' :: Tree a -> [a] 164 | toList' Empty = [] 165 | toList' (Node val _ left right) = toList' left ++ (val : toList' right) 166 | 167 | -- Sorting a list by inserting all values in a treap (constructed w/ the given random 168 | -- generator) and extracting them in order. Obviously O(nlgn) expected complexity. 169 | sort :: (Eq a, Ord a) => StdGen -> [a] -> [a] 170 | sort gen = toList . fromList gen 171 | 172 | -- Convenience that runs in the IO monad & uses the global random generator. 173 | sortIO :: (Eq a, Ord a) => [a] -> IO [a] 174 | sortIO lst = do 175 | gen <- newStdGen 176 | return $ sort gen lst 177 | 178 | -- to-do: an invariant-checking function that can be called 179 | -- after insert'/remove' and lots, lots of tests... 180 | -------------------------------------------------------------------------------- /RangeMinimumQuery/Source.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // Poor man's global #define - in the only .cpp, before all include-s 9 | //#define DISABLE_TAGGED_INTS 10 | #include "LinearRMQ.h" 11 | 12 | struct Printer { 13 | // A cartesian tree does not keep a copy of the values it's built from, 14 | // so they should be provided for detailed printing. 15 | template 16 | static void print(const CartesianTree& t, const Range& values_) { 17 | if (t.nodes.empty()) { 18 | fmt::print("Empty tree.\n"); 19 | return; 20 | } 21 | const ArrayView> values{ values_ }; 22 | // Again keeping a local stack of nodes waiting to be 23 | // visited & printed. pad is used for pretty-printing. 24 | struct frame { Int idx; int pad; }; 25 | std::vector stack; 26 | stack.emplace_back(t.rootIdx, 0); 27 | // This print() function is not supposed to be fast, 28 | // so we can afford some maybe-not-inlined stateful lambdas for code clarity... 
29 | auto printVal = [&](Int idx) { 30 | if (idx == invalidIdx) { 31 | fmt::print("#"); 32 | } else { 33 | fmt::print("{}", values[idx]); 34 | } 35 | }; 36 | auto stackPush = [&](Int idx, int pad) { 37 | if (idx != invalidIdx) { 38 | stack.emplace_back(idx, pad); 39 | } 40 | }; 41 | // until there are no nodes left, print: 42 | while (!stack.empty()) { 43 | // These should be bound by value, rather than reference (!) 44 | const auto [idx, pad] = stack.back(); 45 | stack.pop_back(); 46 | const Node& nd = t.nodes[idx]; 47 | // Print the padding, corresponding to the node depth 48 | fmt::print("{:{}}{}-> ", "", pad, values[idx]); 49 | const Int leftIdx = nd.left, rightIdx = nd.right; 50 | // Print the values in the children nodes 51 | printVal(leftIdx); 52 | fmt::print(", "); 53 | printVal(rightIdx); 54 | fmt::print("\n"); 55 | // The right child is pushed deeper in the stack, 56 | // since we want to visit the left subtree first. 57 | stackPush(rightIdx, pad + 2); 58 | stackPush(leftIdx, pad + 2); 59 | } 60 | } 61 | }; 62 | 63 | template > 64 | void testSparseTableRMQ(const Range& values_, const Comp& comp = Comp{}) { 65 | // This shouldn't output anything 66 | using T = std::ranges::range_value_t; 67 | const SparseTableRMQ, Comp> test{ values_,comp }; 68 | const ArrayView> values{ values_ }; 69 | const Int n = values.size(); 70 | for (Int i{ 0 }; i < n; ++i) { 71 | for (Int j = i + 1; j <= n; ++j) { 72 | // Note: comparing argmin may lead to false negatives since the 73 | // two algorithms may break ties between equal values differently. 74 | const T& result = test.min(values, i, j); 75 | const auto [a,b] = test.argmin(i, j); 76 | const T& result2 = values[comp(values[a], values[b]) ? a : b]; 77 | const T& expected = test.min_slow(values, i, j); 78 | // check x!=y only using the supplied operator< (!) 
79 | if (comp(result, expected) || comp(expected, result) 80 | || comp(result2, expected) || comp(expected, result2)) 81 | { 82 | vassert(false); 83 | fmt::print("FAIL: (i,j)=({},{})\n", i, j); 84 | fmt::print(" res={} res2={} exp={}\n", result, result2, expected); 85 | } 86 | } 87 | } 88 | } 89 | 90 | void testTableArray(const std::vector& values, const bool print = false) { 91 | // Example table usage: cache all RMQ results for a given array 92 | // in linear time (w/ respect to table size) 93 | const int n = int(values.size()); 94 | TableArray table{ n, 1 }; 95 | for (int i = 0; i < n; ++i) { 96 | table.at(0, i, i + 1) = i; 97 | } 98 | // Make a recursive computation for a range [i;j) 99 | // based on the results for some subrange(s) of it. 100 | for (int d = 1; d < n; ++d) { 101 | for (int i = 0; i < (n - d); ++i) { 102 | const int j = i + d + 1; 103 | // We can use any [i;k) and [k;j), alternatively 104 | const int a = table.at(0, i, j - 1); 105 | const int b = table.at(0, i + 1, j); 106 | table.at(0, i, j) = (values[a] < values[b] ? a : b); 107 | } 108 | } 109 | for (int i = 0; i < n; ++i) { 110 | for (int j = i + 1; j <= n; ++j) { 111 | const int result = table.at(0, i, j); 112 | const int expected = *std::ranges::min_element(std::views::iota(i, j), std::less<>{}, [&](const int i) { return values[i]; }); 113 | vassert(result == expected); 114 | } 115 | } 116 | if (print) { 117 | fmt::print(" {}\n", fmt::join(values, " ")); 118 | for (int i = 0; i < n; ++i) { 119 | for (int j = 0; j <= i; ++j) { 120 | fmt::print(". 
"); 121 | } 122 | for (int j = i + 1; j <= n; ++j) { 123 | fmt::print("{} ", table.at(0, i, j)); 124 | } 125 | fmt::print("\n"); 126 | } 127 | } 128 | } 129 | 130 | template > 131 | void testLinearRMQ(const Range& values, const Comp comp = Comp{}) { 132 | const int n = int(std::ranges::size(values)); 133 | using T = std::ranges::range_value_t; 134 | const LinearRMQ rmq{ values, comp }; 135 | for (int i = 0; i < n; ++i) { 136 | for (int j = i + 1; j <= n; ++j) { 137 | const int result = rmq.argmin(i, j); 138 | const int expected = *std::ranges::min_element(std::views::iota(i, j), std::less{}, [&](const int i) { return values[i]; }); 139 | vassert(result == expected); 140 | } 141 | } 142 | } 143 | 144 | /* notes: 145 | * - add user-friendly aliases for Int64 = Int, etc. 146 | * -> use the smallest possible values for Int & Int 147 | * -> check whether the explicit conversions are ok 148 | * - consider compressing the bitvectors in a big one, too (!) 149 | * -> this may remove the need to have small Int 150 | * - try serialization/deserialization in as small of a footprint as possible 151 | * - benchmarks, benchmarks, benchmarks 152 | * - add getMemUsage() methods to CartesianTree, RMQ01Info, SparseTableRMQ, TableArray, LinearRMQ 153 | */ 154 | int main() { 155 | const std::vector values{ 8,7,2,2,8,6,9,4,5 }; 156 | 157 | const CartesianTree ct{ values }; 158 | Printer::print(ct, values); 159 | const RMQ01Info rmqInfo = ct.to01RMQ(); 160 | fmt::print(" Values: {}\n", fmt::join(values, " ")); 161 | fmt::print(" Depths: {}\n", fmt::join(rmqInfo.depths, " ")); 162 | fmt::print(" Indices: {}\n", fmt::join(rmqInfo.indices, " ")); 163 | fmt::print("Occurences: {}\n", fmt::join(rmqInfo.occurences, " ")); 164 | fmt::print("\n"); 165 | 166 | // Test simple RMQ 167 | testSparseTableRMQ(values); 168 | 169 | // Test RMQ for a non-trivial type w/ functor 170 | auto cmpByLength = [](const std::string_view& lhs, const std::string_view& rhs) { 171 | return (lhs.length() < 
#ifdef NDEBUG // Same macro that controls assert()
#define DEBUG_ONLY(x)
#else
#define DEBUG_ONLY(x) x
#endif // NDEBUG

// Circular doubly-linked list with O(1) insertion/removal and referential stability:
// a node never moves in memory once allocated, so iterators to it stay valid until
// that node is removed. The list is move-only.
template <class T>
class DList {
    // Important: list nodes must be as barebones as possible.
    // prev/next are only null-initialized in debug builds, where the asserts
    // in insert()/extract() check them; release builds never read them unset.
    struct Node {
        T val;
        Node* prev DEBUG_ONLY(= nullptr);
        Node* next DEBUG_ONLY(= nullptr);
        Node(const T& val) : val{ val } {}
        Node(T&& val) : val{ std::move(val) } {}
    };

    Node* head = nullptr;
    int numValues = 0;

    // Checks whether a given node is part of this list, i.e. reachable from head.
    // Walks the whole ring, so it is O(n) - intended for assertions/testing only.
    // An empty list contains no nodes, so the answer is false.
    bool reachable(const Node* ptr) const noexcept {
        if (head == nullptr) {
            return false;
        }
        const Node* curr = head;
        do {
            if (curr == ptr) {
                return true;
            }
            curr = curr->next;
        } while (curr != head);
        return false;
    }
public:
    DList() = default;
    // Convenience ctor: inserts the values in the given order.
    DList(std::initializer_list<T> il) : DList{} { for (const T& x : il) { insert(x); } }
    // No list copying, only moving
    DList(const DList&) = delete;
    DList& operator=(const DList&) = delete;
    DList(DList&& other) noexcept
        : head{ std::exchange(other.head, nullptr) }, numValues{ std::exchange(other.numValues, 0) } {}
    DList& operator=(DList&& other) noexcept {
        if (this != &other) {
            clear(); // free our own nodes before adopting the other list's
            head = std::exchange(other.head, nullptr);
            numValues = std::exchange(other.numValues, 0);
        }
        return *this;
    }
    ~DList() { clear(); }

    // The basic iterator is just a reference to a node and shouldn't be much more!
    // Can be used for iteration, but cannot detect the end - use loop_iterator for that.
    class iterator {
        Node* ptr = nullptr;
        friend class DList; // Only the list can construct non-empty iterators to itself
        iterator(Node* ptr) : ptr{ ptr } {}
    public:
        iterator() = default;
        T& operator*() const noexcept { return ptr->val; }
        T* operator->() const noexcept { return &ptr->val; }
        explicit operator bool() const noexcept { return (ptr != nullptr); }
        iterator& operator++() noexcept { ptr = ptr->next; return *this; }
        iterator& operator--() noexcept { ptr = ptr->prev; return *this; }
        // Two iterators are equal iff they refer to the same node (or are both empty).
        bool operator==(const iterator& other) const noexcept { return (ptr == other.ptr); }
    };

    // A handle is a (usually) temporary owner of a node, used to move nodes between lists.
    // Obtained only from extract(), owns the memory for the node until it's used in insert().
    class handle {
        friend class DList; // Only the list can create non-empty handles to its nodes
        Node* ptr;
        handle(Node* ptr) : ptr{ ptr } {}
    public:
        handle() : ptr{ nullptr } {}
        handle(handle&& other) noexcept : ptr{ std::exchange(other.ptr, nullptr) } {}
        handle& operator=(handle&& other) noexcept {
            if (this != &other) {
                // Release the node we currently own (if any) - otherwise it would leak
                // as soon as ptr is overwritten.
                delete ptr;
                ptr = std::exchange(other.ptr, nullptr);
            }
            return *this;
        }
        // Avoid memory leaks from un-inserted handles (deleting nullptr is a no-op)
        ~handle() { delete ptr; }
        T& operator*() const noexcept { return ptr->val; }
        T* operator->() const noexcept { return &ptr->val; }
        explicit operator bool() const noexcept { return (ptr != nullptr); }
        // Convert to regular, non-owning iterator - not for direct use (!)
        iterator toIter() const noexcept { return { ptr }; }
    };

    // Insertion is done at the end of the list (so, immediately before head).
    // Returns an iterator to the newly inserted value.
    iterator insert(const T& val) {
        return insert(handle{ new Node{val} });
    }
    iterator insert(T&& val) {
        return insert(handle{ new Node{std::move(val)} });
    }
    // Inserts an extracted node. The handle must be non-empty.
    // Note that this resets the handle, so it doesn't attempt to free the memory on scope exit later.
    iterator insert(handle&& hnd) noexcept {
        assert(hnd.ptr != nullptr);
        assert(hnd.ptr->prev == nullptr && hnd.ptr->next == nullptr);
        Node* const fresh = std::exchange(hnd.ptr, nullptr);
        if (head == nullptr) {
            // A single node is its own predecessor and successor
            head = fresh;
            head->prev = head->next = head;
        } else {
            // The "back" in a circular list is just before head, or the "front"
            Node* const last = head->prev;
            last->next = fresh;
            fresh->prev = last;
            fresh->next = head;
            head->prev = fresh;
        }
        ++numValues;
        return { fresh };
    }

    // Removes a value from the list, resetting the iterator to avoid use-after-free
    void remove(iterator& it) noexcept {
        handle h = extract(it); // memory freed when this goes out of scope
        it.ptr = nullptr;
    }
    // Extracts a node from the list, without destroying the value or deallocating the memory.
    // This node can then be inserted into some (possibly other) list.
    // The iterator must refer to a node of this list.
    handle extract(iterator it) noexcept {
        Node* ptr = it.ptr;
        // Mostly safe otherwise - unless it's the only node in its list! So, better safe than sorry
        assert(head && reachable(ptr));
        // Advance to reduce # of cases afterwards
        if (ptr == head) {
            head = head->next;
        }
        if (ptr == head) {
            // Still the head after advancing => it was the only node
            assert(head->prev == head && head->next == head);
            head = nullptr;
        } else {
            Node* prev = ptr->prev;
            Node* next = ptr->next;
            assert(ptr == prev->next && ptr == next->prev);
            prev->next = next;
            next->prev = prev;
        }
        DEBUG_ONLY(ptr->prev = ptr->next = nullptr); // not relied on :)
        --numValues;
        return { ptr };
    }

    // Checks whether the list is empty
    [[nodiscard("Did you mean .clear()?")]]
    bool empty() const noexcept {
        assert((head == nullptr) == (numValues == 0));
        return (head == nullptr);
    }
    // Returns the # of values in the list
    int size() const noexcept { return numValues; }
    // Empty the list, freeing all allocated memory
    void clear() noexcept {
        if (!head) { return; }
        Node* ptr = head;
        do {
            Node* tmp = ptr->next; // read the link before the node is freed
            delete ptr;
            ptr = tmp;
        } while (ptr != head); // head is only compared here, never dereferenced
        head = nullptr;
        numValues = 0;
    }

    // Append another list, leaving it empty afterwards. O(1): only the two
    // "seams" between the rings are relinked.
    void append(DList&& other) noexcept {
        assert(this != &other); // Copying nodes would be needed
        if (!other.head) {
            return; // Nothing to do
        }
        if (!head) {
            head = other.head;
            numValues = other.numValues;
        } else {
            Node* last = head->prev;
            Node* last2 = other.head->prev;
            last->next = other.head;
            other.head->prev = last;
            last2->next = head;
            head->prev = last2;
            numValues += other.numValues;
        }
        // This is a "destructive" operation for the appended list
        other.head = nullptr;
        other.numValues = 0;
    }
    // Returns an iterator to the first value in the list (empty iterator for an empty list).
    iterator front() const noexcept { return { head }; }
    // "Rotate" the list to have a given node as the head
    // (reachable in O(1)) - trivial due to the circularity.
    void rotate(iterator it) noexcept {
        assert(head ? it.ptr && reachable(it.ptr) : !it.ptr);
        head = it.ptr;
    }

    // Note: looping over values requires a different kind of iterators, storing more data!
    // In a ring, begin and end refer to the same node (head); the flag b is what tells
    // them apart: it is set only for begin() of a non-empty list and cleared by ++.
    class loop_iterator {
        iterator it;
        bool b = false;
        friend class DList;
        loop_iterator(iterator it, bool b) : it{ it }, b{ b } {}
    public:
        using value_type = T;
        using difference_type = int;
        loop_iterator() = default;
        T& operator*() const noexcept { return *it; }
        T* operator->() const noexcept { return &*it; }
        explicit operator bool() const noexcept { return bool(it); } // Only a default-constructed can be empty
        loop_iterator& operator++() noexcept { ++it; b = false; return *this; }
        loop_iterator operator++(int) noexcept { auto copy = *this; ++*this; return copy; }
        // Member-wise comparison: same node and same "just started" flag
        bool operator==(const loop_iterator& other) const noexcept { return (it == other.it && b == other.b); }
        // Comparison with a plain iterator ignores the flag
        bool operator==(const iterator& other) const noexcept { return (it == other); }
    };
    loop_iterator begin() const noexcept { return { {head}, !empty() }; }
    loop_iterator end() const noexcept { return { {head}, false }; }
};
simulate the call stack with a local one, containing all the info we need: 18 | // idx - index of the node 19 | // state - the next subtree to be visited 20 | // Depending on the state we choose either to visit the left subtree next, 21 | // visit the right one, or just backtrack up when there's nothing left to do. 22 | enum class State { Left, Right, Done }; 23 | struct RMQFrame { Int idx; State state; }; 24 | } // anonymous namespace 25 | 26 | struct RMQ01Info { 27 | DynamicArray, Int> depths; 28 | DynamicArray, Int> indices; 29 | DynamicArray, Int> occurences; 30 | RMQ01Info() = default; 31 | explicit RMQ01Info(const int n) 32 | : depths(Int(2 * n - 1)) 33 | , indices(Int(2 * n - 1)) 34 | , occurences(Int(n)) 35 | { 36 | for (auto& x : occurences) { x = Int(-1); } 37 | } 38 | }; 39 | 40 | // Min-cartesian tree, constructed for a sequence of values. 41 | template > 42 | class CartesianTree { 43 | // Nodes are only added and never removed, so we keep them all in contiguous 44 | // memory. The nodes themselves store indices to this array, instead of pointers 45 | // to other nodes (with invalidIdx corresponding to the null pointer). 46 | // Invariant: the node at index i correspongs to the i-th added value. 47 | DynamicArray> nodes; 48 | // Index of the tree's root node 49 | Int rootIdx; 50 | // Comparison functor 51 | Comp comp; 52 | public: 53 | // Constructs a cartesian tree from a range of value & 54 | // optionally adds a number of sentinel values afterwards. 55 | template 56 | requires std::is_same_v> 57 | explicit CartesianTree(const Range& values, const int numSentinels = 0, const Comp& comp = Comp{}) 58 | : nodes(Int(int(values.size()) + numSentinels)) 59 | , rootIdx{ 0 } 60 | , comp{ comp } 61 | { 62 | // We maintain the "right spine" of the tree, i.e. the nodes 63 | // visited by starting from the root and only traveling right. 64 | // This vector contains the indices of these nodes in the vector 65 | // and their corresponding values. 
66 | struct Pair { T val; Int idx; }; 67 | const auto from = std::ranges::cbegin(values); 68 | const auto to = std::ranges::cend(values); 69 | std::vector rightSpine{ { *from, rootIdx } }; 70 | // Index of the next value to be added (can be inferred by it) 71 | Int progress{ 1 }; 72 | for (auto it = from + 1; it != to; ++it) { 73 | // Note that (obviously) every node is pushed "to the right", 74 | // i.e. all other nodes are to the left of it 75 | // - either "down left" or "up left" in the tree. 76 | const T& x = *it; 77 | if (comp(rightSpine.back().val, x)) { 78 | // Add another node to the bottom of the right spine, i.e. right child 79 | nodes[rightSpine.back().idx].right = progress; 80 | rightSpine.emplace_back(x, progress); 81 | ++progress; 82 | } else { 83 | // Find where to split the spine in order to insert the new node 84 | // Every node is initially inserted into the right spine, but once 85 | // removed, it will not be inserted back. This is why this loop 86 | // will take O(n) time total, resulting in the complexity of the algorithm. 87 | while (!rightSpine.empty() && !comp(rightSpine.back().val, x)) { 88 | rightSpine.pop_back(); 89 | } 90 | // If there are no nodes left in the spine, then the new node 91 | // should be the root (it is currently the smallest value in the tree) 92 | if (rightSpine.empty()) { 93 | nodes[progress].left = rootIdx; 94 | rootIdx = progress; 95 | rightSpine.emplace_back(x, rootIdx); 96 | ++progress; 97 | } else { 98 | // Otherwise, we insert the new node at the split point of the spine 99 | // and it becomes the "rightmost" node, inheriting the bottom part of 100 | // the old spine as its left subtree. 101 | const Int rightmostIdx = rightSpine.back().idx; 102 | // These correspond to the "pointer" redirections for the nodes 103 | nodes[progress].left = nodes[rightmostIdx].right; 104 | nodes[rightmostIdx].right = progress; 105 | // Should be noted that the newly inserted node always ends up 106 | // as the rightmost node, i.e. 
the last node in the right spine. 107 | rightSpine.emplace_back(x, progress); 108 | ++progress; 109 | } 110 | } 111 | } 112 | // Simulate adding a number of sentinel values that compare larger 113 | // than all the given values, by forcing new nodes to the bottom of the right spine. 114 | Int lastIdx = rightSpine.back().idx; 115 | for (int i = 0; i < numSentinels; ++i) { 116 | nodes[lastIdx].right = progress; 117 | lastIdx = progress; 118 | ++progress; 119 | } 120 | vassert(progress == nodes.size()); 121 | } 122 | 123 | RMQ01Info to01RMQ() const { 124 | if (nodes.empty()) { 125 | return {}; 126 | } 127 | RMQ01Info res{ int(nodes.size()) }; 128 | // The next index in the results arrays to be filled 129 | Int progress{ 0 }; 130 | std::vector stack; 131 | // In the beginning, there was the root... 132 | stack.emplace_back(Int(rootIdx), State::Left); 133 | while (!stack.empty()) { 134 | auto& [idx,state] = stack.back(); 135 | // We can infer the node's depth by the current stack size. 136 | const Int depth{ int(stack.size() - 1) }; 137 | // Each iteration corresponds to one node visit, 138 | // so we add its depth (and others) to the result no matter the state. 139 | res.depths[progress] = depth; 140 | res.indices[progress] = idx; 141 | res.occurences[idx] = progress; 142 | ++progress; 143 | const Node& nd = nodes[idx]; 144 | // If the left subtree is due for visiting, but is empty, 145 | // change the state to the next (as if it's already visited) 146 | if (state == State::Left && nd.left == invalidIdx) { 147 | state = State::Right; 148 | } 149 | // Same for the right subtree - if it's empty, we are done with this node. 150 | if (state == State::Right && nd.right == invalidIdx) { 151 | state = State::Done; 152 | } 153 | // The above two checks also "recognize" the leaves in the tree. 154 | // Now either backtrack up, or select the next tree for visiting. 155 | // Update state before push_back, or the reference may be invalidated on reallocation (!) 
156 | if (state == State::Left) { 157 | state = State::Right; 158 | stack.emplace_back(nd.left, State::Left); 159 | } else if (state == State::Right) { 160 | // Mark for popping, but only on the way back from the right subtree 161 | state = State::Done; 162 | stack.emplace_back(nd.right, State::Left); 163 | } else { 164 | stack.pop_back(); 165 | } 166 | } 167 | // Postconditions: 168 | if constexpr (vassertEnabled) { 169 | // The vector should be completely filled by now. 170 | vassert(progress == Int(res.depths.size())); 171 | // Each values should have at least one occurence noted 172 | vassert(std::ranges::none_of(res.occurences, [](const Int x) { return (x == -1); })); 173 | // Each value's occurence is the last (not actually required for correctness). 174 | vassert(std::ranges::all_of(std::views::iota(Int(0), res.indices.size()), 175 | [&](const Int pos) { 176 | const Int valueIdx = res.indices[pos]; 177 | return (pos <= res.occurences[valueIdx]); 178 | })); 179 | } 180 | return res; 181 | } 182 | 183 | // External access (for debugging purposes) 184 | friend struct Printer; 185 | }; 186 | // Deduction guide 187 | template 188 | CartesianTree(const Range&) -> CartesianTree>; 189 | template 190 | CartesianTree(const Range&, const int, const Comp&) -> CartesianTree, Comp>; 191 | -------------------------------------------------------------------------------- /pairing_heap_static.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include // std::numeric_limits::max 4 | #include // size_t 5 | #include // std::swap 6 | #include // std::is_trivially_destructible_v 7 | #include // std::pair, std::distance 8 | 9 | // On construction allocates continuous space for a predetermined number 10 | // of nodes, and then all node pointers point to some node in this space. 
// Pairing heap specialised for Dijkstra-style use with a fixed set of vertices.
// On construction it allocates contiguous space for a predetermined number
// of nodes, and then all node pointers point to some node in this space.
// No memory is freed when a value is extracted - its node is only
// "isolated" and orphaned and the contained value is not destroyed.
// This is why a trivial (no side-effects) destructor is required.
template <class T>
class PairingHeapStatic {
    static_assert(std::is_trivially_destructible_v<T>,
        "PairingHeap requires the contained type to have a trivial destructor.");

    using vertex = size_t;
    struct node {
        T value;
        node* leftChild;
        node* rightSibling;
        node* predecessor; // parent or left sibling
        node(const T& val)
            : value{ val }, leftChild{ nullptr }, rightSibling{ nullptr }, predecessor{ nullptr } {}
    };

    // The "arena" of all nodes for this heap. This vector contains
    // all heap nodes and we say the heap "resides" in the vector.
    // Its size is set during initialization, and extracting values will leave
    // "holes" inside it. All node pointers can be replaced by indices to this
    // vector, but keeping pointers is faster & more convenient.
    std::vector<node> nodes;
    // The address of the root node (equal to &nodes[i] for some i), or nullptr for an empty heap
    node* root = nullptr;
    // Number of actual, pointed-to nodes, i.e. "orphaned" nodes not included.
    size_t count = 0;

    // Merges a heap into *this. The given heap must "reside" in the nodes vector.
    void merge(node* other) { root = merge(root, other); }
    // Merges two heaps, "residing" in the nodes vector, and returns a pointer to the new root.
    node* merge(node*, node*);

    // All insertions are performed sequentially on construction (heap initialization).
    // This leads to tighter invariants and a couple of optimizations more.
    void insert(const T&);
public:
    // Reserves space for a given count of nodes (vertices) and prepares
    // the heap for Dijkstra's algorithm by inserting a value of infinity
    // for every node except the starting one, which has value zero.
    // If numVertices == 0, no memory is allocated and the heap is empty.
    // Precondition: start < numVertices (unless numVertices == 0).
    // Important postcondition: for every vertex v its value is contained in nodes[v].
    PairingHeapStatic(const size_t numVertices, const vertex start,
        const T& zero = T{ 0 }, const T& infinity = std::numeric_limits<T>::max())
    {
        reset(numVertices, start, zero, infinity);
    }

    // We don't really need these. Not trivial (nodes point into the arena), so cannot be defaulted.
    PairingHeapStatic(const PairingHeapStatic&) = delete;
    PairingHeapStatic(PairingHeapStatic&&) = delete;
    PairingHeapStatic& operator=(const PairingHeapStatic&) = delete;
    PairingHeapStatic& operator=(PairingHeapStatic&&) = delete;

    // Standard operation; undefined behaviour for empty heaps
    const T& peek() const { return root->value; }

    // The most complex operation: removing the root and merging all of its children.
    // Returns the extracted vertex and its key; undefined behaviour for empty heaps.
    std::pair<vertex, T> extractMin();

    // Lowers the key of a vertex. Returns false (and changes nothing) if the vertex
    // is no longer in the heap or the new key is not smaller than the current one.
    bool decreaseKey(vertex, const T&);

    // We can also keep track of the exact vertices currently in the heap
    bool contains(vertex) const;

    // More standard methods
    size_t size() const noexcept { return count; }
    bool empty() const noexcept { return (size() == 0); }

    // Discards all nodes and reinitializes for an updated number of vertices.
    // See the comment for the ctor.
    void reset(const size_t numVertices, const vertex start,
        const T& zero = T{ 0 }, const T& infinity = std::numeric_limits<T>::max());
};

template <class T>
auto PairingHeapStatic<T>::merge(node* root1, node* root2) -> node* {
    // Since reset() takes care of heap initialization by manually inserting
    // the first node, we can be sure that at this point both root1 and
    // root2 point to non-empty heaps - saving a couple of runtime checks.
    // For simplicity, let root1 be the node that "adopts" the other node
    if (root2->value < root1->value) {
        std::swap(root1, root2);
    }
    // The adopted root becomes the first child; the old first child is its right sibling
    root2->rightSibling = root1->leftChild;
    // The left child will be changed, so the old one (if any) has to know
    if (root1->leftChild) {
        root1->leftChild->predecessor = root2;
    }
    // Finally, link the two root nodes
    root1->leftChild = root2;
    root2->predecessor = root1;
    return root1;
}

template <class T>
void PairingHeapStatic<T>::insert(const T& val) {
    // Only called a predetermined amount of times by reset()
    // => no need to check whether the allocated node space is full.
    // Simple: make a new heap and merge it
    node* newNode = &nodes.emplace_back(val);
    merge(newNode);
}

template <class T>
auto PairingHeapStatic<T>::extractMin() -> std::pair<vertex, T> {
    // Saving the root's vertex & value before "freeing" the node.
    // The vertex is the node's index in the arena.
    const std::pair<vertex, T> result{ static_cast<vertex>(root - nodes.data()), peek() };
    node* nextChild = root->leftChild;
    // The node is detached from the heap, but not deallocated.
    // Setting its predecessor to nullptr is required for contains() to work.
    root->predecessor = root->leftChild = root->rightSibling = nullptr;
    // The old root's children (also heaps), each isolated from its siblings
    std::vector<node*> children;
    while (nextChild) {
        children.push_back(nextChild);
        node* curr = nextChild;
        nextChild = nextChild->rightSibling;
        curr->rightSibling = curr->predecessor = nullptr;
    }
    const size_t n = children.size();
    if (n == 0) {
        // The last value was just extracted. The root must not keep pointing at
        // the removed node, otherwise contains() would still report it as present.
        root = nullptr;
    } else {
        // First merge the children in pairs - that's where the name comes from.
        // With an odd count the last child is left unpaired.
        for (size_t i = 0; i + 1 < n; i += 2) {
            children[i] = merge(children[i], children[i + 1]);
        }
        // Then merge the resulting heaps (at the even indices) from the last to the first
        for (size_t i = (n - 1) - ((n - 1) % 2); i >= 2; i -= 2) {
            children[i - 2] = merge(children[i - 2], children[i]);
        }
        // The only heap left in the array is the new one
        root = children[0];
    }
    --count;
    return result;
}

template <class T>
bool PairingHeapStatic<T>::decreaseKey(const vertex v, const T& newKey) {
    // A removed (or never existing) vertex has no place in the heap to update
    if (!contains(v)) {
        return false;
    }
    node* const location = &nodes[v];
    // In case of invalid input, simply do nothing
    if (!(newKey < location->value)) {
        return false;
    }
    // Update the value
    location->value = newKey;
    // If the value is at the root (<=> no predecessor), nothing to change
    if (location == root) {
        return true;
    }
    // Tell its left sibling/parent it has a new right sibling/left child
    if (location == location->predecessor->rightSibling) {
        location->predecessor->rightSibling = location->rightSibling;
    } else {
        location->predecessor->leftChild = location->rightSibling;
    }
    // Tell its right sibling (if any) it has a new left sibling
    if (location->rightSibling) {
        location->rightSibling->predecessor = location->predecessor;
    }
    // Isolate the current node as a root of a new heap...
    location->rightSibling = location->predecessor = nullptr;
    // ...and merge it with the current heap
    merge(location);
    return true;
}

template <class T>
bool PairingHeapStatic<T>::contains(vertex u) const {
    // Only the root node has no predecessor, and we explicitly
    // set each removed node's predecessor to nullptr.
    return (u < nodes.size()
        && (nodes[u].predecessor != nullptr || root == &nodes[u]));
}

template <class T>
void PairingHeapStatic<T>::reset(const size_t numVertices, const vertex start, const T& zero, const T& infinity) {
    nodes.clear();
    count = numVertices;
    if (numVertices == 0) {
        root = nullptr;
        return;
    }
    // Reserving up front guarantees no reallocation below, so the node
    // pointers handed out by emplace_back() stay valid.
    nodes.reserve(numVertices);
    // The first insert must be done manually in order for the invariant in merge() to hold.
    root = &nodes.emplace_back(start == 0 ? zero : infinity);
    // Insert a new node for every vertex, preserving the ordering: first for the
    // vertices < start, then for the starting vertex, and finally for those > start
    for (vertex i = 1; i < start; ++i) {
        insert(infinity);
    }
    if (start != 0) {
        insert(zero);
    }
    for (vertex i = start + 1; i < numVertices; ++i) {
        insert(infinity);
    }
}
str) { 16 | const size_t n = str.size(); 17 | std::vector SA(n); 18 | for (size_t i = 0; i < n; ++i) 19 | SA[i] = i; 20 | std::sort(SA.begin(), SA.end(), [=](size_t i, size_t j) { return (str.substr(i) < str.substr(j)); }); 21 | return SA; 22 | } 23 | 24 | template<> // O(nlgn) 25 | std::vector buildSuffixArray(std::string_view str) { 26 | const size_t n = str.size(); 27 | std::vector SA(n), iSA(n), count(n), next(n); 28 | BitVector bh(n), b2h(n); 29 | // Sort the suffixes by their first character 30 | for (size_t i = 0; i < n; ++i) 31 | SA[i] = i; 32 | std::sort(SA.begin(), SA.end(), [=](size_t i, size_t j) {return str[i] < str[j]; }); 33 | // Mark the beginnings of each bucket 34 | bh.set(0); 35 | for (size_t i = 1; i < n; ++i) 36 | if (str[SA[i]] != str[SA[i - 1]]) 37 | bh.set(i); 38 | 39 | // Prefix-doubling: sort the suffixes by a twice the amount of characters 40 | for (size_t h = 1; h < n; h <<= 1) { 41 | // bh[i] == false if the first h characters of SA[i-1] == the first h characters of SA[i] 42 | size_t buckets = 0; 43 | for (size_t i = 0, j; i < n; i = j) { 44 | j = i + 1; 45 | while (j < n && !bh[j]) 46 | j++; 47 | next[i] = j; 48 | ++buckets; 49 | } 50 | if (buckets == n) 51 | break; // Shorter prefixes turned out to be enough for sorting 52 | // Suffixes are now separated in buckets by their first h characters 53 | for (size_t i = 0; i < n; i = next[i]) { 54 | count[i] = 0; 55 | for (size_t j = i; j < next[i]; ++j) { 56 | iSA[SA[j]] = i; 57 | } 58 | } 59 | // Here be magic 60 | count[iSA[n - h]]++; 61 | b2h.set(iSA[n - h]); 62 | for (size_t i = 0; i < n; i = next[i]) { 63 | for (size_t j = i; j < next[i]; ++j) 64 | if (SA[j] >= h) { 65 | const size_t s = SA[j] - h; 66 | const size_t head = iSA[s]; 67 | iSA[s] = head + count[head]++; 68 | b2h.set(iSA[s]); 69 | } 70 | for (size_t j = i; j < next[i]; ++j) 71 | if (SA[j] >= h) { 72 | const size_t s = SA[j] - h; 73 | if (b2h[iSA[s]]) { 74 | for (size_t k = iSA[s] + 1; k < n && !bh[k] && b2h[k]; k++) 75 | 
b2h.clear(k); 76 | } 77 | } 78 | } 79 | // Update the arrays with the new values 80 | bh |= b2h; 81 | for (size_t i = 0; i < n; ++i) 82 | SA[iSA[i]] = i; 83 | } 84 | /*for (size_t i = 0; i < n; ++i) 85 | iSA[SA[i]] = i;*/ 86 | 87 | return SA; 88 | } 89 | 90 | namespace DC3Skew_private { // Helper functions for the DC3/Skew algorithm 91 | // Lexicographic order for pairs and triples 92 | bool leq(size_t a1, size_t a2, size_t b1, size_t b2) { 93 | return(a1 < b1 || a1 == b1 && a2 <= b2); 94 | } 95 | bool leq(size_t a1, size_t a2, size_t a3, size_t b1, size_t b2, size_t b3) { 96 | return(a1 < b1 || a1 == b1 && leq(a2, a3, b2, b3)); 97 | } 98 | // Stably sort a[0..n-1] to b[0..n-1] with keys in {0..K} from r 99 | void radixPass(const std::vector& a, std::vector& b, const size_t* const r, const size_t n, const size_t K) { 100 | std::vector count(K + 1, 0); 101 | for (size_t i = 0; i < n; i++) // Count occurrences of each 'character' 102 | count[r[a[i]]]++; 103 | for (size_t i = 0, sum = 0; i <= K; i++) { // build the exclusive prefix sums 104 | const size_t t = count[i]; 105 | count[i] = sum; 106 | sum += t; 107 | } 108 | for (size_t i = 0; i < n; i++) // Sort 109 | b[count[r[a[i]]]++] = a[i]; 110 | } 111 | // Build (in SA) the suffix array of str[0..n-1], where all elements of str are in {1..K}. 
112 | // Requires str to be padded with three zeroes: str[n]=str[n+1]=str[n+2]=0, and n>=2 113 | void suffixArray(const std::vector& str, std::vector& SA, const size_t n, const size_t K) { 114 | const size_t n0 = (n + 2) / 3, n1 = (n + 1) / 3, n2 = n / 3, n02 = n0 + n2; // note: n0 >= n1 115 | std::vector str12(n02 + 3, 0); // The mod1 and mod2 triples, numbered according to their lexicographic ordering 116 | std::vector SA12(n02 + 3, 0); // The suffix array for the aforementioned triples 117 | // Generate positions of mod1 and mod2 suffixes 118 | // the "+(n0-n1)" adds a dummy mod1 suffix if n%3 == 1 119 | for (size_t i = 0, j = 0; i < n + (n0 - n1); i++) 120 | if (i % 3 != 0) 121 | str12[j++] = i; 122 | // LSB radix sort the mod1 and mod2 triples 123 | radixPass(str12, SA12, &str[2], n02, K); 124 | radixPass(SA12, str12, &str[1], n02, K); 125 | radixPass(str12, SA12, &str[0], n02, K); 126 | // Find lexicographic "names" (identifiers) of triples 127 | size_t names = 0, c0 = -1, c1 = -1, c2 = -1; 128 | for (size_t i = 0; i < n02; i++) { 129 | if (str[SA12[i]] != c0 || str[SA12[i] + 1] != c1 || str[SA12[i] + 2] != c2) { 130 | ++names; 131 | c0 = str[SA12[i]]; 132 | c1 = str[SA12[i] + 1]; 133 | c2 = str[SA12[i] + 2]; 134 | } 135 | if (SA12[i] % 3 == 1) { // left half 136 | str12[SA12[i] / 3] = names; 137 | } else { // right half 138 | str12[SA12[i] / 3 + n0] = names; 139 | } 140 | } 141 | // Recurse if the names are not yet unique 142 | if (names < n02) { 143 | suffixArray(str12, SA12, n02, names); 144 | // Store unique names in s12 using the suffix array 145 | for (size_t i = 0; i < n02; i++) 146 | str12[SA12[i]] = i + 1; 147 | } else { // otherwise, generate the suffix array of s12 directly 148 | for (size_t i = 0; i < n02; i++) 149 | SA12[str12[i] - 1] = i; 150 | } 151 | // Stably sort the mod0 suffixes from SA12 by their first character 152 | std::vector str0(n0), SA0(n0); 153 | for (size_t i = 0, j = 0; i < n02; i++) 154 | if (SA12[i] < n0) 155 | str0[j++] = 3 * 
SA12[i]; 156 | radixPass(str0, SA0, &str[0], n0, K); 157 | // Finally, merge the sorted mod0 suffixes and sorted mod12 suffixes 158 | for (size_t p = 0, t = n0 - n1, k = 0; k < n; k++) { 159 | #define GetI() (SA12[t] < n0 ? SA12[t]*3+1: (SA12[t] - n0) * 3 + 2) 160 | size_t i = GetI(); // pos of current offset 12 suffix 161 | size_t j = SA0[p]; // pos of current offset 0 suffix 162 | if (SA12[t] < n0 ? // different compares for mod1 and mod2 suffixes 163 | leq(str[i], str12[SA12[t] + n0], str[j], str12[j / 3]) : 164 | leq(str[i], str[i + 1], str12[SA12[t] - n0 + 1], str[j], str[j + 1], str12[j / 3 + n0])) 165 | { // suffix from SA12 is smaller 166 | SA[k] = i; 167 | ++t; 168 | if (t == n02) // ...done -> only SA0 suffixes left 169 | for (k++; p < n0; p++, k++) 170 | SA[k] = SA0[p]; 171 | } else {// suffix from SA0 is smaller 172 | SA[k] = j; 173 | ++p; 174 | if (p == n0) // ...done -> only SA12 suffixes left 175 | for (k++; t < n02; t++, k++) 176 | SA[k] = GetI(); 177 | } 178 | #undef GetI 179 | } 180 | } 181 | } // namespace DC3Skew_private 182 | 183 | template<> // O(n) 184 | std::vector buildSuffixArray