├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-BITINTR ├── LICENSE-MIT ├── benches ├── bp.rs ├── common │ └── mod.rs ├── elias_fano.rs ├── elias_fano_adversarial.rs ├── elias_fano_construction.rs ├── elias_fano_iterator.rs ├── rank.rs ├── rmq.rs ├── select.rs ├── select_adversarial.rs ├── select_iter.rs └── sparse_equals.rs ├── images ├── elias_fano_access_random.svg ├── elias_fano_pred_adversarial.svg ├── elias_fano_pred_random.svg ├── heap.svg ├── rank_comparison.svg ├── rmq_comparison.svg └── select_comparison.svg ├── readme.md └── src ├── bit_vec ├── fast_rs_vec │ ├── bitset.rs │ ├── iter.rs │ ├── mod.rs │ ├── select.rs │ └── tests.rs ├── mask.rs ├── mod.rs ├── sparse.rs └── tests.rs ├── elias_fano ├── mod.rs └── tests.rs ├── lib.rs ├── rmq ├── binary_rmq │ ├── mod.rs │ └── tests.rs ├── fast_rmq │ ├── mod.rs │ └── tests.rs └── mod.rs ├── trees ├── bp │ ├── builder.rs │ ├── lookup.rs │ ├── lookup_query.rs │ ├── mod.rs │ └── tests.rs ├── mmt.rs └── mod.rs ├── util ├── elias_fano_iter.rs ├── general_iter.rs ├── mod.rs ├── pdep.rs └── unroll.rs └── wavelet ├── mod.rs └── tests.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | 13 | jobs: 14 | build: 15 | runs-on: ubuntu-latest 16 | env: 17 | RUSTFLAGS: -C target-cpu=native 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Build 21 | run: cargo build --verbose --all-features 22 | - name: Run tests 23 | run: cargo test --verbose --all-features 24 | 25 | test-fallbacks: 26 | runs-on: ubuntu-latest 27 | env: 28 | RUSTFLAGS: -C target-cpu=x86-64 29 | steps: 30 | - uses: actions/checkout@v4 31 | - name: Build 32 | run: cargo build --verbose --features serde 33 | - name: Run tests 34 | run: cargo test --verbose --features serde 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | 4 | **.iml 5 | .idea 6 | 7 | /run -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "vers-vecs" 3 | version = "1.6.3" 4 | edition = "2021" 5 | authors = ["Johannes \"Cydhra\" Hengstler"] 6 | description = "A collection of succinct data structures supported by fast implementations of rank and select queries." 7 | readme = "readme.md" 8 | repository = "https://github.com/Cydhra/vers" 9 | license = "MIT OR Apache-2.0" 10 | keywords = ["succinct", "elias-fano", "bitvector", "rank", "wavelet-matrix"] 11 | categories = ["data-structures", "algorithms"] 12 | documentation = "https://docs.rs/vers-vecs" 13 | exclude = [ 14 | "images/*", 15 | ".github/*", 16 | ] 17 | 18 | [dependencies] 19 | serde = { version = "1.0", optional = true, features = ["derive"] } 20 | 21 | [dev-dependencies] 22 | # benchmarking 23 | criterion = { version = "0.5.1", features = ["html_reports"] } 24 | rand = { version = "0.8", features = ["alloc"] } 25 | 26 | [features] 27 | simd = [] 28 | bp_u16_lookup = [] 29 | docsrs = [] # special feature for docs.rs to enable doc_auto_cfg on nightly 30 | 31 | [[bench]] 32 | name = "rank" 33 | harness = false 34 | 35 | [[bench]] 36 | name = "select" 37 | harness = false 38 | 39 | [[bench]] 40 | name = "select_iter" 41 | harness = false 42 | 43 | [[bench]] 44 | name = "select_adversarial" 45 | harness = false 46 | 47 | [[bench]] 48 | name = "sparse_equals" 49 | harness = false 50 | 51 | [[bench]] 52 | name = "elias_fano" 53 | harness = false 54 | 55 | [[bench]] 56 | name = "elias_fano_adversarial" 57 | harness = false 58 | 59 | [[bench]] 60 | name = "elias_fano_iterator" 61 | harness = false 62 | 63 | [[bench]] 
64 | name = "rmq" 65 | harness = false 66 | 67 | [[bench]] 68 | name = "bp" 69 | harness = false 70 | 71 | [[bench]] 72 | name = "elias_fano_construction" 73 | harness = false 74 | 75 | [profile.bench] 76 | lto = true 77 | 78 | [package.metadata.docs.rs] 79 | all-features = true 80 | rustc-args = ["-C", "target-cpu=native"] 81 | rustdoc-args = ["-C", "target-cpu=native", "--cfg", "docsrs"] 82 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /LICENSE-BITINTR: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-2017 Gonzalo Brito Gadeschi 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Johannes Hengstler 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /benches/bp.rs: -------------------------------------------------------------------------------- 1 | #![allow(long_running_const_eval)] 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; 4 | use rand::rngs::StdRng; 5 | use rand::{Rng, SeedableRng}; 6 | use std::cmp::Reverse; 7 | use std::collections::{BinaryHeap, HashSet}; 8 | use vers_vecs::trees::bp::BpBuilder; 9 | use vers_vecs::trees::bp::BpTree; 10 | use vers_vecs::trees::{Tree, TreeBuilder}; 11 | 12 | mod common; 13 | 14 | const BLOCK_SIZE: usize = 1024; 15 | 16 | // TODO this function has nlogn runtime, which is a bit too much for the largest trees 17 | fn generate_tree(rng: &mut R, nodes: u64) -> BpTree { 18 | // generate prüfer sequence 19 | let mut sequence = vec![0; (nodes - 2) as usize]; 20 | for i in 0..nodes - 2 { 21 | sequence[i as usize] = rng.gen_range(0..nodes - 1); 22 | } 23 | 24 | // decode prüfer sequence 25 | let mut degrees = vec![1; nodes as usize]; 26 | sequence.iter().for_each(|i| degrees[*i as usize] += 1); 27 | 28 | let mut prefix_sum = vec![0; nodes as usize]; 29 | let mut sum = 0; 30 | degrees.iter().enumerate().for_each(|(i, d)| { 31 | prefix_sum[i] = sum; 32 | sum += d; 33 | }); 34 | 35 | let mut children = vec![0u64; sum]; 36 | let mut assigned_children = vec![0; nodes as usize]; 37 | 38 | // keep a priority queue of nodes with degree one to reduce runtime from O(n^2) to O(n log n) 39 | let mut degree_one_set = BinaryHeap::new(); 40 | degrees 41 | .iter() 42 | .enumerate() 43 | .filter(|(_, &v)| v == 1) 44 | .for_each(|(idx, _)| degree_one_set.push(Reverse(idx as u64))); 45 | 46 | sequence.iter().for_each(|&i| { 47 | let j = degree_one_set.pop().unwrap().0; 48 | children[prefix_sum[i as usize] + assigned_children[i as usize]] = j; 49 | children[prefix_sum[j as usize] + assigned_children[j as usize]] = i; 50 | degrees[i as usize] -= 1; 51 | if 
degrees[i as usize] == 1 { 52 | degree_one_set.push(Reverse(i)) 53 | } 54 | 55 | degrees[j as usize] -= 1; 56 | if degrees[j as usize] == 1 { 57 | degree_one_set.push(Reverse(j)) 58 | } 59 | 60 | assigned_children[i as usize] += 1; 61 | assigned_children[j as usize] += 1; 62 | }); 63 | 64 | assert_eq!(degrees.iter().sum::(), 2); 65 | let u = degree_one_set.pop().unwrap().0; 66 | let v = degree_one_set.pop().unwrap().0; 67 | 68 | children[prefix_sum[u as usize] + assigned_children[u as usize]] = v; 69 | children[prefix_sum[v as usize] + assigned_children[v as usize]] = u; 70 | 71 | // build tree 72 | let mut bpb = BpBuilder::with_capacity(nodes); 73 | let mut stack = Vec::new(); 74 | let mut visited = HashSet::with_capacity(nodes as usize); 75 | visited.insert(0); 76 | stack.push((0, 0u64, true)); 77 | while let Some((depth, node, enter)) = stack.pop() { 78 | if enter { 79 | bpb.enter_node(); 80 | stack.push((depth, node, false)); 81 | for child in children 82 | .iter() 83 | .take(*prefix_sum.get(node as usize + 1).unwrap_or(&children.len())) 84 | .skip(prefix_sum[node as usize]) 85 | { 86 | if visited.insert(*child) { 87 | stack.push((depth + 1, *child, true)) 88 | } 89 | } 90 | } else { 91 | bpb.leave_node(); 92 | } 93 | } 94 | 95 | bpb.build().unwrap() 96 | } 97 | 98 | fn bench_navigation(b: &mut Criterion) { 99 | let mut group = b.benchmark_group("bp"); 100 | group.plot_config(common::plot_config()); 101 | 102 | for l in common::SIZES { 103 | // fix the rng seed because the measurements depend on the input structure. 104 | // to make multiple runs of the benchmark comparable, we fix the seed. 
105 | // this is only a valid approach to check for performance improvements, it may not give 106 | // an accurate summary of the library's runtime 107 | let mut rng = StdRng::from_seed([0; 32]); 108 | 109 | let bp = generate_tree(&mut rng, l as u64); 110 | let node_handles = (0..l).map(|i| bp.node_handle(i)).collect::>(); 111 | 112 | group.bench_with_input(BenchmarkId::new("parent", l), &l, |b, _| { 113 | b.iter_batched( 114 | || node_handles[rng.gen_range(0..node_handles.len())], 115 | |h| black_box(bp.parent(h)), 116 | BatchSize::SmallInput, 117 | ) 118 | }); 119 | 120 | group.bench_with_input(BenchmarkId::new("last_child", l), &l, |b, _| { 121 | b.iter_batched( 122 | || node_handles[rng.gen_range(0..node_handles.len())], 123 | |h| black_box(bp.last_child(h)), 124 | BatchSize::SmallInput, 125 | ) 126 | }); 127 | 128 | group.bench_with_input(BenchmarkId::new("next_sibling", l), &l, |b, _| { 129 | b.iter_batched( 130 | || node_handles[rng.gen_range(0..node_handles.len())], 131 | |h| black_box(bp.next_sibling(h)), 132 | BatchSize::SmallInput, 133 | ) 134 | }); 135 | 136 | group.bench_with_input(BenchmarkId::new("prev_sibling", l), &l, |b, _| { 137 | b.iter_batched( 138 | || node_handles[rng.gen_range(0..node_handles.len())], 139 | |h| black_box(bp.previous_sibling(h)), 140 | BatchSize::SmallInput, 141 | ) 142 | }); 143 | } 144 | } 145 | 146 | criterion_group!(benches, bench_navigation); 147 | criterion_main!(benches); 148 | -------------------------------------------------------------------------------- /benches/common/mod.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use criterion::PlotConfiguration; 4 | use rand::distributions::{Distribution, Uniform}; 5 | use rand::prelude::ThreadRng; 6 | use vers_vecs::{BitVec, RsVec}; 7 | 8 | pub const SIZES: [usize; 10] = [ 9 | 1 << 8, 10 | 1 << 10, 11 | 1 << 12, 12 | 1 << 14, 13 | 1 << 16, 14 | 1 << 18, 15 | 1 << 20, 16 | 1 << 22, 17 | 1 << 24, 18 | 1 << 
26,
];

/// Build an `RsVec` of `len` bits filled with uniformly random words.
/// Note: only whole 64-bit words are appended, so `len` is rounded down to a multiple of 64.
pub fn construct_vers_vec(rng: &mut ThreadRng, len: usize) -> RsVec {
    let sample = Uniform::new(0, u64::MAX);

    let mut bit_vec = BitVec::new();
    for _ in 0..len / 64 {
        bit_vec.append_word(sample.sample(rng));
    }

    RsVec::from_bit_vec(bit_vec)
}

/// Fill a `Vec<u64>` of length `len` with uniformly random values.
/// NOTE(review): the dump stripped the generic return type; `Vec<u64>` is reconstructed
/// from the `Uniform::new(0, u64::MAX)` sampler — confirm upstream.
pub fn fill_random_vec(rng: &mut ThreadRng, len: usize) -> Vec<u64> {
    let sample = Uniform::new(0, u64::MAX);

    let mut vec = Vec::with_capacity(len);
    for _ in 0..len {
        vec.push(sample.sample(rng));
    }

    vec
}

/// Shared plot configuration: logarithmic axis because SIZES grows geometrically.
pub fn plot_config() -> PlotConfiguration {
    PlotConfiguration::default().summary_scale(criterion::AxisScale::Logarithmic)
}
--------------------------------------------------------------------------------
/benches/elias_fano.rs:
--------------------------------------------------------------------------------
use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use rand::distributions::{Distribution, Standard, Uniform};
use rand::{thread_rng, Rng};
use vers_vecs::EliasFanoVec;

mod common;

/// Benchmark predecessor/successor/rank queries on an Elias-Fano vector built from
/// uniformly random sorted input, against binary search on the plain sorted slice.
fn bench_ef(b: &mut Criterion) {
    let mut rng = thread_rng();

    let mut group = b.benchmark_group("Elias-Fano: Randomized Input");
    group.plot_config(common::plot_config());

    for l in common::SIZES {
        let mut sequence = (&mut rng)
            .sample_iter(Standard)
            .take(l)
            .collect::<Vec<u64>>();
        sequence.sort_unstable();
        let ef_vec = EliasFanoVec::from_slice(&sequence);
        // queries are drawn from within the stored range so predecessor/successor never fail
        let pred_sample = Uniform::new(ef_vec.get_unchecked(0), sequence.last().unwrap());

        group.bench_with_input(BenchmarkId::new("predecessor", l), &l, |b, _| {
            b.iter_batched(
                || pred_sample.sample(&mut rng),
                |e| black_box(ef_vec.predecessor_unchecked(e)),
                BatchSize::SmallInput,
            )
        });

        group.bench_with_input(BenchmarkId::new("successor", l), &l, |b, _| {
            b.iter_batched(
                || pred_sample.sample(&mut rng),
                |e| black_box(ef_vec.successor_unchecked(e)),
                BatchSize::SmallInput,
            )
        });

        group.bench_with_input(BenchmarkId::new("rank", l), &l, |b, _| {
            b.iter_batched(
                || pred_sample.sample(&mut rng),
                |e| black_box(ef_vec.rank(e)),
                BatchSize::SmallInput,
            )
        });

        group.bench_with_input(BenchmarkId::new("bin search", l), &l, |b, _| {
            b.iter_batched(
                || pred_sample.sample(&mut rng),
                |e| black_box(sequence.partition_point(|&x| x <= e) - 1),
                BatchSize::SmallInput,
            )
        });
    }
    group.finish();
}

criterion_group!(benches, bench_ef);
criterion_main!(benches);
--------------------------------------------------------------------------------
/benches/elias_fano_adversarial.rs:
--------------------------------------------------------------------------------
use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use rand::distributions::{Distribution, Standard, Uniform};
use rand::{thread_rng, Rng};
use vers_vecs::EliasFanoVec;

mod common;

/// Benchmark Elias-Fano predecessor/rank on an adversarial distribution (dense low
/// values with a few far-away high values) versus uniformly random input.
fn bench_ef(b: &mut Criterion) {
    let mut rng = thread_rng();

    let mut group = b.benchmark_group("Elias-Fano: Adversarial Input");
    group.plot_config(common::plot_config());

    let dist_high = Uniform::new(u64::MAX / 2 - 200, u64::MAX / 2 - 1);
    for l in common::SIZES {
        // a distribution clustered at the low end with some but not too many duplicates
        let dist_low = Uniform::new(0, l as u64);
        let query_distribution = Uniform::new(0, l);

        // prepare a uniformly distributed sequence
        let mut sequence = (&mut rng)
            .sample_iter(Standard)
            .take(l)
            .collect::<Vec<u64>>();
        sequence.sort_unstable();
        let uniform_ef_vec = EliasFanoVec::from_slice(&sequence);

        // query random values from the actual sequences, to be equivalent to the worst case
        // benchmark 
below 30 | group.bench_with_input(BenchmarkId::new("uniform predecessor", l), &l, |b, _| { 31 | b.iter_batched( 32 | || sequence[query_distribution.sample(&mut rng)], 33 | |e| black_box(uniform_ef_vec.predecessor_unchecked(e)), 34 | BatchSize::SmallInput, 35 | ) 36 | }); 37 | drop(uniform_ef_vec); 38 | 39 | // prepare a sequence of low values with a few high values at the end 40 | let mut sequence = (&mut rng) 41 | .sample_iter(dist_low) 42 | .take(l - 100) 43 | .collect::>(); 44 | sequence.sort_unstable(); 45 | let mut sequence_top = (&mut rng) 46 | .sample_iter(dist_high) 47 | .take(100) 48 | .collect::>(); 49 | sequence_top.sort_unstable(); 50 | sequence.append(&mut sequence_top); 51 | 52 | let bad_ef_vec = EliasFanoVec::from_slice(&sequence); 53 | // query random values from the actual sequences, to force long searches in the lower vec 54 | group.bench_with_input( 55 | BenchmarkId::new("adversarial predecessor", l), 56 | &l, 57 | |b, _| { 58 | b.iter_batched( 59 | || sequence[query_distribution.sample(&mut rng)], 60 | |e| black_box(bad_ef_vec.predecessor_unchecked(e)), 61 | BatchSize::SmallInput, 62 | ) 63 | }, 64 | ); 65 | 66 | group.bench_with_input(BenchmarkId::new("adversarial rank", l), &l, |b, _| { 67 | b.iter_batched( 68 | || sequence[query_distribution.sample(&mut rng)], 69 | |e| black_box(bad_ef_vec.rank(e)), 70 | BatchSize::SmallInput, 71 | ) 72 | }); 73 | drop(bad_ef_vec); 74 | 75 | group.bench_with_input(BenchmarkId::new("bin search", l), &l, |b, _| { 76 | b.iter_batched( 77 | || sequence[query_distribution.sample(&mut rng)], 78 | |e| black_box(sequence.partition_point(|&x| x <= e) - 1), 79 | BatchSize::SmallInput, 80 | ) 81 | }); 82 | } 83 | group.finish(); 84 | } 85 | 86 | criterion_group!(benches, bench_ef); 87 | criterion_main!(benches); 88 | -------------------------------------------------------------------------------- /benches/elias_fano_construction.rs: -------------------------------------------------------------------------------- 1 | use 
criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; 2 | use rand::distributions::Standard; 3 | use rand::{thread_rng, Rng}; 4 | 5 | use vers_vecs::EliasFanoVec; 6 | 7 | mod common; 8 | 9 | fn bench_ef(b: &mut Criterion) { 10 | let mut rng = thread_rng(); 11 | 12 | let mut group = b.benchmark_group("Elias-Fano: Construction"); 13 | 14 | for &l in common::SIZES[0..8].iter() { 15 | group.bench_with_input(BenchmarkId::new("construction", l), &l, |b, _| { 16 | b.iter_batched( 17 | || { 18 | let mut sequence = (&mut rng) 19 | .sample_iter(Standard) 20 | .take(l) 21 | .collect::>(); 22 | sequence.sort_unstable(); 23 | sequence 24 | }, 25 | |e| black_box(EliasFanoVec::from_slice(&e)), 26 | BatchSize::LargeInput, 27 | ) 28 | }); 29 | } 30 | group.finish(); 31 | } 32 | 33 | criterion_group!(benches, bench_ef); 34 | criterion_main!(benches); 35 | -------------------------------------------------------------------------------- /benches/elias_fano_iterator.rs: -------------------------------------------------------------------------------- 1 | use std::time::{Duration, Instant}; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; 4 | use rand::distributions::Standard; 5 | use rand::{thread_rng, Rng}; 6 | 7 | use vers_vecs::EliasFanoVec; 8 | 9 | mod common; 10 | 11 | fn bench_ef(b: &mut Criterion) { 12 | let mut rng = thread_rng(); 13 | 14 | let mut group = b.benchmark_group("Elias-Fano: Iteration"); 15 | group.plot_config(common::plot_config()); 16 | 17 | for l in common::SIZES { 18 | let mut sequence = (&mut rng) 19 | .sample_iter(Standard) 20 | .take(l) 21 | .collect::>(); 22 | sequence.sort_unstable(); 23 | let ef_vec = EliasFanoVec::from_slice(&sequence); 24 | 25 | group.bench_with_input(BenchmarkId::new("manual indexing", l), &l, |b, _| { 26 | b.iter_custom(|iters| { 27 | let mut time = Duration::new(0, 0); 28 | let mut i = 0; 29 | 30 | let start = Instant::now(); 31 | while i < iters { 32 | 
black_box(ef_vec.get_unchecked(i as usize % l));
                    i += 1;
                }
                time += start.elapsed();

                time
            })
        });

        group.bench_with_input(BenchmarkId::new("iterator", l), &l, |b, _| {
            b.iter_custom(|iters| {
                // measure only time spent inside the iterator; setup between passes
                // (restarting the iterator) is excluded from the accumulated total
                let mut time = Duration::new(0, 0);
                let mut i = 0;

                while i < iters {
                    let start = Instant::now();
                    for e in ef_vec.iter().take((iters - i) as usize) {
                        black_box(e);
                        i += 1;
                    }
                    time += start.elapsed();
                }

                time
            })
        });
    }
    group.finish();
}

criterion_group!(benches, bench_ef);
criterion_main!(benches);
--------------------------------------------------------------------------------
/benches/rank.rs:
--------------------------------------------------------------------------------
use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use rand::distributions::{Distribution, Uniform};

mod common;

/// Benchmark rank0 queries at uniformly random positions in random bit vectors.
fn bench_rank(b: &mut Criterion) {
    let mut rng = rand::thread_rng();

    let mut group = b.benchmark_group("Rank: Randomized Input");
    group.plot_config(common::plot_config());

    for l in common::SIZES {
        let bit_vec = common::construct_vers_vec(&mut rng, l);
        let pos_dist = Uniform::new(0, bit_vec.len());
        group.bench_with_input(BenchmarkId::new("rank", l), &l, |b, _| {
            b.iter_batched(
                || pos_dist.sample(&mut rng),
                |pos| black_box(bit_vec.rank0(pos)),
                BatchSize::SmallInput,
            )
        });
    }
    group.finish();
}

criterion_group!(benches, bench_rank);
criterion_main!(benches);
--------------------------------------------------------------------------------
/benches/rmq.rs:
--------------------------------------------------------------------------------
use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use rand::distributions::{Distribution, Uniform};
3 | use rand::Rng; 4 | use vers_vecs::rmq::fast_rmq::FastRmq; 5 | 6 | mod common; 7 | 8 | fn bench_rmq(b: &mut Criterion) { 9 | let mut rng = rand::thread_rng(); 10 | 11 | let mut group = b.benchmark_group("Range Minimum Query: Randomized Input"); 12 | group.plot_config(common::plot_config()); 13 | 14 | for l in common::SIZES { 15 | let rmq = FastRmq::from_vec(common::fill_random_vec(&mut rng, l)); 16 | let sample = Uniform::new(0, rmq.len()); 17 | group.bench_with_input(BenchmarkId::new("range_min", l), &l, |b, _| { 18 | b.iter_batched( 19 | || { 20 | let begin = sample.sample(&mut rng); 21 | let end = begin + rng.gen_range(0..rmq.len() - begin); 22 | (begin, end) 23 | }, 24 | |e| black_box(rmq.range_min(e.0, e.1)), 25 | BatchSize::SmallInput, 26 | ) 27 | }); 28 | } 29 | 30 | group.finish(); 31 | } 32 | 33 | criterion_group!(benches, bench_rmq); 34 | criterion_main!(benches); 35 | -------------------------------------------------------------------------------- /benches/select.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; 2 | use rand::distributions::{Distribution, Uniform}; 3 | 4 | mod common; 5 | 6 | fn bench_select(b: &mut Criterion) { 7 | let mut rng = rand::thread_rng(); 8 | 9 | let mut group = b.benchmark_group("Select: Randomized Input"); 10 | group.plot_config(common::plot_config()); 11 | 12 | for l in common::SIZES { 13 | let bit_vec = common::construct_vers_vec(&mut rng, l); 14 | let sample = Uniform::new(0, bit_vec.len() / 4); 15 | group.bench_with_input(BenchmarkId::new("select", l), &l, |b, _| { 16 | b.iter_batched( 17 | || sample.sample(&mut rng), 18 | |e| black_box(bit_vec.select0(e)), 19 | BatchSize::SmallInput, 20 | ) 21 | }); 22 | } 23 | group.finish(); 24 | } 25 | 26 | criterion_group!(benches, bench_select); 27 | criterion_main!(benches); 28 | 
-------------------------------------------------------------------------------- /benches/select_adversarial.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; 2 | use rand::distributions::Uniform; 3 | use rand::{thread_rng, Rng}; 4 | use vers_vecs::{BitVec, RsVec}; 5 | 6 | mod common; 7 | 8 | fn select_worst_case(b: &mut Criterion) { 9 | let mut rng = thread_rng(); 10 | let mut group = b.benchmark_group("Select: Adversarial Input"); 11 | group.plot_config(common::plot_config()); 12 | 13 | for order_of_magnitude in [14, 16, 18, 20, 22, 24, 26] { 14 | let length = 1 << order_of_magnitude; 15 | 16 | // uniformly distributed sequence 17 | let bit_vec = common::construct_vers_vec(&mut rng, length); 18 | let uniform_sample = Uniform::new( 19 | bit_vec.rank0(bit_vec.len()) / 4 * 3, 20 | bit_vec.rank0(bit_vec.len()), 21 | ); 22 | group.bench_with_input( 23 | BenchmarkId::new("uniform input", length), 24 | &length, 25 | |b, _| { 26 | b.iter_batched( 27 | || rng.sample(uniform_sample), 28 | |e| black_box(bit_vec.select1(e)), 29 | BatchSize::SmallInput, 30 | ) 31 | }, 32 | ); 33 | drop(bit_vec); 34 | 35 | // construct a vector with only one select block and put its last one bit at the end 36 | // of the vector 37 | 38 | let mut bit_vec = BitVec::with_capacity(length / 64); 39 | for _ in 0..(1usize << 13) / 64 - 1 { 40 | bit_vec.append_word(u64::MAX); 41 | } 42 | bit_vec.append_word(u64::MAX >> 1); 43 | 44 | for _ in 0..(length - (1 << 13)) / 64 - 1 { 45 | bit_vec.append_word(0); 46 | } 47 | bit_vec.append_word(2); 48 | let bit_vec = RsVec::from_bit_vec(bit_vec); 49 | 50 | group.bench_with_input( 51 | BenchmarkId::new("worst case input", length), 52 | &length, 53 | |b, _| b.iter(|| black_box(bit_vec.select1((1 << 13) - 1))), 54 | ); 55 | drop(bit_vec); 56 | } 57 | group.finish(); 58 | } 59 | 60 | criterion_group!(benches, select_worst_case); 61 
| criterion_main!(benches); 62 | -------------------------------------------------------------------------------- /benches/select_iter.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; 2 | use std::time::{Duration, Instant}; 3 | 4 | mod common; 5 | 6 | fn bench_select_iter(b: &mut Criterion) { 7 | let mut rng = rand::thread_rng(); 8 | 9 | let mut group = b.benchmark_group("Select Iterator: Randomized Input"); 10 | group.plot_config(common::plot_config()); 11 | 12 | for l in common::SIZES { 13 | let bit_vec = common::construct_vers_vec(&mut rng, l); 14 | 15 | group.bench_with_input(BenchmarkId::new("select queries", l), &l, |b, _| { 16 | b.iter_custom(|iters| { 17 | let mut time = Duration::new(0, 0); 18 | let mut i = 0usize; 19 | let rank1 = bit_vec.rank1(bit_vec.len()); 20 | 21 | let start = Instant::now(); 22 | while (i as u64) < iters { 23 | black_box(bit_vec.select1(i % rank1)); 24 | i += 1; 25 | } 26 | time += start.elapsed(); 27 | 28 | time 29 | }) 30 | }); 31 | 32 | group.bench_with_input(BenchmarkId::new("select iterator", l), &l, |b, _| { 33 | b.iter_custom(|iters| { 34 | let mut time = Duration::new(0, 0); 35 | let mut i = 0; 36 | 37 | while i < iters { 38 | let iter = bit_vec.iter1().take((iters - i) as usize); 39 | let start = Instant::now(); 40 | for e in iter { 41 | black_box(e); 42 | i += 1; 43 | } 44 | time += start.elapsed(); 45 | } 46 | 47 | time 48 | }) 49 | }); 50 | 51 | #[cfg(all( 52 | feature = "simd", 53 | target_arch = "x86_64", 54 | target_feature = "avx", 55 | target_feature = "avx2", 56 | target_feature = "avx512f", 57 | target_feature = "avx512bw", 58 | ))] 59 | group.bench_with_input(BenchmarkId::new("bitset iterator", l), &l, |b, _| { 60 | b.iter_custom(|iters| { 61 | let mut time = Duration::new(0, 0); 62 | let mut i = 0; 63 | 64 | while i < iters { 65 | let iter = bit_vec.bit_set_iter1().take((iters - i) as 
usize); 66 | let start = Instant::now(); 67 | for e in iter { 68 | black_box(e); 69 | i += 1; 70 | } 71 | time += start.elapsed(); 72 | } 73 | 74 | time 75 | }) 76 | }); 77 | } 78 | group.finish(); 79 | } 80 | 81 | criterion_group!(benches, bench_select_iter); 82 | criterion_main!(benches); 83 | -------------------------------------------------------------------------------- /benches/sparse_equals.rs: -------------------------------------------------------------------------------- 1 | use criterion::measurement::{Measurement, ValueFormatter}; 2 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; 3 | use rand::rngs::ThreadRng; 4 | use rand::seq::index::sample; 5 | use rand::Rng; 6 | use std::time::Instant; 7 | use vers_vecs::{BitVec, RsVec}; 8 | 9 | mod common; 10 | 11 | pub const SIZES: [usize; 7] = [ 12 | 1 << 14, 13 | 1 << 16, 14 | 1 << 18, 15 | 1 << 20, 16 | 1 << 22, 17 | 1 << 24, 18 | 1 << 26, 19 | ]; 20 | 21 | /// How full the vector is filled with ones. 
22 | const FILL_FACTORS: [f64; 6] = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]; 23 | 24 | /// Generate a bitvector with `fill_factors` percent ones at random positions 25 | fn generate_vector_with_fill(rng: &mut ThreadRng, len: usize, fill_factor: f64) -> BitVec { 26 | let mut bit_vec1 = BitVec::from_zeros(len); 27 | 28 | // flip exactly fill-factor * len bits so the equality check is not trivial 29 | sample(rng, len, (fill_factor * len as f64) as usize) 30 | .iter() 31 | .for_each(|i| { 32 | bit_vec1.flip_bit(i); 33 | }); 34 | 35 | bit_vec1 36 | } 37 | 38 | fn bench(b: &mut Criterion) { 39 | let mut rng = rand::thread_rng(); 40 | 41 | for len in SIZES { 42 | let mut group = b.benchmark_group(format!("Equals Benchmark: {}", len)); 43 | group.plot_config(common::plot_config()); 44 | 45 | for fill_factor in FILL_FACTORS { 46 | group.bench_with_input( 47 | BenchmarkId::new("sparse overhead equal", fill_factor), 48 | &fill_factor, 49 | |b, _| { 50 | b.iter_custom(|iters| { 51 | let mut time_diff = TimeDiff.zero(); 52 | 53 | for _ in 0..iters { 54 | let vec = generate_vector_with_fill(&mut rng, len, fill_factor); 55 | let vec = RsVec::from_bit_vec(vec); 56 | 57 | let start_full = TimeDiff.start(); 58 | black_box(vec.full_equals(&vec)); 59 | time_diff -= TimeDiff.end(start_full); 60 | 61 | let start_sparse = TimeDiff.start(); 62 | black_box(vec.sparse_equals::(&vec)); 63 | time_diff += TimeDiff.end(start_sparse); 64 | } 65 | 66 | time_diff 67 | }); 68 | }, 69 | ); 70 | 71 | group.bench_with_input( 72 | BenchmarkId::new("sparse overhead unequal", fill_factor), 73 | &fill_factor, 74 | |b, _| { 75 | b.iter_custom(|iters| { 76 | let mut time_diff = TimeDiff.zero(); 77 | 78 | for _ in 0..iters { 79 | let vec = generate_vector_with_fill(&mut rng, len, fill_factor); 80 | let mut vec2 = vec.clone(); 81 | let vec = RsVec::from_bit_vec(vec); 82 | 83 | vec2.flip_bit(vec.select1(vec.rank1(len) - 1)); 84 | vec2.flip_bit(vec.select0(rng.gen_range(0..(vec.rank0(len) - 1)))); 85 | let vec2 = 
RsVec::from_bit_vec(vec2); 86 | 87 | let start_full = TimeDiff.start(); 88 | black_box(vec.full_equals(&vec2)); 89 | time_diff -= TimeDiff.end(start_full); 90 | 91 | let start_sparse = TimeDiff.start(); 92 | black_box(vec.sparse_equals::(&vec2)); 93 | time_diff += TimeDiff.end(start_sparse); 94 | } 95 | 96 | time_diff 97 | }); 98 | }, 99 | ); 100 | } 101 | 102 | group.finish(); 103 | } 104 | } 105 | 106 | /// Measurement for differential time measurements. 107 | struct TimeDiff; 108 | 109 | impl Measurement for TimeDiff { 110 | type Intermediate = Instant; 111 | type Value = isize; 112 | 113 | fn start(&self) -> Self::Intermediate { 114 | Instant::now() 115 | } 116 | 117 | fn end(&self, i: Self::Intermediate) -> Self::Value { 118 | i.elapsed().as_nanos() as isize 119 | } 120 | 121 | fn add(&self, v1: &Self::Value, v2: &Self::Value) -> Self::Value { 122 | v1 + v2 123 | } 124 | 125 | fn zero(&self) -> Self::Value { 126 | 0 127 | } 128 | 129 | fn to_f64(&self, value: &Self::Value) -> f64 { 130 | *value as f64 131 | } 132 | 133 | fn formatter(&self) -> &dyn ValueFormatter { 134 | &NanoSecondFormatter 135 | } 136 | } 137 | 138 | struct NanoSecondFormatter; 139 | 140 | impl ValueFormatter for NanoSecondFormatter { 141 | fn format_value(&self, value: f64) -> String { 142 | let absolute = value.abs(); 143 | if absolute < 1.0 { 144 | // ns = time in nanoseconds per iteration 145 | format!("{:.2} ps", value * 1e3) 146 | } else if absolute < 10f64.powi(3) { 147 | format!("{:.2} ns", value) 148 | } else if absolute < 10f64.powi(6) { 149 | format!("{:.2} us", value / 1e3) 150 | } else if absolute < 10f64.powi(9) { 151 | format!("{:.2} ms", value / 1e6) 152 | } else { 153 | format!("{:.2} s", value / 1e9) 154 | } 155 | } 156 | 157 | fn format_throughput(&self, _throughput: &Throughput, _value: f64) -> String { 158 | unimplemented!("throughput formatting not supported") 159 | } 160 | 161 | fn scale_values(&self, _typical_value: f64, _values: &mut [f64]) -> &'static str { 162 | 
"ns" 163 | } 164 | 165 | fn scale_throughputs( 166 | &self, 167 | _typical_value: f64, 168 | _throughput: &Throughput, 169 | _values: &mut [f64], 170 | ) -> &'static str { 171 | unimplemented!("throughput scaling not supported") 172 | } 173 | 174 | fn scale_for_machines(&self, _values: &mut [f64]) -> &'static str { 175 | "ns" 176 | } 177 | } 178 | 179 | fn differential_measuring() -> Criterion { 180 | Criterion::default().with_measurement(TimeDiff) 181 | } 182 | 183 | criterion_group! { 184 | name = benches; 185 | config=differential_measuring(); 186 | targets = bench 187 | } 188 | criterion_main!(benches); 189 | -------------------------------------------------------------------------------- /images/elias_fano_access_random.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Elias-Fano: random-access: Comparison 4 | 5 | 6 | Average time (ms) 7 | 8 | 9 | Input 10 | 11 | 12 | 13 | 0.0001 14 | 15 | 16 | 17 | 0.001 18 | 19 | 20 | 21 | 0.01 22 | 23 | 24 | 25 | 0.1 26 | 27 | 28 | 29 | 1 30 | 31 | 32 | 33 | 10 34 | 35 | 36 | 37 | 100 38 | 39 | 40 | 41 | 42 | 1 000 43 | 44 | 45 | 46 | 10 000 47 | 48 | 49 | 50 | 100 000 51 | 52 | 53 | 54 | 1 000 000 55 | 56 | 57 | 58 | 10 000 000 59 | 60 | 61 | 62 | 100 000 000 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | cseq elias fano vector 115 | 116 | 117 | elias-fano vector 118 | 119 | 120 | sucds elias fano vector 121 | 122 | 123 | vers vector 124 | 125 | 126 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /images/elias_fano_pred_adversarial.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Elias-Fano Predecessor: Adversarial Input: Comparison 4 | 5 | 6 | 
Average time (ms) 7 | 8 | 9 | Input 10 | 11 | 12 | 13 | 1e-5 14 | 15 | 16 | 17 | 0.0001 18 | 19 | 20 | 21 | 0.001 22 | 23 | 24 | 25 | 0.01 26 | 27 | 28 | 29 | 0.1 30 | 31 | 32 | 33 | 1 34 | 35 | 36 | 37 | 10 38 | 39 | 40 | 41 | 100 42 | 43 | 44 | 45 | 46 | 1 000 47 | 48 | 49 | 50 | 10 000 51 | 52 | 53 | 54 | 100 000 55 | 56 | 57 | 58 | 1 000 000 59 | 60 | 61 | 62 | 10 000 000 63 | 64 | 65 | 66 | 100 000 000 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | binary search 107 | 108 | 109 | sucds 110 | 111 | 112 | vers 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /images/elias_fano_pred_random.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Elias-Fano Predecessor: Random Input: Comparison 4 | 5 | 6 | Average time (ns) 7 | 8 | 9 | Input 10 | 11 | 12 | 13 | 10 14 | 15 | 16 | 17 | 100 18 | 19 | 20 | 21 | 22 | 1 000 23 | 24 | 25 | 26 | 10 000 27 | 28 | 29 | 30 | 100 000 31 | 32 | 33 | 34 | 1 000 000 35 | 36 | 37 | 38 | 10 000 000 39 | 40 | 41 | 42 | 100 000 000 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | binary search 83 | 84 | 85 | sucds 86 | 87 | 88 | vers 89 | 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /images/heap.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Heap Size comparison 5 | 6 | 7 | % Overhead 8 | 9 | 10 | vector size (bytes) 11 | 12 | 13 | 14 | 0 15 | 16 | 17 | 18 | 10 19 | 20 | 21 | 22 | 20 23 | 24 | 25 | 26 | 30 27 | 28 | 29 | 30 | 40 31 | 32 | 33 | 34 | 50 35 | 36 | 37 | 38 | 60 39 | 40 | 41 | 42 | 70 43 | 44 | 
45 | 46 | 80 47 | 48 | 49 | 50 | 90 51 | 52 | 53 | 54 | 100 55 | 56 | 57 | 58 | 59 | 10000 60 | 61 | 62 | 63 | 100000 64 | 65 | 66 | 67 | 1000000 68 | 69 | 70 | 71 | 10000000 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | Vers: 4.68% 83 | 84 | 85 | RsDict: 40.62% 86 | 87 | 88 | SucdR9: 28.12% 89 | 90 | 91 | SucdDa: 53.12% 92 | 93 | 94 | Rank9: 25% 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /images/rmq_comparison.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | RMQ: Randomized Input: Comparison 4 | 5 | 6 | Average time (ns) 7 | 8 | 9 | Input: Number of Elements 10 | 11 | 12 | 13 | 10 14 | 15 | 16 | 17 | 100 18 | 19 | 20 | 21 | 22 | 1 000 23 | 24 | 25 | 26 | 10 000 27 | 28 | 29 | 30 | 100 000 31 | 32 | 33 | 34 | 1 000 000 35 | 36 | 37 | 38 | 10 000 000 39 | 40 | 41 | 42 | 100 000 000 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | crates rmq 93 | 94 | 95 | librualg 96 | 97 | 98 | vers binary rmq 99 | 100 | 101 | vers fast rmq 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Vers - Very Efficient Rank and Select 2 | 3 | [![crates.io](https://img.shields.io/crates/v/vers-vecs.svg)](https://crates.io/crates/vers-vecs) 4 | [![rust](https://github.com/cydhra/vers/actions/workflows/rust.yml/badge.svg)](https://github.com/Cydhra/vers) 5 | [![docs](https://docs.rs/vers-vecs/badge.svg)](https://docs.rs/vers-vecs) 6 | 7 | Vers (vers-vecs on crates.io) 8 | contains pure-Rust implementations of several data structures backed by rank and select operations. 
9 | When using this library, it is strongly recommended to enable the `BMI2` and `popcnt` features for x86_64 CPUs 10 | or compile with the `target-cpu=native` flag, 11 | since the intrinsics speed up both `rank` and `select` operations by a factor of 2-3. 12 | 13 | ## Data Structures 14 | - A fully-featured bit vector with no memory overhead. 15 | - A succinct bit vector supporting fast rank and select queries. 16 | - An Elias-Fano encoding of monotone sequences supporting constant-time predecessor/successor queries. 17 | - Two Range Minimum Query vector structures for constant-time range minimum queries. 18 | - A Wavelet Matrix supporting `O(k)` rank, select, statistical, predecessor, and successor queries. 19 | - A succinct tree structure (BP Tree) supporting level-ordered and depth-first-ordered tree navigation and subtree queries. 20 | 21 | ## Why Vers? 22 | - Vers is among the fastest publicly available bit vector implementations for rank and select operations. 23 | - Vers has a substantially lower memory overhead than its competitors. 24 | - Without crate features, all data structures are implemented in pure Rust and have no dependencies outside the standard library. 25 | - Every functionality is extensively documented. 26 | - Vers aims to provide more functionality for its data structures than competitors 27 | (e.g., Elias-Fano sequences and the Wavelet Matrix support predecessor and successor queries, 28 | the Wavelet Matrix supports statistical queries, all data structures implement various iterators, etc.). 29 | 30 | ## Crate Features 31 | - `simd`: Enables the use of SIMD instructions for rank and select operations. 32 | This feature requires AVX-512 support and uses unsafe code. 33 | It also enables a special iterator for the rank/select bit vector that uses vectorized operations. 34 | The feature only works on nightly Rust. 35 | Enabling it on stable Rust is a no-op, because the required CPU features are not available there. 
36 | - `serde`: Enables serialization and deserialization of the data structures using the `serde` crate. 37 | - `bp_u16_lookup`: Enables a larger lookup table for BP tree queries. The larger table requires 128 KiB instead of 4 KiB. 38 | 39 | ## Benchmarks 40 | I benchmarked the implementations against publicly available implementations of the same data structures. 41 | The benchmarking code is available in the [vers-benchmarks](https://github.com/Cydhra/vers_benchmarks) repository. 42 | The benchmark uses the `simd` feature of rsdict, which requires nightly Rust. 43 | 44 | I performed the benchmarks on a Ryzen 9 7950X with 32GB of RAM. 45 | Some of the results are shown below. 46 | All benchmarks were run with the `target-cpu=native` flag enabled, and the `simd` feature enabled for Vers. 47 | More results can be found in the benchmark repository. 48 | 49 | Benchmarks for the Wavelet Matrix are still missing because I want to improve the benchmarking code before I do them. 50 | Because Wavelet Matrices have very little room for engineering, there aren't any surprising results to be expected, though. 51 | The performance solely depends on the bit vector implementation, so the results will be similar to the bit vector benchmarks. 52 | The only exception is the [qwt](https://crates.io/crates/qwt) crate, which uses quad vectors instead, 53 | and is substantially faster than any other crate due to the reduced number of cache misses. 54 | 55 | ### Bit-Vector 56 | #### Rank & Select 57 | The bit vector implementation is among the fastest publicly available implementations for rank and select operations. 58 | Note that the `succinct` crate substantially outperforms Vers' `rank` operation but does not provide an efficient select operation. 59 | 60 | The x-axis is the number of bits in the bit vector. 61 | An increase in all runtimes can be observed for input sizes exceeding the L2 cache size (16 MB).
62 | 63 | | Legend | Crate | Notes | 64 | |-------------------|-----------------------------------------|-------------------------------------| 65 | | bio | https://crates.io/crates/bio | with adaptive block-size | 66 | | fair bio | https://crates.io/crates/bio | with constant block-size | 67 | | fid | https://crates.io/crates/fid | | 68 | | indexed bitvector | https://crates.io/crates/indexed_bitvec | | 69 | | rank9 | https://crates.io/crates/succinct | Fastest of multiple implementations | 70 | | rsdict | https://crates.io/crates/rsdict | | 71 | | vers | https://github.com/Cydhra/vers | | 72 | | sucds-rank9 | https://crates.io/crates/sucds | | 73 | | sucds-darray | https://crates.io/crates/sucds | Dense Set Implementation | 74 | | bitm | https://crates.io/crates/bitm | | 75 | 76 | ![Bit-Vector Rank Benchmark](images/rank_comparison.svg) 77 | ![Bit-Vector Select Benchmark](images/select_comparison.svg) 78 | 79 | #### Heap Size 80 | 81 | The memory overhead of the bit vector implementation is significantly lower than that of other implementations. 82 | The x-axis is the number of bits in the bit vector, 83 | the y-axis is the additional overhead in percent compared to the size of the bit vector. 84 | Only the fastest competitors are shown, to make the graph more readable 85 | (I would like to add the bio crate data structure as well, since it is the only truly succinct one, 86 | but it does not offer an operation to measure the heap size. 87 | The same is true for the `bitm` crate, which claims to have a lower memory overhead compared to `Vers`, 88 | but does not offer a convenient way of measuring it). 89 | Vers achieves its high speeds with significantly less memory overhead, as can be seen in the heap size benchmark. 90 | The legend contains the measurement for the biggest input size, 91 | because I assume that the overhead approaches a constant value for large inputs. 
92 | 93 | ![Bit-Vector Heap Size Benchmark](images/heap.svg) 94 | 95 | ### Elias-Fano 96 | The benchmark compares the access times for random elements in the sequence. 97 | The x-axis is the number of elements in the sequence. 98 | Note that the elias-fano crate is inefficient with random order access. 99 | In-order access benchmarks can be found in the benchmark repository. 100 | 101 | ![Elias-Fano Randomized](images/elias_fano_access_random.svg) 102 | 103 | The following two benchmarks show the predecessor query times for average element distribution and the 104 | worst-case element distribution. 105 | Note that Vers worst-case query times are logarithmic, while `sucds` has linear worst-case query times. 106 | 107 | ![Elias-Fano Random Input](images/elias_fano_pred_random.svg) 108 | ![Elias-Fano Worst Case](images/elias_fano_pred_adversarial.svg) 109 | 110 | ### Range Minimum Query 111 | The Range Minimum Query implementations are compared against the 112 | [range_minimum_query](https://crates.io/crates/range_minimum_query) and 113 | [librualg](https://crates.io/crates/librualg) crate. 114 | Vers outperforms both crates by a significant margin with both implementations. 115 | An increase in runtime can be observed for input sizes exceeding the L3 cache size (64 MB). 116 | The increase is earlier for the `BinaryRMQ` implementation, because it has a substantially higher memory overhead. 117 | For the same reason, the final two measurements for the `BinaryRMQ` implementation are missing (the data structure 118 | exceeded the available 32 GB main memory). 119 | 120 | (Yes, the naming of both implementations is unfortunate, but they will stay until I do a major version bump.) 121 | 122 | ![RMQ Comparison](images/rmq_comparison.svg) 123 | 124 | # Intrinsics 125 | This crate uses compiler intrinsics for bit manipulation. The intrinsics are supported by 126 | all modern x86_64 CPUs, but not by other architectures.
127 | There are fallback implementations if the intrinsics are not available, but they are significantly slower. 128 | Using this library on `x86` CPUs without enabling `BMI2` and `popcnt` target features is not recommended. 129 | 130 | The intrinsics in question are `popcnt` (supported since SSE4.2 resp. SSE4a on AMD, 2007-2008), 131 | `pdep` (supported with BMI2 since Intel Haswell resp. AMD Excavator, in hardware since AMD Zen 3, 2011-2013), 132 | and `tzcnt` (supported with BMI1 since Intel Haswell resp. AMD Jaguar, ca. 2013). 133 | 134 | ## Safety 135 | This crate uses no unsafe code, with the only exception being the compiler intrinsic for `pdep`. 136 | The intrinsics cannot fail with the provided inputs (provided they are 137 | supported by the target machine), so even if they were to be implemented incorrectly, no 138 | memory corruption can occur (only incorrect results). 139 | 140 | Unsafe code is hidden behind the public API. 141 | 142 | ## Dependencies 143 | The library has no dependencies outside the Rust standard library by default. 144 | It has a plethora of dependencies for benchmarking purposes, but these are not required for normal use. 145 | Optionally, the `serde` feature can be enabled to allow serialization and deserialization of the data structures, 146 | which requires the `serde` crate and its `derive` feature. 147 | 148 | ## License 149 | Licensed under either of 150 | 151 | * Apache License, Version 2.0 152 | ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 153 | * MIT license 154 | ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 155 | 156 | at your option. 157 | 158 | This project includes code developed by [Gonzalo Brito Gadeschi](https://github.com/gnzlbg/bitintr) 159 | originally licensed under the MIT license. 160 | It is redistributed under the above dual license.
161 | 162 | ## Contribution 163 | Unless you explicitly state otherwise, any contribution intentionally submitted 164 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall be 165 | dual licensed as above, without any additional terms or conditions. 166 | -------------------------------------------------------------------------------- /src/bit_vec/fast_rs_vec/bitset.rs: -------------------------------------------------------------------------------- 1 | //! Module that contains the bitset iterator over a `RsVec`. 2 | //! The iterator does the same as the `iter1`/`iter0` methods of `RsVec`, but it is faster for dense vectors. 3 | //! It only exists with the `simd` feature enabled, and since it is slower for sparse vectors, 4 | //! it is not used as a replacement for the `iter1`/`iter0` methods. 5 | 6 | use crate::RsVec; 7 | use std::mem::size_of; 8 | 9 | /// The number of bits in a RsVec that can be processed by AVX instructions at once. 10 | const VECTOR_SIZE: usize = 16; 11 | 12 | // add iterator functions to RsVec 13 | impl RsVec { 14 | /// Get an iterator over the 0-bits in the vector. 15 | /// The iterator returns the indices of the 0-bits in the vector, just as [`iter0`] 16 | /// and [`select0`] do. 17 | /// 18 | /// This method is faster than [`iter0`] for dense vectors, but slower for sparse vectors. 19 | /// 20 | /// See [`BitSetIter`] for more information. 21 | /// 22 | /// [`iter0`]: RsVec::iter0 23 | /// [`select0`]: RsVec::select0 24 | /// [`BitSetIter`]: BitSetIter 25 | #[must_use] 26 | pub fn bit_set_iter0(&self) -> BitSetIter<'_, true> { 27 | BitSetIter::new(self) 28 | } 29 | 30 | /// Get an iterator over the 1-bits in the vector. 31 | /// The iterator returns the indices of the 1-bits in the vector, just as [`iter1`] 32 | /// and [`select1`] do. 33 | /// 34 | /// This method is faster than [`iter1`] for dense vectors, but slower for sparse vectors. 35 | /// 36 | /// See [`BitSetIter`] for more information. 
37 | /// 38 | /// [`iter1`]: RsVec::iter1 39 | /// [`select1`]: RsVec::select1 40 | /// [`BitSetIter`]: BitSetIter 41 | #[must_use] 42 | pub fn bit_set_iter1(&self) -> BitSetIter<'_, false> { 43 | BitSetIter::new(self) 44 | } 45 | } 46 | 47 | /// An iterator that iterates over 1-bits or 0-bits and returns their indices. 48 | /// It uses AVX vector instructions to process 16 bits at once. 49 | /// It is faster than [`SelectIter`] for dense vectors. 50 | /// 51 | /// This is also faster than manually calling `select` on each rank, 52 | /// because the select data structures are not parsed by this iterator. 53 | /// 54 | /// The iterator can be constructed by calling [`bit_set_iter0`] or [`bit_set_iter1`]. 55 | /// 56 | /// # Example 57 | /// ```rust 58 | /// use vers_vecs::{BitVec, RsVec}; 59 | /// 60 | /// let mut bit_vec = BitVec::new(); 61 | /// bit_vec.append_word(u64::MAX); 62 | /// bit_vec.append_word(u64::MAX); 63 | /// bit_vec.flip_bit(4); 64 | /// 65 | /// let rs_vec = RsVec::from_bit_vec(bit_vec); 66 | /// 67 | /// let mut iter = rs_vec.bit_set_iter0(); 68 | /// 69 | /// assert_eq!(iter.next(), Some(4)); 70 | /// assert_eq!(iter.next(), None); 71 | /// ``` 72 | /// 73 | /// [`bit_set_iter0`]: RsVec::bit_set_iter0 74 | /// [`bit_set_iter1`]: RsVec::bit_set_iter1 75 | /// [`SelectIter`]: super::SelectIter 76 | pub struct BitSetIter<'a, const ZERO: bool> { 77 | vec: &'a RsVec, 78 | base: usize, 79 | offsets: [u32; VECTOR_SIZE], 80 | content_len: u8, 81 | cursor: u8, 82 | } 83 | 84 | impl<'a, const ZERO: bool> BitSetIter<'a, ZERO> { 85 | pub(super) fn new(vec: &'a RsVec) -> Self { 86 | let mut iter = Self { 87 | vec, 88 | base: 0, 89 | offsets: [0; VECTOR_SIZE], 90 | content_len: 0, 91 | cursor: 0, 92 | }; 93 | 94 | if vec.len() > VECTOR_SIZE { 95 | iter.load_chunk(vec.get_bits_unchecked(0, VECTOR_SIZE) as u16); 96 | } 97 | 98 | iter 99 | } 100 | 101 | fn load_chunk(&mut self, data: u16) { 102 | use std::arch::x86_64::{__mmask16, 
_mm512_mask_compressstoreu_epi32, _mm512_setr_epi32}; 103 | 104 | unsafe { 105 | let offsets = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 106 | assert!(VECTOR_SIZE <= size_of::() * 8, "change data types"); 107 | let mut mask = __mmask16::from(data); 108 | if ZERO { 109 | mask = !mask; 110 | } 111 | _mm512_mask_compressstoreu_epi32(self.offsets.as_mut_ptr() as *mut _, mask, offsets); 112 | self.content_len = mask.count_ones() as u8; 113 | self.cursor = 0; 114 | } 115 | } 116 | 117 | fn load_next_chunk(&mut self) -> Option<()> { 118 | while self.cursor == self.content_len { 119 | if self.base + VECTOR_SIZE >= self.vec.len() { 120 | return None; 121 | } 122 | 123 | self.base += VECTOR_SIZE; 124 | let data = self.vec.get_bits_unchecked(self.base, VECTOR_SIZE) as u16; 125 | self.load_chunk(data); 126 | } 127 | Some(()) 128 | } 129 | } 130 | 131 | impl Iterator for BitSetIter<'_, ZERO> { 132 | type Item = usize; 133 | 134 | fn next(&mut self) -> Option { 135 | if self.base >= self.vec.len() { 136 | return None; 137 | } 138 | 139 | if self.cursor == self.content_len { 140 | if self.load_next_chunk().is_none() { 141 | if ZERO { 142 | while self.base < self.vec.len() && self.vec.get_unchecked(self.base) != 0 { 143 | self.base += 1; 144 | } 145 | } else { 146 | while self.base < self.vec.len() && self.vec.get_unchecked(self.base) != 1 { 147 | self.base += 1; 148 | } 149 | } 150 | 151 | return if self.base < self.vec.len() { 152 | self.base += 1; 153 | Some(self.base - 1) 154 | } else { 155 | None 156 | }; 157 | } 158 | } 159 | 160 | let offset = self.offsets[self.cursor as usize]; 161 | self.cursor += 1; 162 | Some(self.base + offset as usize) 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /src/bit_vec/mask.rs: -------------------------------------------------------------------------------- 1 | //! This module defines a struct for lazily masking [`BitVec`]. 
It offers all immutable operations 2 | //! of `BitVec` but applies a bit-mask during the operation. The struct is created through 3 | //! [`BitVec::mask_xor`], [`BitVec::mask_and`], [`BitVec::mask_or`], or [`BitVec::mask_custom`]. 4 | 5 | use super::WORD_SIZE; 6 | use crate::BitVec; 7 | 8 | /// A bit vector that is masked with another bit vector via a masking function. Offers the same 9 | /// functions as an unmasked vector. The mask is applied lazily. 10 | #[derive(Debug, Clone)] 11 | pub struct MaskedBitVec<'a, 'b, F: Fn(u64, u64) -> u64> { 12 | vec: &'a BitVec, 13 | mask: &'b BitVec, 14 | bin_op: F, 15 | } 16 | 17 | impl<'a, 'b, F> MaskedBitVec<'a, 'b, F> 18 | where 19 | F: Fn(u64, u64) -> u64, 20 | { 21 | #[inline] 22 | pub(crate) fn new(vec: &'a BitVec, mask: &'b BitVec, bin_op: F) -> Result { 23 | if vec.len != mask.len { 24 | return Err(String::from( 25 | "mask cannot have different length than vector", 26 | )); 27 | } 28 | 29 | Ok(MaskedBitVec { vec, mask, bin_op }) 30 | } 31 | 32 | /// Iterate over the limbs of the masked vector 33 | #[inline] 34 | fn iter_limbs<'s>(&'s self) -> impl Iterator + 's 35 | where 36 | 'a: 's, 37 | 'b: 's, 38 | { 39 | self.vec 40 | .data 41 | .iter() 42 | .zip(&self.mask.data) 43 | .map(|(&a, &b)| (self.bin_op)(a, b)) 44 | } 45 | 46 | /// Return the bit at the given position. 47 | /// The bit takes the least significant bit of the returned u64 word. 48 | /// If the position is larger than the length of the vector, None is returned. 49 | #[inline] 50 | #[must_use] 51 | pub fn get(&self, pos: usize) -> Option { 52 | if pos >= self.vec.len { 53 | None 54 | } else { 55 | Some(self.get_unchecked(pos)) 56 | } 57 | } 58 | 59 | /// Return the bit at the given position. 60 | /// The bit takes the least significant bit of the returned u64 word. 61 | /// 62 | /// # Panics 63 | /// If the position is larger than the length of the vector, 64 | /// the function will either return unpredictable data, or panic. 
65 | /// Use [`get`] to properly handle this case with an `Option`. 66 | /// 67 | /// [`get`]: MaskedBitVec::get 68 | #[inline] 69 | #[must_use] 70 | pub fn get_unchecked(&self, pos: usize) -> u64 { 71 | ((self.bin_op)( 72 | self.vec.data[pos / WORD_SIZE], 73 | self.mask.data[pos / WORD_SIZE], 74 | ) >> (pos % WORD_SIZE)) 75 | & 1 76 | } 77 | 78 | /// Return whether the bit at the given position is set. 79 | /// If the position is larger than the length of the vector, None is returned. 80 | #[inline] 81 | #[must_use] 82 | pub fn is_bit_set(&self, pos: usize) -> Option { 83 | if pos >= self.vec.len { 84 | None 85 | } else { 86 | Some(self.is_bit_set_unchecked(pos)) 87 | } 88 | } 89 | 90 | /// Return whether the bit at the given position is set. 91 | /// 92 | /// # Panics 93 | /// If the position is larger than the length of the vector, 94 | /// the function will either return unpredictable data, or panic. 95 | /// Use [`is_bit_set`] to properly handle this case with an `Option`. 96 | /// 97 | /// [`is_bit_set`]: MaskedBitVec::is_bit_set 98 | #[inline] 99 | #[must_use] 100 | pub fn is_bit_set_unchecked(&self, pos: usize) -> bool { 101 | self.get_unchecked(pos) != 0 102 | } 103 | 104 | /// Return multiple bits at the given position. The number of bits to return is given by `len`. 105 | /// At most 64 bits can be returned. 106 | /// If the position at the end of the query is larger than the length of the vector, 107 | /// None is returned (even if the query partially overlaps with the vector). 108 | /// If the length of the query is larger than 64, None is returned. 109 | #[inline] 110 | #[must_use] 111 | pub fn get_bits(&self, pos: usize, len: usize) -> Option { 112 | if len > WORD_SIZE || len == 0 { 113 | return None; 114 | } 115 | if pos + len > self.vec.len { 116 | None 117 | } else { 118 | Some(self.get_bits_unchecked(pos, len)) 119 | } 120 | } 121 | 122 | /// Return multiple bits at the given position. The number of bits to return is given by `len`. 
123 | /// At most 64 bits can be returned. 124 | /// 125 | /// This function is always inlined, because it gains a lot from loop optimization and 126 | /// can utilize the processor pre-fetcher better if it is. 127 | /// 128 | /// # Errors 129 | /// If the length of the query is larger than 64, unpredictable data will be returned. 130 | /// Use [`get_bits`] to avoid this. 131 | /// 132 | /// # Panics 133 | /// If the position or interval is larger than the length of the vector, 134 | /// the function will either return any valid results padded with unpredictable 135 | /// data or panic. 136 | /// 137 | /// [`get_bits`]: MaskedBitVec::get_bits 138 | #[must_use] 139 | #[allow(clippy::inline_always)] 140 | #[allow(clippy::comparison_chain)] // rust-clippy #5354 141 | #[inline] 142 | pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 { 143 | debug_assert!(len <= WORD_SIZE); 144 | let partial_word = (self.bin_op)( 145 | self.vec.data[pos / WORD_SIZE], 146 | self.mask.data[pos / WORD_SIZE], 147 | ) >> (pos % WORD_SIZE); 148 | 149 | if pos % WORD_SIZE + len == WORD_SIZE { 150 | partial_word 151 | } else if pos % WORD_SIZE + len < WORD_SIZE { 152 | partial_word & ((1 << (len % WORD_SIZE)) - 1) 153 | } else { 154 | let next_half = (self.bin_op)( 155 | self.vec.data[pos / WORD_SIZE + 1], 156 | self.mask.data[pos / WORD_SIZE + 1], 157 | ) << (WORD_SIZE - pos % WORD_SIZE); 158 | 159 | (partial_word | next_half) & ((1 << (len % WORD_SIZE)) - 1) 160 | } 161 | } 162 | 163 | /// Return the number of zeros in the masked bit vector. 164 | /// This method calls [`count_ones`]. 165 | /// 166 | /// [`count_ones`]: MaskedBitVec::count_ones 167 | #[inline] 168 | #[must_use] 169 | pub fn count_zeros(&self) -> u64 { 170 | self.vec.len as u64 - self.count_ones() 171 | } 172 | 173 | /// Return the number of ones in the masked bit vector. 
    #[inline]
    #[must_use]
    #[allow(clippy::missing_panics_doc)] // can't panic because of bounds check
    pub fn count_ones(&self) -> u64 {
        // sum the popcounts of all limbs that are fully covered by the vector length
        let mut ones = self
            .iter_limbs()
            .take(self.vec.len / WORD_SIZE)
            .map(|limb| u64::from(limb.count_ones()))
            .sum();
        // the last, partially used limb must be masked down to the valid bits, because
        // the storage beyond `len` may contain stale data
        if self.vec.len % WORD_SIZE > 0 {
            ones += u64::from(
                ((self.bin_op)(
                    *self.vec.data.last().unwrap(),
                    *self.mask.data.last().unwrap(),
                ) & ((1 << (self.vec.len % WORD_SIZE)) - 1))
                    .count_ones(),
            );
        }
        ones
    }

    /// Collect the masked [`BitVec`] into a new `BitVec` by applying the mask to all bits.
    #[inline]
    #[must_use]
    pub fn to_bit_vec(&self) -> BitVec {
        BitVec {
            data: self.iter_limbs().collect(),
            len: self.vec.len,
        }
    }
}
//! A sparse bit vector with `rank1`, `rank0` and `select1` support.
//! The vector requires `O(n log u/n) + 2n + o(n)` bits of space, where `n` is the number of
//! 1-bits in the vector and `u` is the length of the vector.
//! The vector is constructed from a sorted list of indices of 1-bits, or from an existing
//! [`BitVec`](crate::BitVec).

use crate::{BitVec, EliasFanoVec};

/// A succinct representation of a sparse vector with rank and select support.
/// It is a thin wrapper around an [`EliasFanoVec`] that compresses the indices of 1-bits.
///
/// Therefore, no `select0` function is provided.
/// However, the constructor [`from_bitvec_inverted`] can be used to cheaply invert the input `BitVec`,
/// reversing the roles of 1-bits and 0-bits.
15 | /// 16 | /// # Examples 17 | /// ``` 18 | /// use vers_vecs::SparseRSVec; 19 | /// 20 | /// let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); 21 | /// assert_eq!(sparse.get(5), Some(1)); 22 | /// assert_eq!(sparse.get(11), Some(0)); 23 | /// assert_eq!(sparse.get(12), None); 24 | /// 25 | /// assert_eq!(sparse.rank1(5), 2); 26 | /// assert_eq!(sparse.select1(2), 5); 27 | /// ``` 28 | /// 29 | /// It cn also be constructed from a `BitVec` directly: 30 | /// ``` 31 | /// use vers_vecs::SparseRSVec; 32 | /// use vers_vecs::BitVec; 33 | /// 34 | /// let mut bv = BitVec::from_zeros(12); 35 | /// bv.flip_bit(6); 36 | /// bv.flip_bit(7); 37 | /// 38 | /// let sparse = SparseRSVec::from_bitvec(&bv); 39 | /// assert_eq!(sparse.rank1(5), 0); 40 | /// assert_eq!(sparse.select1(0), 6); 41 | /// ``` 42 | /// 43 | /// [`EliasFanoVec`]: struct.EliasFanoVec.html 44 | /// [`from_bitvec_inverted`]: #method.from_bitvec_inverted 45 | #[derive(Debug, Clone)] 46 | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 47 | pub struct SparseRSVec { 48 | vec: EliasFanoVec, 49 | len: u64, 50 | } 51 | 52 | impl SparseRSVec { 53 | /// Creates a new `SparseRSVec` from a sequence of set bits represented as indices. 54 | /// The input must be sorted in ascending order and free of duplicates. 55 | /// 56 | /// The length of the vector must be passed as well, as it cannot be inferred from the input, 57 | /// if the last bit in the vector is not set. 58 | /// 59 | /// # Parameters 60 | /// - `input`: The positions of set bits, or unset bits if the sparse vector should compress 61 | /// zeros. 62 | /// - `len`: The length of the vector, which is needed if the last bit is not in the input slice. 
63 | #[must_use] 64 | pub fn new(input: &[u64], len: u64) -> Self { 65 | debug_assert!(input.is_sorted(), "input must be sorted"); 66 | debug_assert!( 67 | input.windows(2).all(|w| w[0] != w[1]), 68 | "input must be free of duplicates" 69 | ); 70 | 71 | Self { 72 | vec: EliasFanoVec::from_slice(input), 73 | len, 74 | } 75 | } 76 | 77 | /// Creates a new `SparseRSVec` from a `BitVec`, by compressing the sparse 1-bits. 78 | /// 79 | /// # Parameters 80 | /// - `input`: The input `BitVec` to compress. 81 | #[must_use] 82 | pub fn from_bitvec(input: &BitVec) -> Self { 83 | let len = input.len() as u64; 84 | Self::new( 85 | input 86 | .iter() 87 | .enumerate() 88 | .filter(|&(_, bit)| bit == 1) 89 | .map(|(i, _)| i as u64) 90 | .collect::>() 91 | .as_slice(), 92 | len, 93 | ) 94 | } 95 | 96 | /// Creates a new `SparseRSVec` from a `BitVec`. 97 | /// However, before compressing the 1-bits, the input is inverted. 98 | /// This means that the sparse vector will compress the 0-bits instead of the 1-bits, 99 | /// and the [`rank1`] and [`select1`] functions will return the number of 0-bits and the position of 0-bits. 100 | /// 101 | /// This is a convenience function to allow for easy creation of sparse vectors that compress 102 | /// zeros, despite the lack of a `select0` function. 103 | /// 104 | /// However, do note that [`get`] will return the inverted value of the bit at position `i` from 105 | /// the original `BitVec`. 106 | /// 107 | /// # Parameters 108 | /// - `input`: The input `BitVec` to compress. 
109 | /// 110 | /// # Example 111 | /// ``` 112 | /// use vers_vecs::SparseRSVec; 113 | /// use vers_vecs::BitVec; 114 | /// 115 | /// let mut bv = BitVec::from_ones(12); 116 | /// // set 6 and 7 to 0 117 | /// bv.flip_bit(6); 118 | /// bv.flip_bit(7); 119 | /// 120 | /// let sparse = SparseRSVec::from_bitvec_inverted(&bv); 121 | /// // now select1 gives the position of 0-bits 122 | /// assert_eq!(sparse.select1(1), 7); 123 | /// ``` 124 | /// 125 | /// [`rank1`]: #method.rank1 126 | /// [`select1`]: #method.select1 127 | /// [`get`]: #method.get 128 | #[must_use] 129 | pub fn from_bitvec_inverted(input: &BitVec) -> Self { 130 | let len = input.len() as u64; 131 | Self::new( 132 | input 133 | .iter() 134 | .enumerate() 135 | .filter(|&(_, bit)| bit == 0) 136 | .map(|(i, _)| i as u64) 137 | .collect::>() 138 | .as_slice(), 139 | len, 140 | ) 141 | } 142 | 143 | /// Returns true if the bit at position `i` is set. 144 | /// 145 | /// If `i` is out of bounds the function produces incorrect results. 146 | /// Use [`is_set`] for a checked version. 147 | /// 148 | /// [`is_set`]: #method.is_set 149 | #[must_use] 150 | pub fn is_set_unchecked(&self, i: u64) -> bool { 151 | self.vec.predecessor_unchecked(i) == i 152 | } 153 | 154 | /// Returns true if the bit at position `i` is set. 155 | /// 156 | /// Returns `None` if `i` is out of bounds. 157 | #[must_use] 158 | pub fn is_set(&self, i: u64) -> Option { 159 | if i >= self.len { 160 | None 161 | } else { 162 | // if the predecessor is None, the bit is left of the first 1-bit 163 | Some(self.vec.predecessor(i).is_some_and(|p| p == i)) 164 | } 165 | } 166 | 167 | /// Gets the bit at position `i`. 168 | /// Returns 1 if the bit is set, 0 if it is not set. 169 | /// 170 | /// # Panics 171 | /// If `i` is out of bounds the function might panic or produce incorrect results. 172 | /// Use [`get`] for a checked version. 
173 | #[must_use] 174 | pub fn get_unchecked(&self, i: u64) -> u64 { 175 | self.is_set_unchecked(i).into() 176 | } 177 | 178 | /// Gets the bit at position `i`. 179 | /// Returns `Some(1)` if the bit is set, `Some(0)` if it is not set, and `None` if `i` is out of bounds. 180 | #[must_use] 181 | pub fn get(&self, i: u64) -> Option { 182 | self.is_set(i).map(std::convert::Into::into) 183 | } 184 | 185 | /// Return the position of the 1-bit with the given rank. 186 | /// The following holds for all `pos` with 1-bits: 187 | /// ``select1(rank1(pos)) == pos`` 188 | /// 189 | /// If the rank is larger than the number of sparse bits in the vector, the vector length is returned. 190 | #[must_use] 191 | pub fn select1(&self, i: usize) -> u64 { 192 | self.vec.get(i).unwrap_or(self.len) 193 | } 194 | 195 | /// Returns the number of 1-bits in the vector up to position `i`. 196 | /// 197 | /// If `i` is out of bounds, the number of 1-bits in the vector is returned. 198 | #[must_use] 199 | pub fn rank1(&self, i: u64) -> u64 { 200 | self.vec.rank(i) 201 | } 202 | 203 | /// Returns the number of 0-bits in the vector up to position `i`. 204 | /// 205 | /// If `i` is out of bounds, the number of 0-bits in the vector is returned. 206 | #[must_use] 207 | pub fn rank0(&self, i: u64) -> u64 { 208 | if i >= self.len { 209 | self.len - self.vec.rank(self.len) 210 | } else { 211 | i - self.vec.rank(i) 212 | } 213 | } 214 | 215 | /// Returns an iterator over the 1-bits in the vector. 216 | /// The iterator yields the positions of the 1-bits in ascending order. 217 | pub fn iter1(&self) -> impl Iterator + '_ { 218 | self.vec.iter() 219 | } 220 | 221 | /// Returns the length of the bit vector if it was uncompressed. 222 | #[must_use] 223 | pub fn len(&self) -> u64 { 224 | self.len 225 | } 226 | 227 | /// Returns true if the vector is empty. 
228 | #[must_use] 229 | pub fn is_empty(&self) -> bool { 230 | self.len == 0 231 | } 232 | 233 | /// Returns the number of bytes used by the vector on the heap. 234 | /// Does not include allocated memory that isn't used. 235 | #[must_use] 236 | pub fn heap_size(&self) -> usize { 237 | self.vec.heap_size() 238 | } 239 | } 240 | 241 | impl From for SparseRSVec { 242 | fn from(input: BitVec) -> Self { 243 | Self::from_bitvec_inverted(&input) 244 | } 245 | } 246 | 247 | impl<'a> From<&'a BitVec> for SparseRSVec { 248 | fn from(input: &'a BitVec) -> Self { 249 | Self::from_bitvec_inverted(input) 250 | } 251 | } 252 | 253 | #[cfg(test)] 254 | mod tests { 255 | use super::SparseRSVec; 256 | use crate::BitVec; 257 | use rand::prelude::StdRng; 258 | use rand::{Rng, SeedableRng}; 259 | 260 | #[test] 261 | fn test_sparse_rank() { 262 | let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); 263 | assert_eq!(sparse.rank1(0), 0); 264 | assert_eq!(sparse.rank1(1), 0); 265 | assert_eq!(sparse.rank1(2), 1); 266 | assert_eq!(sparse.rank1(3), 1); 267 | assert_eq!(sparse.rank1(4), 2); 268 | assert_eq!(sparse.rank1(5), 2); 269 | assert_eq!(sparse.rank1(6), 3); 270 | assert_eq!(sparse.rank1(7), 3); 271 | assert_eq!(sparse.rank1(8), 4); 272 | assert_eq!(sparse.rank1(9), 4); 273 | assert_eq!(sparse.rank1(10), 5); 274 | assert_eq!(sparse.rank1(11), 5); 275 | assert_eq!(sparse.rank1(12), 5); 276 | assert_eq!(sparse.rank1(999), 5); 277 | } 278 | 279 | #[test] 280 | fn test_sparse_select() { 281 | let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); 282 | assert_eq!(sparse.select1(0), 1); 283 | assert_eq!(sparse.select1(1), 3); 284 | assert_eq!(sparse.select1(2), 5); 285 | assert_eq!(sparse.select1(3), 7); 286 | assert_eq!(sparse.select1(4), 9); 287 | assert_eq!(sparse.select1(5), 12); 288 | assert_eq!(sparse.select1(6), 12); 289 | } 290 | 291 | #[test] 292 | fn test_sparse_rank0() { 293 | let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); 294 | assert_eq!(sparse.rank0(0), 0); 295 | 
assert_eq!(sparse.rank0(1), 1); 296 | assert_eq!(sparse.rank0(2), 1); 297 | assert_eq!(sparse.rank0(3), 2); 298 | assert_eq!(sparse.rank0(4), 2); 299 | assert_eq!(sparse.rank0(5), 3); 300 | assert_eq!(sparse.rank0(6), 3); 301 | assert_eq!(sparse.rank0(7), 4); 302 | assert_eq!(sparse.rank0(8), 4); 303 | assert_eq!(sparse.rank0(9), 5); 304 | assert_eq!(sparse.rank0(10), 5); 305 | assert_eq!(sparse.rank0(11), 6); 306 | assert_eq!(sparse.rank0(12), 7); 307 | assert_eq!(sparse.rank0(999), 7); 308 | } 309 | 310 | #[test] 311 | fn test_empty_sparse() { 312 | let sparse = SparseRSVec::new(&[], 0); 313 | assert_eq!(sparse.rank1(0), 0); 314 | assert_eq!(sparse.rank1(1), 0); 315 | assert_eq!(sparse.rank1(999), 0); 316 | assert_eq!(sparse.select1(0), 0); 317 | assert_eq!(sparse.select1(1), 0); 318 | assert_eq!(sparse.select1(999), 0); 319 | assert_eq!(sparse.rank0(0), 0); 320 | assert_eq!(sparse.rank0(1), 0); 321 | assert_eq!(sparse.rank0(999), 0); 322 | assert!(sparse.is_empty()); 323 | assert_eq!(sparse.len(), 0); 324 | } 325 | 326 | #[test] 327 | fn test_sparse_get() { 328 | let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); 329 | assert_eq!(sparse.get(0), Some(0)); 330 | assert_eq!(sparse.get(1), Some(1)); 331 | assert_eq!(sparse.get(2), Some(0)); 332 | assert_eq!(sparse.get(3), Some(1)); 333 | assert_eq!(sparse.get(4), Some(0)); 334 | assert_eq!(sparse.get(5), Some(1)); 335 | assert_eq!(sparse.get(6), Some(0)); 336 | assert_eq!(sparse.get(7), Some(1)); 337 | assert_eq!(sparse.get(8), Some(0)); 338 | assert_eq!(sparse.get(9), Some(1)); 339 | assert_eq!(sparse.get(10), Some(0)); 340 | assert_eq!(sparse.get(11), Some(0)); 341 | assert_eq!(sparse.get(12), None); 342 | assert_eq!(sparse.get(999), None); 343 | } 344 | 345 | #[test] 346 | fn test_from_bitvector() { 347 | let mut bv = BitVec::from_ones(12); 348 | bv.flip_bit(6); 349 | bv.flip_bit(7); 350 | 351 | let sparse = SparseRSVec::from_bitvec(&bv); 352 | assert_eq!(sparse.rank1(0), 0); 353 | assert_eq!(sparse.rank1(1), 
1); 354 | assert_eq!(sparse.rank1(2), 2); 355 | assert_eq!(sparse.rank1(7), 6); 356 | assert_eq!(sparse.rank1(8), 6); 357 | assert_eq!(sparse.rank1(9), 7); 358 | assert_eq!(sparse.rank1(12), 10); 359 | 360 | let sparse = SparseRSVec::from_bitvec_inverted(&bv); 361 | assert_eq!(sparse.rank1(0), 0); 362 | assert_eq!(sparse.rank1(1), 0); 363 | assert_eq!(sparse.rank1(2), 0); 364 | assert_eq!(sparse.rank1(7), 1); 365 | assert_eq!(sparse.rank1(8), 2); 366 | assert_eq!(sparse.rank1(9), 2); 367 | assert_eq!(sparse.rank1(12), 2); 368 | } 369 | 370 | #[test] 371 | fn test_large_block() { 372 | // test that the implementation works correctly if the search triggers a binary search 373 | let sparse = SparseRSVec::new( 374 | &[ 375 | 1, 100_000, 100_001, 100_002, 100_003, 100_004, 100_005, 100_006, 100_007, 100_008, 376 | 100_009, 100_010, 1_000_000, 377 | ], 378 | 2_000_000, 379 | ); 380 | assert_eq!(sparse.rank1(100_008), 9); 381 | assert_eq!(sparse.rank1(100_012), 12); 382 | } 383 | 384 | #[test] 385 | fn test_fuzzy() { 386 | const L: usize = 100_000; 387 | let mut bv = BitVec::from_zeros(L); 388 | let mut rng = StdRng::from_seed([0; 32]); 389 | 390 | for _ in 0..L / 4 { 391 | bv.flip_bit(rng.gen_range(0..L)); 392 | } 393 | 394 | let sparse = SparseRSVec::from_bitvec(&bv); 395 | 396 | let mut ones = 0; 397 | for i in 0..L { 398 | assert_eq!(bv.get(i), sparse.get(i as u64)); 399 | assert_eq!(ones, sparse.rank1(i as u64)); 400 | assert_eq!(i as u64 - ones, sparse.rank0(i as u64)); 401 | if bv.get(i) == Some(1) { 402 | assert_eq!(i, sparse.select1(ones as usize).try_into().unwrap()); 403 | ones += 1; 404 | } 405 | } 406 | } 407 | 408 | #[test] 409 | fn test_from_padded_bitvec() { 410 | // test no garbage is added to the sparse vec when the bit vector contains trailing data 411 | let mut bv = BitVec::new(); 412 | bv.append_bit(1); 413 | bv.append_bit(0); 414 | bv.append_bits(u64::MAX, 10); 415 | bv.drop_last(10); 416 | bv.append_bit(0); 417 | bv.drop_last(1); 418 | 419 | let 
sparse = SparseRSVec::from_bitvec(&bv); 420 | assert_eq!(sparse.len(), 2); 421 | assert_eq!(sparse.get(0), Some(1)); 422 | assert_eq!(sparse.get(1), Some(0)); 423 | assert_eq!(sparse.iter1().collect::>(), vec![0]); 424 | } 425 | } 426 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr( 2 | all( 3 | feature = "simd", 4 | target_arch = "x86_64", 5 | target_feature = "avx", 6 | target_feature = "avx2", 7 | target_feature = "avx512f", 8 | target_feature = "avx512bw", 9 | ), 10 | feature(stdarch_x86_avx512) 11 | )] 12 | #![warn(missing_docs)] 13 | #![allow(clippy::module_name_repetitions)] 14 | #![allow(clippy::assertions_on_constants)] // for asserts warning about incompatible constant values 15 | #![allow(clippy::inline_always)] // we actually measure performance increases with most of these 16 | #![cfg_attr(docsrs, feature(doc_cfg), feature(doc_auto_cfg))] // for conditional compilation in docs 17 | 18 | //! This crate provides a collection of data structures supported by fast implementations of 19 | //! rank and select queries. The data structures are static, meaning that they cannot be modified 20 | //! after they have been created. 21 | //! 22 | //! # Data structures 23 | //! - [Bit-Vector][bit_vec::BitVec] with no overhead. The only data structure that can be modified after creation. 24 | //! - [Succinct Bit-Vector][bit_vec::fast_rs_vec::RsVec] supporting fast rank and select queries. 25 | //! - [Elias-Fano][elias_fano::EliasFanoVec] encoding of monotone sequences supporting constant-time predecessor queries. 26 | //! - Two [Range Minimum Query][rmq] structures for constant-time range minimum queries. 27 | //! - [Wavelet Matrix][wavelet::WaveletMatrix] encoding `k`-bit symbols, supporting rank, select, statistical, and predecessor/successor queries in `O(k)`. 28 | //! 
- [Succinct Tree][trees::bp::BpTree] supporting tree navigation in `O(log n)` time, 29 | //! as well as subtree size, level-order, and ancestor queries, and fast depth-first iteration. 30 | //! 31 | //! # Performance 32 | //! Performance was benchmarked against publicly available implementations of the same (or similar) 33 | //! data structures on crates.io. 34 | //! Vers is among the fastest for all benchmarked operations. 35 | //! The benchmark results can be found 36 | //! in the [Benchmark repository](https://github.com/Cydhra/vers_benchmarks). 37 | //! Some tradeoffs between average time, worst-case time, and available API features should be taken 38 | //! into consideration when selecting among the fastest libraries 39 | //! (see the GitHub repository for a discussion). 40 | //! 41 | //! # Intrinsics 42 | //! This crate uses compiler intrinsics for bit-manipulation. The intrinsics are supported by 43 | //! all modern ``x86_64`` CPUs, but not by other architectures. The crate will compile on other 44 | //! architectures using fallback implementations, 45 | //! but the performance will be significantly worse. It is strongly recommended to 46 | //! enable the ``BMI2`` and ``popcnt`` target features when using this crate. 47 | //! 48 | //! The intrinsics in question are `popcnt` (supported since ``SSE4.2`` resp. ``SSE4a`` on AMD, 2007-2008), 49 | //! `pdep` (supported with ``BMI2`` since Intel Haswell resp. AMD Excavator, in hardware since AMD Zen 3, 2011-2013), 50 | //! and `tzcnt` (supported with ``BMI1`` since Intel Haswell resp. AMD Jaguar, ca. 2013). 51 | //! 52 | //! # Safety 53 | //! When the `simd` crate feature is not enabled (default), 54 | //! this crate uses no unsafe code, with the only exception being compiler intrinsics for 55 | //! bit-manipulation, if available. 56 | //! The intrinsics do not operate on addresses, so even if they were to be implemented incorrectly, 57 | //! no memory safety issues would arise. 58 | //! 59 | //! 
# Crate Features 60 | //! - `simd` (disabled by default): Enables the use of SIMD instructions in the `RsVec` 61 | //! implementation, and an additional iterator for the `RsVec` data structure. 62 | //! - `serde` (disabled by default): Enables serialization and deserialization support for all 63 | //! data structures in this crate using the `serde` crate. 64 | //! - `bp_u16_lookup` (disabled by default): Uses a 16-bit lookup table for the balanced parenthesis 65 | //! tree data structure. This is faster, but requires 128 KiB instead of 4 KiB. 66 | 67 | pub use bit_vec::fast_rs_vec::RsVec; 68 | pub use bit_vec::sparse::SparseRSVec; 69 | pub use bit_vec::BitVec; 70 | pub use elias_fano::EliasFanoVec; 71 | pub use rmq::binary_rmq::BinaryRmq; 72 | pub use rmq::fast_rmq::FastRmq; 73 | pub use trees::bp::{BpBuilder, BpTree}; 74 | pub use trees::{IsAncestor, LevelTree, SubtreeSize, Tree, TreeBuilder}; 75 | pub use wavelet::WaveletMatrix; 76 | 77 | pub mod bit_vec; 78 | 79 | #[forbid(unsafe_code)] 80 | pub mod elias_fano; 81 | 82 | #[forbid(unsafe_code)] 83 | pub mod rmq; 84 | 85 | #[forbid(unsafe_code)] 86 | pub mod trees; 87 | 88 | #[forbid(unsafe_code)] 89 | pub mod wavelet; 90 | 91 | pub(crate) mod util; 92 | -------------------------------------------------------------------------------- /src/rmq/binary_rmq/mod.rs: -------------------------------------------------------------------------------- 1 | //! This module contains a range minimum query data structure. It pre-computes the 2 | //! minimum element in intervals 2^k for all k and uses this information to answer queries in 3 | //! constant-time. This uses O(n log n) space overhead. 4 | 5 | use std::cmp::min_by; 6 | use std::collections::Bound; 7 | use std::mem::size_of; 8 | use std::ops::{Deref, RangeBounds}; 9 | 10 | /// A Range Minimum Query data structure that pre-calculates some queries. 
/// The minimum element in intervals 2^k for all k is precalculated and each query is turned into
/// two overlapping sub-queries. This leads to constant-time queries and O(n log n) space overhead.
/// The pre-calculation is done in O(n log n) time.
/// This RMQ data structure is slightly faster than the [fast RMQ][crate::rmq::fast_rmq::FastRmq]
/// for small inputs but has a much higher space overhead, which makes it slower for large inputs.
/// It does not support input sizes exceeding 2^32 elements.
///
/// # Example
/// ```rust
/// use vers_vecs::BinaryRmq;
///
/// let data = vec![4, 10, 3, 11, 2, 12];
/// let rmq = BinaryRmq::from_vec(data);
///
/// assert_eq!(rmq.range_min(0, 1), 0);
/// assert_eq!(rmq.range_min(0, 2), 2);
/// assert_eq!(rmq.range_min(0, 3), 2);
/// ```
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct BinaryRmq {
    // the input data; kept so queries can compare the two candidate minima
    data: Vec<u64>,

    // store indices relative to start of range. There is no way to have ranges exceeding 2^32 bits
    // but since we have fast_rmq for larger inputs, which does not have any downsides at that point,
    // we can just use u32 here (which gains cache efficiency for both implementations).
    results: Vec<u32>,
}

impl BinaryRmq {
    /// Create a new RMQ data structure for the given data. This uses O(n log n) space and
    /// precalculates the minimum element in intervals 2^k for all k for all elements.
    ///
    /// # Panics
    /// This function will panic if the input is larger than 2^32 elements.
    #[must_use]
    pub fn from_vec(data: Vec<u64>) -> Self {
        // the results are stored in a one-dimensional array, where the k'th element of each row i is
        // the index of the minimum element in the interval [i, i + 2^k). The length of the row is
        // ceil(log2(data.len())) + 1, which wastes 1/2 + 1/4 + 1/8... = 1 * log n words of memory,
        // but saves us a large amount of page faults for big vectors, when compared to having a
        // two-dimensional array with dynamic length in the second dimension.
        let len = data.len();
        assert!(u32::try_from(len).is_ok(), "input too large for binary rmq");

        let row_length = len.next_power_of_two().trailing_zeros() as usize + 1;
        let mut results = vec![0u32; len * row_length];

        // initialize the first column of the results array with the indices of the elements in the
        // data array. This is setup for the dynamic programming approach to calculating the rest of
        // the results.
        // (the vector is already zero-filled; this loop spells out the k = 0 base case,
        // where every window of length 1 has its minimum at offset 0)
        for i in 0..len {
            results[i * row_length] = 0;
        }

        // calculate the rest of the results using dynamic programming (it uses the minima of smaller
        // intervals to calculate the minima of larger intervals).
        for i in 0..data.len().next_power_of_two().trailing_zeros() {
            let i = i as usize;
            for j in 0..data.len() {
                // each level-(i+1) window [j, j + 2^(i+1)) is the union of two level-i
                // windows of length 2^i: one at j and one at j + 2^i
                let offset = 1 << i;
                #[allow(clippy::collapsible_else_if)] // readability
                let arg_min: usize = if j + offset < data.len() {
                    // both half-windows fit: take the smaller of the two stored minima
                    // (stored offsets are relative, hence the `+ j` / `+ (j + offset)`)
                    if data[results[j * row_length + i] as usize + j]
                        < data[results[(j + offset) * row_length + i] as usize + (j + offset)]
                    {
                        results[j * row_length + i] as usize + j
                    } else {
                        results[(j + offset) * row_length + i] as usize + (j + offset)
                    }
                } else {
                    // right half-window would overrun the data: clamp using the window that
                    // ends at the last element.
                    // NOTE(review): this branch combines the level-i window at j with the
                    // level-(i-1) window starting at data.len() - offset - 1 — confirm against
                    // upstream; this right-edge clamping is the trickiest part of the table.
                    if data.len() - offset - 1 > j {
                        if data[results[j * row_length + i] as usize + j]
                            < data[results[(data.len() - offset - 1) * row_length + i - 1] as usize
                                + (data.len() - offset - 1)]
                        {
                            results[j * row_length + i] as usize + j
                        } else {
                            results[(data.len() - offset - 1) * row_length + i - 1] as usize
                                + (data.len() - offset - 1)
                        }
                    } else {
                        // window degenerates to the suffix starting at j; keep j itself
                        j
                    }
                };

                #[allow(clippy::cast_possible_truncation)]
                // we know that the result is in bounds, since the input is bounded to 2^32 elements
                {
                    // store the minimum's offset relative to j for level i+1
                    results[j * row_length + i + 1] = (arg_min - j) as u32;
                }
            }
        }

        Self { data, results }
    }

    /// Convenience function for [`BinaryRmq::range_min`] for using range operators.
    /// The range is clamped to the length of the data structure, so this function will not panic,
    /// unless called on an empty data structure, because that does not have a valid index.
    ///
    /// # Example
    /// ```rust
    /// use vers_vecs::BinaryRmq;
    /// let rmq = BinaryRmq::from_vec(vec![5, 4, 3, 2, 1]);
    /// assert_eq!(rmq.range_min_with_range(0..3), 2);
    /// assert_eq!(rmq.range_min_with_range(0..=3), 3);
    /// ```
    ///
    /// # Panics
    /// This function will panic if the data structure is empty.
    #[must_use]
    pub fn range_min_with_range<T: RangeBounds<usize>>(&self, range: T) -> usize {
        // convert the start bound to an inclusive index, clamped into the valid range
        let start = match range.start_bound() {
            Bound::Included(i) => *i,
            Bound::Excluded(i) => *i + 1,
            Bound::Unbounded => 0,
        }
        .clamp(0, self.len() - 1);

        // convert the end bound to an inclusive index, clamped into the valid range
        let end = match range.end_bound() {
            Bound::Included(i) => *i,
            Bound::Excluded(i) => *i - 1,
            Bound::Unbounded => self.len() - 1,
        }
        .clamp(0, self.len() - 1);
        self.range_min(start, end)
    }

    /// Returns the index of the minimum element in the range [i, j] in O(1) time.
    /// This has a constant query time. The range is inclusive.
    ///
    /// # Panics
    /// Calling this function with i > j will produce either a panic or an incorrect result.
    /// Calling this function where one of the indices is out of bounds will produce a panic or an
    /// incorrect result.
147 | #[must_use] 148 | pub fn range_min(&self, i: usize, j: usize) -> usize { 149 | let row_len = self.data.len().next_power_of_two().trailing_zeros() as usize + 1; 150 | let log_dist = (usize::BITS - (j - i).leading_zeros()).saturating_sub(1) as usize; 151 | let dist = (1 << log_dist) - 1; 152 | 153 | // the minimum of the two sub-queries with powers of two is the minimum of the whole query. 154 | min_by( 155 | self.results[i * row_len + log_dist] as usize + i, 156 | self.results[(j - dist) * row_len + log_dist] as usize + (j - dist), 157 | |a, b| self.data[*a].cmp(&self.data[*b]), 158 | ) 159 | } 160 | 161 | /// Returns the amount of memory used by this data structure in bytes. This does not include 162 | /// space allocated but not in use (e.g. unused capacity of vectors). 163 | #[must_use] 164 | pub fn heap_size(&self) -> usize { 165 | self.data.len() * size_of::() + self.results.len() * size_of::() 166 | } 167 | } 168 | 169 | /// Implements Deref to delegate to the underlying data structure. This allows the user to use 170 | /// indexing syntax on the RMQ data structure to access the underlying data, as well as iterators, 171 | /// etc. 172 | impl Deref for BinaryRmq { 173 | type Target = Vec; 174 | 175 | fn deref(&self) -> &Self::Target { 176 | &self.data 177 | } 178 | } 179 | 180 | impl From> for BinaryRmq { 181 | fn from(data: Vec) -> Self { 182 | Self::from_vec(data) 183 | } 184 | } 185 | 186 | /// Create a new RMQ data structure for the given data. 187 | /// The iterator is consumed and the data is stored in a vector. 188 | /// 189 | /// See [`BinaryRmq::from_vec`] for more information. 
190 | /// 191 | /// [`BinaryRmq::from_vec`]: BinaryRmq::from_vec 192 | impl FromIterator for BinaryRmq { 193 | fn from_iter>(iter: T) -> Self { 194 | Self::from_vec(iter.into_iter().collect()) 195 | } 196 | } 197 | 198 | #[cfg(test)] 199 | mod tests; 200 | -------------------------------------------------------------------------------- /src/rmq/binary_rmq/tests.rs: -------------------------------------------------------------------------------- 1 | use crate::rmq::binary_rmq::BinaryRmq; 2 | use rand::RngCore; 3 | 4 | #[test] 5 | fn small_test() { 6 | let rmq = BinaryRmq::from_vec(vec![9, 6, 10, 4, 0, 8, 3, 7, 1, 2, 5]); 7 | 8 | assert_eq!(rmq.range_min(0, 0), 0); 9 | assert_eq!(rmq.range_min(0, 1), 1); 10 | assert_eq!(rmq.range_min(0, 2), 1); 11 | assert_eq!(rmq.range_min(0, 3), 3); 12 | assert_eq!(rmq.range_min(5, 8), 8); 13 | assert_eq!(rmq.range_min(5, 9), 8); 14 | assert_eq!(rmq.range_min(9, 10), 9); 15 | assert_eq!(rmq.range_min(0, 10), 4); 16 | } 17 | 18 | #[test] 19 | fn randomized_test() { 20 | let mut rng = rand::thread_rng(); 21 | const L: usize = 100; 22 | 23 | let mut numbers_vec = Vec::with_capacity(L); 24 | for _ in 0..L { 25 | numbers_vec.push(rng.next_u64()); 26 | } 27 | 28 | let rmq = BinaryRmq::from_vec(numbers_vec.clone()); 29 | 30 | for i in 0..L { 31 | for j in i..L { 32 | let min = numbers_vec[i..=j].iter().min().unwrap(); 33 | assert_eq!( 34 | numbers_vec[rmq.range_min(i, j)], 35 | *min, 36 | "i = {}, j = {}", 37 | i, 38 | j 39 | ); 40 | } 41 | } 42 | } 43 | 44 | #[test] 45 | fn test_iter() { 46 | let rmq = BinaryRmq::from_vec(vec![1, 2, 3, 4, 5]); 47 | let mut iter = rmq.iter(); 48 | assert_eq!(iter.next(), Some(&1)); 49 | assert_eq!(iter.next(), Some(&2)); 50 | assert_eq!(iter.next(), Some(&3)); 51 | assert_eq!(iter.next(), Some(&4)); 52 | assert_eq!(iter.next(), Some(&5)); 53 | assert_eq!(iter.next(), None); 54 | } 55 | 56 | #[test] 57 | fn test_range_operators() { 58 | let rmq = BinaryRmq::from_vec(vec![5, 4, 3, 2, 1]); 59 | 
assert_eq!(rmq.range_min(0, 3), 3); 60 | assert_eq!(rmq.range_min_with_range(0..3), 2); 61 | assert_eq!(rmq.range_min_with_range(0..=3), 3); 62 | } 63 | 64 | #[test] 65 | fn test_empty_rmq() { 66 | let rmq = BinaryRmq::from_vec(Vec::::new()); 67 | assert!(rmq.is_empty()); 68 | // calling functions on an empty rmq will panic because the upper bound is inclusive, but there 69 | // is no valid index in an empty array, so we can't test anything else 70 | } 71 | -------------------------------------------------------------------------------- /src/rmq/fast_rmq/mod.rs: -------------------------------------------------------------------------------- 1 | //! A fast and quasi-succinct range minimum query data structure. 2 | //! It is based on a linear-space RMQ data structure 3 | //! but uses constant-sized structures in place of logarithmic ones, 4 | //! which makes it faster at the cost of increasing the space bound to O(n log n). 5 | 6 | use std::cmp::min_by; 7 | use std::mem::size_of; 8 | use std::ops::{Bound, Deref, RangeBounds}; 9 | 10 | use crate::rmq::binary_rmq::BinaryRmq; 11 | use crate::util::pdep::Pdep; 12 | 13 | /// Size of the blocks the data is split into. One block is indexable with a u8, hence its size. 14 | const BLOCK_SIZE: usize = 128; 15 | 16 | /// A constant size small bitvector that supports rank0 and select0 specifically for the RMQ 17 | /// structure 18 | #[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] 19 | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 20 | struct SmallBitVector(u128); 21 | 22 | impl SmallBitVector { 23 | /// Calculates the rank0 of the bitvector up to the i-th bit by masking out the bits after i 24 | /// and counting the ones of the bitwise-inverted bitvector. 
25 | #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen 26 | fn rank0(&self, i: usize) -> usize { 27 | debug_assert!(i <= 128); 28 | let mask = 1u128.checked_shl(i as u32).unwrap_or(0).wrapping_sub(1); 29 | (!self.0 & mask).count_ones() as usize 30 | } 31 | 32 | fn select0(&self, mut rank: usize) -> usize { 33 | let word = (self.0 & 0xFFFF_FFFF_FFFF_FFFF) as u64; 34 | if (word.count_zeros() as usize) <= rank { 35 | rank -= word.count_zeros() as usize; 36 | } else { 37 | return (1 << rank).pdep(!word).trailing_zeros() as usize; 38 | } 39 | let word = (self.0 >> 64) as u64; 40 | 64 + (1 << (rank % 64)).pdep(!word).trailing_zeros() as usize 41 | } 42 | 43 | fn set_bit(&mut self, i: usize) { 44 | debug_assert!(i <= 128); 45 | let mask = 1u128 << i; 46 | self.0 |= mask; 47 | } 48 | } 49 | 50 | /// A block has a bit vector indicating the minimum element in the prefix (suffix) of the 51 | /// block up to each bit's index. This way a simple select(rank(k)) query can be used to find the 52 | /// minimum element in the block prefix (suffix) of length k. 53 | /// The space requirement for this structure is (sub-)linear in the block size. 54 | #[derive(Clone, Debug)] 55 | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 56 | struct Block { 57 | prefix_minima: SmallBitVector, 58 | suffix_minima: SmallBitVector, 59 | } 60 | 61 | /// A data structure for fast range minimum queries based on a structure with theoretically linear space overhead. 62 | /// In practice, the space overhead is O(n log n), because of real-machine considerations. 63 | /// However, this increases speed and will only be a problem for incredibly large data sets. 64 | /// The data structure can handle up to 2^40 elements, after which some queries may cause 65 | /// panics. 
66 | /// 67 | /// # Example 68 | /// ```rust 69 | /// use vers_vecs::FastRmq; 70 | /// 71 | /// let data = vec![4, 10, 3, 11, 2, 12]; 72 | /// let rmq = FastRmq::from_vec(data); 73 | /// 74 | /// assert_eq!(rmq.range_min(0, 1), 0); 75 | /// assert_eq!(rmq.range_min(0, 2), 2); 76 | /// assert_eq!(rmq.range_min(0, 3), 2); 77 | /// ``` 78 | #[derive(Clone, Debug)] 79 | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 80 | pub struct FastRmq { 81 | data: Vec, 82 | block_minima: BinaryRmq, 83 | block_min_indices: Vec, 84 | blocks: Vec, 85 | } 86 | 87 | impl FastRmq { 88 | /// Creates a new range minimum query data structure from the given data. Creation time is 89 | /// O(n log n) and space overhead is O(n log n) with a fractional constant factor 90 | /// (see [`FastRmq`]) 91 | /// 92 | /// # Panics 93 | /// This function will panic if the input is larger than 2^40 elements. 94 | #[must_use] 95 | pub fn from_vec(data: Vec) -> Self { 96 | assert!(data.len() < 1 << 40, "input too large for fast rmq"); 97 | 98 | let mut block_minima = Vec::with_capacity(data.len() / BLOCK_SIZE + 1); 99 | let mut block_min_indices = Vec::with_capacity(data.len() / BLOCK_SIZE + 1); 100 | let mut blocks = Vec::with_capacity(data.len() / BLOCK_SIZE + 1); 101 | 102 | data.chunks(BLOCK_SIZE).for_each(|block| { 103 | let mut prefix_minima = SmallBitVector::default(); 104 | let mut suffix_minima = SmallBitVector::default(); 105 | 106 | let mut prefix_minimum = block[0]; 107 | let mut block_minimum = block[0]; 108 | let mut block_minimum_index = 0u8; 109 | 110 | for (i, elem) in block.iter().enumerate().skip(1) { 111 | if *elem < prefix_minimum { 112 | prefix_minimum = *elem; 113 | } else { 114 | prefix_minima.set_bit(i); 115 | } 116 | 117 | // This is safe because the block size is constant and smaller than 256 118 | #[allow(clippy::cast_possible_truncation)] 119 | if *elem < block_minimum { 120 | block_minimum = *elem; 121 | block_minimum_index = i as u8; 122 | } 123 | 
} 124 | 125 | let mut suffix_minimum = block[block.len() - 1]; 126 | 127 | for i in 2..=block.len() { 128 | if block[block.len() - i] < suffix_minimum { 129 | suffix_minimum = block[block.len() - i]; 130 | } else { 131 | suffix_minima.set_bit(i - 1); 132 | } 133 | } 134 | 135 | block_minima.push(block_minimum); 136 | block_min_indices.push(block_minimum_index); 137 | blocks.push(Block { 138 | prefix_minima, 139 | suffix_minima, 140 | }); 141 | }); 142 | 143 | Self { 144 | data, 145 | block_minima: BinaryRmq::from_vec(block_minima), 146 | block_min_indices, 147 | blocks, 148 | } 149 | } 150 | 151 | /// Convenience function for [`FastRmq::range_min`] for using range operators. 152 | /// The range is clamped to the length of the data structure, sso this function will not panic, 153 | /// unless called on an empty data structure, because that does not have a valid index. 154 | /// 155 | /// # Example 156 | /// ```rust 157 | /// use vers_vecs::FastRmq; 158 | /// let rmq = FastRmq::from_vec(vec![5, 4, 3, 2, 1]); 159 | /// assert_eq!(rmq.range_min_with_range(0..3), 2); 160 | /// assert_eq!(rmq.range_min_with_range(0..=3), 3); 161 | /// ``` 162 | /// 163 | /// # Panics 164 | /// This function will panic if the data structure is empty. 165 | #[must_use] 166 | pub fn range_min_with_range>(&self, range: T) -> usize { 167 | let start = match range.start_bound() { 168 | Bound::Included(i) => *i, 169 | Bound::Excluded(i) => *i + 1, 170 | Bound::Unbounded => 0, 171 | } 172 | .clamp(0, self.len() - 1); 173 | 174 | let end = match range.end_bound() { 175 | Bound::Included(i) => *i, 176 | Bound::Excluded(i) => *i - 1, 177 | Bound::Unbounded => self.len() - 1, 178 | } 179 | .clamp(0, self.len() - 1); 180 | self.range_min(start, end) 181 | } 182 | 183 | /// Returns the index of the minimum element in the range [i, j] in O(1) time. 
184 | /// Runtime may still vary for different ranges, 185 | /// but is independent of the size of the data structure and bounded by a constant for all 186 | /// possible ranges. The range is inclusive. 187 | /// 188 | /// # Panics 189 | /// Calling this function with i > j will produce either a panic or an incorrect result. 190 | /// Calling this function where one of the indices is out of bounds will produce a panic or an 191 | /// incorrect result. 192 | #[must_use] 193 | #[allow(clippy::similar_names)] 194 | pub fn range_min(&self, i: usize, j: usize) -> usize { 195 | let block_i = i / BLOCK_SIZE; 196 | let block_j = j / BLOCK_SIZE; 197 | 198 | // if the range is contained in a single block, we just search it 199 | if block_i == block_j { 200 | let rank_i_prefix = self.blocks[block_i].prefix_minima.rank0(i % BLOCK_SIZE + 1); 201 | let rank_j_prefix = self.blocks[block_i].prefix_minima.rank0(j % BLOCK_SIZE + 1); 202 | 203 | if rank_j_prefix > rank_i_prefix { 204 | return block_i * BLOCK_SIZE 205 | + self.blocks[block_i] 206 | .prefix_minima 207 | .select0(rank_j_prefix - 1); 208 | } 209 | 210 | let rank_i_suffix = self.blocks[block_i] 211 | .suffix_minima 212 | .rank0(BLOCK_SIZE - (i % BLOCK_SIZE)); 213 | let rank_j_suffix = self.blocks[block_i] 214 | .suffix_minima 215 | .rank0(BLOCK_SIZE - (j % BLOCK_SIZE)); 216 | 217 | if rank_j_suffix > rank_i_suffix { 218 | return (block_i + 1) * BLOCK_SIZE 219 | - self.blocks[block_i] 220 | .suffix_minima 221 | .select0(rank_j_suffix - 1); 222 | } 223 | 224 | return i + self.data[i..=j] 225 | .iter() 226 | .enumerate() 227 | .min_by_key(|(_, &x)| x) 228 | .unwrap() 229 | .0; 230 | } 231 | 232 | let partial_block_i_min = (block_i + 1) * BLOCK_SIZE 233 | - self.blocks[block_i].suffix_minima.select0( 234 | self.blocks[block_i] 235 | .suffix_minima 236 | .rank0(BLOCK_SIZE - (i % BLOCK_SIZE)) 237 | - 1, 238 | ) 239 | - 1; 240 | 241 | let partial_block_j_min = block_j * BLOCK_SIZE 242 | + self.blocks[block_j] 243 | 
.prefix_minima 244 | .select0(self.blocks[block_j].prefix_minima.rank0(j % BLOCK_SIZE + 1) - 1); 245 | 246 | // if there are full blocks between the two partial blocks, we can use the block minima 247 | // to find the minimum in the range [block_i + 1, block_j - 1] 248 | if block_i + 1 < block_j { 249 | let intermediate_min_block = self.block_minima.range_min(block_i + 1, block_j - 1); 250 | let min_block_index = intermediate_min_block * BLOCK_SIZE 251 | + self.block_min_indices[intermediate_min_block] as usize; 252 | 253 | min_by( 254 | min_by(partial_block_i_min, partial_block_j_min, |&a, &b| { 255 | self.data[a].cmp(&self.data[b]) 256 | }), 257 | min_block_index, 258 | |&a, &b| self.data[a].cmp(&self.data[b]), 259 | ) 260 | } else { 261 | min_by(partial_block_i_min, partial_block_j_min, |&a, &b| { 262 | self.data[a].cmp(&self.data[b]) 263 | }) 264 | } 265 | } 266 | 267 | /// Returns the length of the RMQ data structure (i.e. the number of elements) 268 | #[must_use] 269 | pub fn len(&self) -> usize { 270 | self.data.len() 271 | } 272 | 273 | /// Returns true if the RMQ data structure is empty (i.e. contains no elements) 274 | #[must_use] 275 | pub fn is_empty(&self) -> bool { 276 | self.data.is_empty() 277 | } 278 | 279 | /// Returns the amount of memory used by the RMQ data structure in bytes. Does not include 280 | /// space allocated but not in use (e.g. unused capacity of vectors). 281 | #[must_use] 282 | pub fn heap_size(&self) -> usize { 283 | self.data.len() * size_of::() 284 | + self.block_minima.heap_size() 285 | + self.block_min_indices.len() 286 | + self.blocks.len() * size_of::() 287 | } 288 | } 289 | 290 | /// Implements Deref to delegate to the underlying data structure. This allows the user to use 291 | /// indexing syntax on the RMQ data structure to access the underlying data, as well as iterators, 292 | /// etc. 
293 | impl Deref for FastRmq { 294 | type Target = Vec; 295 | 296 | fn deref(&self) -> &Self::Target { 297 | &self.data 298 | } 299 | } 300 | 301 | impl From> for FastRmq { 302 | fn from(data: Vec) -> Self { 303 | Self::from_vec(data) 304 | } 305 | } 306 | 307 | /// Creates a new range minimum query data structure from the given data. 308 | /// The iterator is consumed and the data is stored in a vector. 309 | /// 310 | /// See [`FastRmq::from_vec`] for more information. 311 | /// 312 | /// [`FastRmq::from_vec`]: FastRmq::from_vec 313 | impl FromIterator for FastRmq { 314 | fn from_iter>(iter: T) -> Self { 315 | Self::from_vec(iter.into_iter().collect()) 316 | } 317 | } 318 | 319 | #[cfg(test)] 320 | mod tests; 321 | -------------------------------------------------------------------------------- /src/rmq/fast_rmq/tests.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use rand::RngCore; 3 | 4 | #[test] 5 | fn test_small_bit_vector_rank0() { 6 | let mut sbv = SmallBitVector::default(); 7 | sbv.set_bit(1); 8 | sbv.set_bit(3); 9 | sbv.set_bit(64); 10 | sbv.set_bit(65); 11 | 12 | assert_eq!(sbv.rank0(0), 0); 13 | assert_eq!(sbv.rank0(1), 1); 14 | assert_eq!(sbv.rank0(2), 1); 15 | assert_eq!(sbv.rank0(3), 2); 16 | assert_eq!(sbv.rank0(4), 2); 17 | 18 | assert_eq!(sbv.rank0(64), 62); 19 | assert_eq!(sbv.rank0(65), 62); 20 | assert_eq!(sbv.rank0(66), 62); 21 | assert_eq!(sbv.rank0(67), 63); 22 | } 23 | 24 | #[test] 25 | fn test_small_bit_vector_select0() { 26 | let mut sbv = SmallBitVector::default(); 27 | sbv.set_bit(1); 28 | sbv.set_bit(3); 29 | sbv.set_bit(64); 30 | sbv.set_bit(65); 31 | 32 | assert_eq!(sbv.select0(0), 0); 33 | assert_eq!(sbv.select0(1), 2); 34 | assert_eq!(sbv.select0(2), 4); 35 | assert_eq!(sbv.select0(3), 5); 36 | assert_eq!(sbv.select0(64), 68); 37 | } 38 | 39 | #[test] 40 | fn test_fast_rmq() { 41 | const L: usize = 2 * BLOCK_SIZE; 42 | 43 | let mut numbers_vec = Vec::with_capacity(L); 44 | 
for i in 0..L { 45 | numbers_vec.push(i as u64); 46 | } 47 | 48 | let rmq = FastRmq::from_vec(numbers_vec.clone()); 49 | 50 | for i in 0..L { 51 | for j in i..L { 52 | let min = i + numbers_vec[i..=j] 53 | .iter() 54 | .enumerate() 55 | .min_by_key(|(_, &x)| x) 56 | .unwrap() 57 | .0; 58 | assert_eq!(rmq.range_min(i, j), min, "i = {}, j = {}", i, j); 59 | } 60 | } 61 | } 62 | 63 | #[test] 64 | fn test_fast_rmq_unsorted() { 65 | let mut rng = rand::thread_rng(); 66 | const L: usize = 2 * BLOCK_SIZE; 67 | 68 | let mut numbers_vec = Vec::with_capacity(L); 69 | for _ in 0..L { 70 | numbers_vec.push(rng.next_u64()); 71 | } 72 | 73 | let rmq = FastRmq::from_vec(numbers_vec.clone()); 74 | 75 | for i in 0..L { 76 | for j in i..L { 77 | let min = numbers_vec[i..=j].iter().min().unwrap(); 78 | assert_eq!( 79 | numbers_vec[rmq.range_min(i, j)], 80 | *min, 81 | "i = {}, j = {}", 82 | i, 83 | j 84 | ); 85 | } 86 | } 87 | } 88 | 89 | #[test] 90 | fn test_iter() { 91 | let rmq = FastRmq::from_vec(vec![1, 2, 3, 4, 5]); 92 | let mut iter = rmq.iter(); 93 | assert_eq!(iter.next(), Some(&1)); 94 | assert_eq!(iter.next(), Some(&2)); 95 | assert_eq!(iter.next(), Some(&3)); 96 | assert_eq!(iter.next(), Some(&4)); 97 | assert_eq!(iter.next(), Some(&5)); 98 | assert_eq!(iter.next(), None); 99 | } 100 | 101 | #[test] 102 | fn test_range_operators() { 103 | let rmq = FastRmq::from_vec(vec![5, 4, 3, 2, 1]); 104 | assert_eq!(rmq.range_min(0, 3), 3); 105 | assert_eq!(rmq.range_min_with_range(0..3), 2); 106 | assert_eq!(rmq.range_min_with_range(0..=3), 3); 107 | } 108 | 109 | #[test] 110 | fn test_empty_rmq() { 111 | let _rmq = FastRmq::from_vec(Vec::::new()); 112 | // calling functions on an empty rmq will panic because the upper bound is inclusive, but there 113 | // is no valid index in an empty array, so we can't test anything else 114 | } 115 | -------------------------------------------------------------------------------- /src/rmq/mod.rs: 
-------------------------------------------------------------------------------- 1 | //! Range minimum query data structures. These data structures allow for the calculation of the index of the 2 | //! minimum element in a range of a static array in constant-time. The implementations are located 3 | //! in the [`binary_rmq`] and [`fast_rmq`] modules. 4 | 5 | pub mod fast_rmq; 6 | 7 | pub mod binary_rmq; 8 | -------------------------------------------------------------------------------- /src/trees/bp/builder.rs: -------------------------------------------------------------------------------- 1 | use crate::trees::bp::{BpTree, DEFAULT_BLOCK_SIZE}; 2 | use crate::trees::TreeBuilder; 3 | use crate::BitVec; 4 | 5 | /// A builder for [`BpTrees`] using depth-first traversal of the tree. See the documentation of 6 | /// [`TreeBuilder`]. 7 | /// 8 | /// [`BpTree`]: BpTree 9 | pub struct BpBuilder { 10 | excess: i64, 11 | bit_vec: BitVec, 12 | } 13 | 14 | impl BpBuilder { 15 | /// Create new empty `DfsTreeBuilder` 16 | #[must_use] 17 | pub fn new() -> Self { 18 | Self { 19 | excess: 0, 20 | bit_vec: BitVec::new(), 21 | } 22 | } 23 | 24 | /// Create a new empty `DfsTreeBuilder` with the given capacity for nodes. 
25 | #[must_use] 26 | pub fn with_capacity(capacity: u64) -> Self { 27 | Self { 28 | excess: 0, 29 | bit_vec: BitVec::with_capacity((capacity * 2) as usize), 30 | } 31 | } 32 | } 33 | 34 | impl Default for BpBuilder { 35 | fn default() -> Self { 36 | Self::new() 37 | } 38 | } 39 | 40 | impl TreeBuilder for BpBuilder { 41 | type Tree = BpTree; 42 | 43 | fn enter_node(&mut self) { 44 | self.excess += 1; 45 | self.bit_vec.append_bit(1); 46 | } 47 | 48 | fn leave_node(&mut self) { 49 | self.excess -= 1; 50 | self.bit_vec.append_bit(0); 51 | } 52 | 53 | fn build(self) -> Result { 54 | if self.excess != 0 { 55 | Err(self.excess) 56 | } else { 57 | Ok(BpTree::from_bit_vector(self.bit_vec)) 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/trees/bp/lookup.rs: -------------------------------------------------------------------------------- 1 | //! This module provides the lookup table and lookup functionality to answer excess queries 2 | //! for 8-bit and 16-bit blocks in the tree vector. 3 | //! Note that the 8-bit version is unused, since this whole module gets replaced with 4 | //! `lookup_query.rs` if the 16-bit block feature is disabled (since that module is faster) 5 | 6 | /// How big the lookup blocks are. We store this in a constant so we can switch out this module 7 | /// using a crate feature against one where this constant is redefined to 16, but reuse the actual 8 | /// scanning code for operations on the tree. 
9 | #[cfg(feature = "bp_u16_lookup")] 10 | pub(crate) const LOOKUP_BLOCK_SIZE: u64 = 16; 11 | #[cfg(not(feature = "bp_u16_lookup"))] 12 | pub(crate) const LOOKUP_BLOCK_SIZE: u64 = 8; 13 | 14 | /// Integer type holding the blocks of the parenthesis expression we look up at once 15 | #[cfg(feature = "bp_u16_lookup")] 16 | type LookupBlockType = u16; 17 | #[cfg(not(feature = "bp_u16_lookup"))] 18 | type LookupBlockType = u8; 19 | 20 | /// Signed version of `LookupBlockType` 21 | #[cfg(feature = "bp_u16_lookup")] 22 | type SignedLookupBlockType = i16; 23 | #[cfg(not(feature = "bp_u16_lookup"))] 24 | type SignedLookupBlockType = i8; 25 | 26 | /// Data type we use in the lookup table to store excess values for lookup. Needs to be one size larger 27 | /// than `LookupBlockType` 28 | #[cfg(feature = "bp_u16_lookup")] 29 | type EncodedTableType = u16; 30 | #[cfg(not(feature = "bp_u16_lookup"))] 31 | type EncodedTableType = u16; 32 | 33 | /// Maximum value that `LookupBlockType` can hold, stored in one size larger because we need to 34 | /// iterate up to and including it 35 | #[cfg(feature = "bp_u16_lookup")] 36 | const LOOKUP_MAX_VALUE: u32 = u16::MAX as u32; 37 | #[cfg(not(feature = "bp_u16_lookup"))] 38 | const LOOKUP_MAX_VALUE: u32 = u8::MAX as u32; 39 | 40 | /// The lookup entry is indexed by the numerical value of a parenthesis expression block. The table 41 | /// contains the minimum, maximum, and total excess encoded in a single integer. 42 | /// 43 | /// The encoding scheme is simple: 44 | /// The least significant 5 (6) bits encode maximum excess (which is between -8 (-16) and 8 (16), 45 | /// which we store with an offset of 8 (16), so we don't have to deal with dual encoding), 46 | /// the next 5 (6) bits are the minimum excess encoded analogously. We do not encode total excess, 47 | /// as that can easily be calculated using popcnt. 48 | /// 49 | /// The rest of the bits are zero. 
50 | #[allow(long_running_const_eval)] 51 | const PAREN_BLOCK_LOOKUP: [EncodedTableType; 1 << LOOKUP_BLOCK_SIZE] = calculate_lookup_table(); 52 | 53 | /// Offset to add to encoded excess values, so negative numbers are stored as positive integers, reducing 54 | /// encoding complexity 55 | const ENCODING_OFFSET: i32 = LOOKUP_BLOCK_SIZE as i32; 56 | 57 | /// Bitmask for one of the lookup values. 58 | #[cfg(feature = "bp_u16_lookup")] 59 | const ENCODING_MASK: EncodedTableType = 0b111111; 60 | #[cfg(not(feature = "bp_u16_lookup"))] 61 | const ENCODING_MASK: EncodedTableType = 0b11111; 62 | 63 | /// Where in the encoded bit pattern to store minimum excess 64 | #[cfg(feature = "bp_u16_lookup")] 65 | const MINIMUM_EXCESS_POSITION: usize = 6; 66 | #[cfg(not(feature = "bp_u16_lookup"))] 67 | const MINIMUM_EXCESS_POSITION: usize = 5; 68 | 69 | const fn calculate_lookup_table() -> [EncodedTableType; 1 << LOOKUP_BLOCK_SIZE] { 70 | // initial sentinel values during excess computation 71 | const MORE_THAN_MAX: SignedLookupBlockType = (LOOKUP_BLOCK_SIZE + 1) as SignedLookupBlockType; 72 | const LESS_THAN_MIN: SignedLookupBlockType = -(LOOKUP_BLOCK_SIZE as SignedLookupBlockType) - 1; 73 | 74 | let mut lookup = [0; 1 << LOOKUP_BLOCK_SIZE]; 75 | let mut v: u32 = 0; 76 | while v <= LOOKUP_MAX_VALUE { 77 | let mut minimum_excess = MORE_THAN_MAX; 78 | let mut maximum_excess = LESS_THAN_MIN; 79 | let mut total_excess = 0; 80 | 81 | let mut i = 0; 82 | while i < LOOKUP_BLOCK_SIZE { 83 | if ((v >> i) & 1) == 1 { 84 | total_excess += 1; 85 | } else { 86 | total_excess -= 1; 87 | } 88 | 89 | minimum_excess = min(minimum_excess, total_excess); 90 | maximum_excess = max(maximum_excess, total_excess); 91 | i += 1; 92 | } 93 | 94 | let mut encoded: EncodedTableType = 95 | ((minimum_excess as i32 + ENCODING_OFFSET) as EncodedTableType & ENCODING_MASK) 96 | << MINIMUM_EXCESS_POSITION; 97 | encoded |= (maximum_excess as i32 + ENCODING_OFFSET) as EncodedTableType & ENCODING_MASK; 98 | lookup[v as 
usize] = encoded; 99 | 100 | v += 1; 101 | } 102 | 103 | lookup 104 | } 105 | 106 | /// Obtain the minimum excess from an encoded 16 bit value from the lookup table 107 | const fn get_minimum_excess(value: EncodedTableType) -> i64 { 108 | ((value >> MINIMUM_EXCESS_POSITION) & ENCODING_MASK) as i64 - ENCODING_OFFSET as i64 109 | } 110 | 111 | /// Obtain the minimum excess from an encoded 16 bit value from the lookup table 112 | const fn get_maximum_excess(value: EncodedTableType) -> i64 { 113 | (value & ENCODING_MASK) as i64 - ENCODING_OFFSET as i64 114 | } 115 | 116 | /// Branchless const minimum computation for values that cannot overflow 117 | const fn min(a: SignedLookupBlockType, b: SignedLookupBlockType) -> SignedLookupBlockType { 118 | b + ((a - b) 119 | & -(((a - b) as LookupBlockType >> (LOOKUP_BLOCK_SIZE - 1)) as SignedLookupBlockType)) 120 | } 121 | 122 | /// Branchless const maximum computation for values that cannot overflow 123 | const fn max(a: SignedLookupBlockType, b: SignedLookupBlockType) -> SignedLookupBlockType { 124 | a - ((a - b) 125 | & -(((a - b) as LookupBlockType >> (LOOKUP_BLOCK_SIZE - 1)) as SignedLookupBlockType)) 126 | } 127 | 128 | /// Get the total excess of a block of eight parentheses 129 | #[inline(always)] 130 | fn lookup_total_excess(block: LookupBlockType) -> i64 { 131 | block.count_ones() as i64 - block.count_zeros() as i64 132 | } 133 | 134 | /// Get the maximum excess of a block of eight parentheses 135 | #[inline(always)] 136 | fn lookup_maximum_excess(block: LookupBlockType) -> i64 { 137 | get_maximum_excess(PAREN_BLOCK_LOOKUP[block as usize]) 138 | } 139 | 140 | /// Get the minimum excess of a block of eight parentheses 141 | #[inline(always)] 142 | fn lookup_minimum_excess(block: LookupBlockType) -> i64 { 143 | get_minimum_excess(PAREN_BLOCK_LOOKUP[block as usize]) 144 | } 145 | 146 | #[inline(always)] 147 | pub(crate) fn process_block_fwd( 148 | block: LookupBlockType, 149 | relative_excess: &mut i64, 150 | ) -> Result 
{ 151 | if *relative_excess <= lookup_maximum_excess(block) 152 | && lookup_minimum_excess(block) <= *relative_excess 153 | { 154 | for i in 0..LOOKUP_BLOCK_SIZE { 155 | let bit = (block >> i) & 0x1; 156 | *relative_excess -= if bit == 1 { 1 } else { -1 }; 157 | 158 | if *relative_excess == 0 { 159 | return Ok(i); 160 | } 161 | } 162 | 163 | unreachable!() 164 | } else { 165 | *relative_excess -= lookup_total_excess(block); 166 | Err(()) 167 | } 168 | } 169 | 170 | #[inline(always)] 171 | pub(crate) fn process_block_bwd( 172 | block: LookupBlockType, 173 | relative_excess: &mut i64, 174 | ) -> Result { 175 | let total_excess = lookup_total_excess(block); 176 | if (*relative_excess + total_excess == 0) 177 | || (lookup_minimum_excess(block) <= *relative_excess + total_excess 178 | && *relative_excess + total_excess <= lookup_maximum_excess(block)) 179 | { 180 | for i in (0..LOOKUP_BLOCK_SIZE).rev() { 181 | let bit = (block >> i) & 0x1; 182 | *relative_excess += if bit == 1 { 1 } else { -1 }; 183 | 184 | if *relative_excess == 0 { 185 | return Ok(i); 186 | } 187 | } 188 | 189 | unreachable!() 190 | } else { 191 | *relative_excess += total_excess; 192 | Err(()) 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /src/trees/bp/lookup_query.rs: -------------------------------------------------------------------------------- 1 | //! This module provides a lookup table for 8-bit blocks of parenthesis, answering 2 | //! excess queries. The table contains the minimum and maximum excess of every possible 3 | //! block, and the answer to every possible relative excess query (-8 to 8). 4 | //! This module only works for 8 bit blocks, since 16 bit blocks are too large to 5 | //! efficiently store all 33 excess queries for every possible block. 6 | 7 | /// How big the lookup blocks are. 
8 | pub(crate) const LOOKUP_BLOCK_SIZE: u64 = 8; 9 | 10 | /// Integer type holding the blocks of the parenthesis expression we look up at once 11 | type LookupBlockType = u8; 12 | 13 | /// Signed version of `LookupBlockType` 14 | type SignedLookupBlockType = i8; 15 | 16 | /// Maximum value that `LookupBlockType` can hold, stored in one size larger because we need to 17 | /// iterate up to and including it 18 | const LOOKUP_MAX_VALUE: u32 = u8::MAX as u32; 19 | 20 | /// Encoded fwd query results for all possible 8-bit blocks. 21 | /// The encoding reserves 10 bits for minimum and maximum excess (shifted by 8 bits so we don't have 22 | /// to dual-encode negative excess), and another 51 bits for all 17 queries that may end in this block 23 | /// (-8 to 8 relative excess). 24 | #[allow(long_running_const_eval)] 25 | const PAREN_BLOCK_LOOKUP_FWD: [u64; 1 << LOOKUP_BLOCK_SIZE] = calculate_lookup_table(true); 26 | 27 | /// Encoded bwd query results for all possible 8-bit blocks. 28 | /// The encoding reserves 10 bits for minimum and maximum excess (shifted by 8 bits so we don't have 29 | /// to dual-encode negative excess), and another 51 bits for all 17 queries that may end in this block 30 | /// (-8 to 8 relative excess). 31 | #[allow(long_running_const_eval)] 32 | const PAREN_BLOCK_LOOKUP_BWD: [u64; 1 << LOOKUP_BLOCK_SIZE] = calculate_lookup_table(false); 33 | 34 | /// Bitmask for one of the lookup values. 
35 | const ENCODING_MASK: u64 = 0b11111; 36 | 37 | /// Where in the encoded bit pattern to store minimum excess 38 | const MINIMUM_EXCESS_POSITION: usize = 5; 39 | 40 | /// Where the encoded queries are stored in the encoded bit pattern 41 | const QUERY_BASE_POSITION: usize = 10; 42 | 43 | #[allow(clippy::cast_possible_truncation)] // we know that the values are within bounds 44 | #[allow(clippy::cast_sign_loss)] // we know that the values are within bounds 45 | const fn calculate_lookup_table(fwd: bool) -> [u64; 1 << LOOKUP_BLOCK_SIZE] { 46 | // initial sentinel values during excess computation 47 | const MORE_THAN_MAX: SignedLookupBlockType = (LOOKUP_BLOCK_SIZE + 1) as SignedLookupBlockType; 48 | const LESS_THAN_MIN: SignedLookupBlockType = -(LOOKUP_BLOCK_SIZE as SignedLookupBlockType) - 1; 49 | 50 | let mut lookup = [0; 1 << LOOKUP_BLOCK_SIZE]; 51 | let mut query_map = [-1i8; 17]; 52 | let mut v: u32 = 0; 53 | while v <= LOOKUP_MAX_VALUE { 54 | let mut minimum_excess = MORE_THAN_MAX; 55 | let mut maximum_excess = LESS_THAN_MIN; 56 | 57 | if fwd { 58 | calculate_values_fwd(v, &mut minimum_excess, &mut maximum_excess, &mut query_map); 59 | } else { 60 | calculate_values_bwd(v, &mut minimum_excess, &mut maximum_excess, &mut query_map); 61 | } 62 | 63 | let mut encoded: u64 = ((minimum_excess as i32 + LOOKUP_BLOCK_SIZE as i32) as u64 64 | & ENCODING_MASK) 65 | << MINIMUM_EXCESS_POSITION; 66 | encoded |= (maximum_excess as i32 + LOOKUP_BLOCK_SIZE as i32) as u64 & ENCODING_MASK; 67 | 68 | let mut relative_off = 0; 69 | while relative_off <= (LOOKUP_BLOCK_SIZE * 2) as usize { 70 | encoded |= ((query_map[relative_off] & 0b111) as u64) 71 | << (QUERY_BASE_POSITION + (relative_off * 3)) as u64; 72 | // reset query map to -1, so next block knows which queries are already answered 73 | query_map[relative_off] = -1; 74 | relative_off += 1; 75 | } 76 | 77 | lookup[v as usize] = encoded; 78 | v += 1; 79 | } 80 | 81 | lookup 82 | } 83 | 84 | 
/// Scan the block from bit 0 upwards, tracking the running excess, its extrema, and the first
/// position at which each relative excess value occurs.
#[allow(clippy::cast_possible_truncation)] // we know that the values are within bounds
#[allow(clippy::cast_sign_loss)] // we know that the values are within bounds
const fn calculate_values_fwd(
    v: u32,
    minimum_excess: &mut SignedLookupBlockType,
    maximum_excess: &mut SignedLookupBlockType,
    query_map: &mut [i8; 17],
) {
    let mut excess = 0;
    let mut bit = 0;
    while bit < LOOKUP_BLOCK_SIZE {
        // a 1-bit (open parenthesis) raises the excess, a 0-bit (closing one) lowers it
        excess += if ((v >> bit) & 1) == 1 { 1 } else { -1 };

        *minimum_excess = min(*minimum_excess, excess);
        *maximum_excess = max(*maximum_excess, excess);

        // remember only the FIRST position at which each relative excess is reached
        if query_map[(excess + LOOKUP_BLOCK_SIZE as i8) as usize] == -1 {
            query_map[(excess + LOOKUP_BLOCK_SIZE as i8) as usize] = bit as i8;
        }
        bit += 1;
    }
}

/// Scan the block from the most significant bit downwards with inverted signs, tracking the running
/// excess, its extrema, and the first (rightmost-first) position at which each relative excess occurs.
#[allow(clippy::cast_possible_truncation)] // we know that the values are within bounds
#[allow(clippy::cast_sign_loss)] // we know that the values are within bounds
#[allow(clippy::cast_possible_wrap)] // we know that the values are within bounds
const fn calculate_values_bwd(
    v: u32,
    minimum_excess: &mut SignedLookupBlockType,
    maximum_excess: &mut SignedLookupBlockType,
    query_map: &mut [i8; 17],
) {
    let mut excess = 0;
    // walk from bit 7 down to bit 0
    let mut bit = LOOKUP_BLOCK_SIZE as i64 - 1;
    while bit >= 0 {
        // signs are inverted relative to the forward scan
        excess += if ((v >> bit) & 1) == 1 { -1 } else { 1 };

        *minimum_excess = min(*minimum_excess, excess);
        *maximum_excess = max(*maximum_excess, excess);

        if query_map[(excess + LOOKUP_BLOCK_SIZE as i8) as usize] == -1 {
            query_map[(excess + LOOKUP_BLOCK_SIZE as i8) as usize] = bit as i8;
        }
        bit -= 1;
    }
}

#[allow(clippy::cast_possible_truncation)] // we know that the table values are within bounds
#[allow(clippy::cast_sign_loss)] // we know that the table values are within bounds 141 | #[allow(clippy::cast_possible_wrap)] // we know that the table values are within bounds 142 | const fn answer_query(value: u64, relative_excess: i64) -> u64 { 143 | debug_assert!(relative_excess.abs() <= LOOKUP_BLOCK_SIZE as i64); 144 | (value >> (QUERY_BASE_POSITION + ((relative_excess + 8) as usize * 3))) & 0b111 145 | } 146 | 147 | /// Obtain the minimum excess from an encoded 16 bit value from the lookup table 148 | #[allow(clippy::cast_possible_truncation)] // we know that the table values are within bounds 149 | #[allow(clippy::cast_sign_loss)] // we know that the table values are within bounds 150 | #[allow(clippy::cast_possible_wrap)] // we know that the table values are within bounds 151 | const fn get_minimum_excess(value: u64) -> i64 { 152 | ((value >> MINIMUM_EXCESS_POSITION) & ENCODING_MASK) as i64 - LOOKUP_BLOCK_SIZE as i64 153 | } 154 | 155 | /// Obtain the minimum excess from an encoded 16 bit value from the lookup table 156 | #[allow(clippy::cast_possible_truncation)] // we know that the table values are within bounds 157 | #[allow(clippy::cast_sign_loss)] // we know that the table values are within bounds 158 | #[allow(clippy::cast_possible_wrap)] // we know that the table values are within bounds 159 | const fn get_maximum_excess(value: u64) -> i64 { 160 | (value & ENCODING_MASK) as i64 - LOOKUP_BLOCK_SIZE as i64 161 | } 162 | 163 | /// Branchless const minimum computation for values that cannot overflow 164 | #[allow(clippy::cast_possible_truncation)] // we only call this with values that are within bounds 165 | #[allow(clippy::cast_sign_loss)] // we only call this with values that are within bounds 166 | #[allow(clippy::cast_possible_wrap)] // we only call this with values that are within bounds 167 | const fn min(a: SignedLookupBlockType, b: SignedLookupBlockType) -> SignedLookupBlockType { 168 | b + ((a - b) 169 | & -(((a - b) as LookupBlockType >> 
(LOOKUP_BLOCK_SIZE - 1)) as SignedLookupBlockType)) 170 | } 171 | 172 | /// Branchless const maximum computation for values that cannot overflow 173 | #[allow(clippy::cast_possible_truncation)] // we only call this with values that are within bounds 174 | #[allow(clippy::cast_sign_loss)] // we only call this with values that are within bounds 175 | #[allow(clippy::cast_possible_wrap)] // we only call this with values that are within bounds 176 | const fn max(a: SignedLookupBlockType, b: SignedLookupBlockType) -> SignedLookupBlockType { 177 | a - ((a - b) 178 | & -(((a - b) as LookupBlockType >> (LOOKUP_BLOCK_SIZE - 1)) as SignedLookupBlockType)) 179 | } 180 | 181 | /// Get the total excess of a block of eight parentheses 182 | #[inline(always)] 183 | fn lookup_total_excess(block: LookupBlockType) -> i64 { 184 | i64::from(block.count_ones()) - i64::from(block.count_zeros()) 185 | } 186 | 187 | /// Get the maximum excess of a block of eight parentheses 188 | #[inline(always)] 189 | fn lookup_maximum_excess(block: LookupBlockType) -> i64 { 190 | get_maximum_excess(PAREN_BLOCK_LOOKUP_FWD[block as usize]) 191 | } 192 | 193 | /// Get the minimum excess of a block of eight parentheses 194 | #[inline(always)] 195 | fn lookup_minimum_excess(block: LookupBlockType) -> i64 { 196 | get_minimum_excess(PAREN_BLOCK_LOOKUP_FWD[block as usize]) 197 | } 198 | 199 | #[inline(always)] 200 | pub(crate) fn process_block_fwd( 201 | block: LookupBlockType, 202 | relative_excess: &mut i64, 203 | ) -> Result { 204 | if *relative_excess <= lookup_maximum_excess(block) 205 | && lookup_minimum_excess(block) <= *relative_excess 206 | { 207 | Ok(answer_query( 208 | PAREN_BLOCK_LOOKUP_FWD[block as usize], 209 | *relative_excess, 210 | )) 211 | } else { 212 | *relative_excess -= lookup_total_excess(block); 213 | Err(()) 214 | } 215 | } 216 | 217 | #[inline(always)] 218 | pub(crate) fn process_block_bwd( 219 | block: LookupBlockType, 220 | relative_excess: &mut i64, 221 | ) -> Result { 222 | let 
total_excess = lookup_total_excess(block); 223 | if (*relative_excess + total_excess == 0) 224 | || (lookup_minimum_excess(block) <= *relative_excess + total_excess 225 | && *relative_excess + total_excess <= lookup_maximum_excess(block)) 226 | { 227 | Ok(answer_query( 228 | PAREN_BLOCK_LOOKUP_BWD[block as usize], 229 | *relative_excess, 230 | )) 231 | } else { 232 | *relative_excess += total_excess; 233 | Err(()) 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /src/trees/mod.rs: -------------------------------------------------------------------------------- 1 | //! Tree data structures. Currently only the [BP][bp] tree is exposed. 2 | //! The trees are succinct, approaching the information-theoretic lower bound for the space complexity: 3 | //! They need O(n) bits to store a tree with n nodes, and theoretically o(n) extra bits to support queries. 4 | //! However, this is relaxed to O(n) with a factor smaller than 1 in practice. 5 | //! 6 | //! For details, see the submodules. 7 | 8 | pub mod bp; 9 | 10 | pub(crate) mod mmt; 11 | 12 | /// A trait for succinct tree data structures defining the most basic tree navigation operations. 13 | pub trait Tree { 14 | /// A type that represents a node during tree navigation. Note that the handle is not necessarily 15 | /// a contiguous index. 16 | type NodeHandle; 17 | 18 | /// Returns the root node of the tree, if the tree isn't empty. 19 | /// If the tree is unbalanced, the result is meaningless. 20 | fn root(&self) -> Option; 21 | 22 | /// Returns the parent of a node, if it exists. 23 | /// If `node` is not a valid node handle, the result is meaningless. 24 | fn parent(&self, node: Self::NodeHandle) -> Option; 25 | 26 | /// Returns the left child of a node, if it exists. 27 | /// If `node` is not a valid node handle, the result is meaningless. 28 | fn first_child(&self, node: Self::NodeHandle) -> Option; 29 | 30 | /// Returns the left sibling of a node, if it exists. 
31 | /// If `node` is not a valid node handle, the result is meaningless. 32 | fn next_sibling(&self, node: Self::NodeHandle) -> Option; 33 | 34 | /// Returns the right sibling of a node, if it exists. 35 | /// If `node` is not a valid node handle, the result is meaningless. 36 | fn previous_sibling(&self, node: Self::NodeHandle) -> Option; 37 | 38 | /// Returns the rightmost child of a node, if it exists. 39 | /// If `node` is not a valid node handle, the result is meaningless. 40 | fn last_child(&self, node: Self::NodeHandle) -> Option; 41 | 42 | /// Convert a node handle into a contiguous index, allowing associated data to be stored in a vector. 43 | /// If `node` is not a valid node handle, the result is meaningless. 44 | fn node_index(&self, node: Self::NodeHandle) -> usize; 45 | 46 | /// Convert a contiguous index that enumerates all nodes into a node handle. 47 | /// This operation is the inverse of `node_index`. 48 | /// The index must be in the range `0..self.size()`. 49 | /// 50 | /// If the index is out of bounds, the behavior is unspecified. 51 | fn node_handle(&self, index: usize) -> Self::NodeHandle; 52 | 53 | /// Returns true if the node is a leaf. 54 | /// If `node` is not a valid node handle, the result is meaningless. 55 | fn is_leaf(&self, node: Self::NodeHandle) -> bool; 56 | 57 | /// Returns the depth of the node in the tree. 58 | /// The root node has depth 0. 59 | /// If `node` is not a valid node handle, the result is meaningless. 60 | /// 61 | /// If the tree is unbalanced, the result is zero for nodes that are preceded by too many closing 62 | /// parenthesis. 63 | fn depth(&self, node: Self::NodeHandle) -> u64; 64 | 65 | /// Returns the number of nodes in the tree. 66 | fn size(&self) -> usize; 67 | 68 | /// Returns true, if the tree has no nodes. 69 | fn is_empty(&self) -> bool { 70 | self.size() == 0 71 | } 72 | } 73 | 74 | /// A trait for succinct tree data structures that support [`subtree_size`] queries. 
75 | /// 76 | /// [`subtree_size`]: SubtreeSize::subtree_size 77 | pub trait SubtreeSize: Tree { 78 | /// Returns the number of nodes in the subtree rooted at the given node. 79 | /// This includes the node itself, meaning the minimum subtree size is 1. 80 | /// If the function is called on an invalid node handle, the result is meaningless. 81 | /// 82 | /// Returns `None` if the `node` has no closing parenthesis (in an unbalanced parenthesis 83 | /// expression). 84 | fn subtree_size(&self, node: Self::NodeHandle) -> Option; 85 | } 86 | 87 | /// A trait for succinct tree data structures that support [`is_ancestor`] queries. 88 | /// 89 | /// [`is_ancestor`]: IsAncestor::is_ancestor 90 | pub trait IsAncestor: Tree { 91 | /// Returns true if `ancestor` is an ancestor of the `descendant` node. 92 | /// Note that a node is considered an ancestor of itself. 93 | /// 94 | /// Returns `None` if the parenthesis expression is unbalanced and `ancestor` does not have a 95 | /// closing parenthesis. 96 | fn is_ancestor(&self, ancestor: Self::NodeHandle, descendant: Self::NodeHandle) 97 | -> Option; 98 | } 99 | 100 | /// A trait for succinct tree data structures that support level-order traversal. 101 | pub trait LevelTree: Tree { 102 | /// Returns the `level`'th ancestor of the given node, if it exists. If the level is 0, `node` 103 | /// is returned. If `node` is not a valid node handle, the result is meaningless. 104 | fn level_ancestor(&self, node: Self::NodeHandle, level: u64) -> Option; 105 | 106 | /// Returns the next node in the level order traversal of the tree, if it exists. 107 | fn level_next(&self, node: Self::NodeHandle) -> Option; 108 | 109 | /// Returns the previous node in the level order traversal of the tree, if it exists. 110 | fn level_prev(&self, node: Self::NodeHandle) -> Option; 111 | 112 | /// Returns the leftmost node at the given level, if it exists. 
113 | fn level_leftmost(&self, level: u64) -> Option; 114 | 115 | /// Returns the rightmost node at the given level, if it exists. 116 | fn level_rightmost(&self, level: u64) -> Option; 117 | } 118 | 119 | /// This trait provides the functionality to build a tree by visiting its nodes in depth first 120 | /// search order. The caller should call [`enter_node`] for each node visited in pre-order depth-first 121 | /// traversal, and [`leave_node`] once the node's subtree was visited (i.e. post-order). 122 | /// 123 | /// Once the full tree has been visited, the caller must call [`build`] to create an instance of the 124 | /// implementing tree type. 125 | pub trait TreeBuilder { 126 | /// The tree type constructed with this interface 127 | type Tree; 128 | 129 | /// Called to create a new node in the tree builder 130 | fn enter_node(&mut self); 131 | 132 | /// Called after the subtree of a node in the tree has already been visited. 133 | fn leave_node(&mut self); 134 | 135 | /// Finalize the tree instance. 136 | /// 137 | /// # Errors 138 | /// Returns `Err(excess)` if the constructed tree is invalid 139 | /// (i.e. there are nodes for which [`leave_node`] has not been called, 140 | /// or there are more calls to `leave_node` than to [`enter_node`]; 141 | /// the number of extraneous calls to `enter_node` is returned in the error). 142 | fn build(self) -> Result; 143 | } 144 | -------------------------------------------------------------------------------- /src/util/elias_fano_iter.rs: -------------------------------------------------------------------------------- 1 | macro_rules! gen_ef_iter_impl { 2 | ($($life:lifetime, )? $name:ident, $converter:ident) => { 3 | impl $(<$life>)? $name $(<$life>)? { 4 | /// Advances the iterator by `n` elements. Returns an error if the iterator does not have 5 | /// enough elements left. Does not call `next` internally. 
6 | /// This method is currently being added to the iterator trait, see 7 | /// [this issue](https://github.com/rust-lang/rust/issues/77404). 8 | /// As soon as it is stabilized, this method will be removed and replaced with a custom 9 | /// implementation in the iterator impl. 10 | fn advance_by(&mut self, n: usize) -> Result<(), std::num::NonZeroUsize> { 11 | if n == 0 { 12 | return Ok(()); 13 | } 14 | 15 | if Some(self.index + n - 1) > self.back_index { 16 | if Some(self.index) > self.back_index { 17 | Err(std::num::NonZeroUsize::new(n).unwrap()) 18 | } else { 19 | Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&usize::MAX).wrapping_sub(self.index).wrapping_add(1))).unwrap()) 20 | } 21 | } else { 22 | self.index += n; 23 | if n > 0 { 24 | // since advance_by is not stable yet, we need to call nth - 1. 25 | self.upper_iter.nth(n - 1).expect("upper iterator should not be exhausted"); 26 | } 27 | Ok(()) 28 | } 29 | } 30 | 31 | /// Advances the back iterator by `n` elements. Returns an error if the iterator does not have 32 | /// enough elements left. Does not call `next` internally. 33 | /// This method is currently being added to the iterator trait, see 34 | /// [this issue](https://github.com/rust-lang/rust/issues/77404). 35 | /// As soon as it is stabilized, this method will be removed and replaced with a custom 36 | /// implementation in the iterator impl. 
37 | fn advance_back_by(&mut self, n: usize) -> Result<(), std::num::NonZeroUsize> { 38 | if n == 0 { 39 | return Ok(()); 40 | } 41 | 42 | // special case this, because otherwise back_index might be None and we would panic 43 | if self.is_iter_empty() { 44 | return Err(std::num::NonZeroUsize::new(n).unwrap()); 45 | } 46 | 47 | // since the cursors point to unconsumed items, we need to add 1 48 | let remaining = *self.back_index.as_ref().unwrap() - self.index + 1; 49 | if remaining < n { 50 | return Err(std::num::NonZeroUsize::new(n - remaining).unwrap()); 51 | } 52 | self.back_index = if self.back_index >= Some(n) { self.back_index.map(|b| b - n) } else { None }; 53 | if n > 0 { 54 | // since advance_by is not stable yet, we need to call nth - 1. 55 | self.upper_iter.nth_back(n - 1).expect("upper iterator should not be exhausted"); 56 | } 57 | Ok(()) 58 | } 59 | 60 | fn is_iter_empty(&self) -> bool { 61 | // this is legal because Ord is behaving as expected on Option 62 | Some(self.index) > self.back_index 63 | } 64 | } 65 | 66 | impl $(<$life>)? Iterator for $name $(<$life>)? { 67 | type Item = u64; 68 | 69 | fn next(&mut self) -> Option { 70 | if let Some(upper) = self.upper_iter.next() { 71 | let upper = upper - self.index - 1; 72 | let lower = self 73 | .vec 74 | .get_bits_unchecked(self.index * self.lower_len, self.lower_len); 75 | self.index += 1; 76 | Some((((upper as u64) << self.lower_len) | lower) + self.universe_zero) 77 | } else { 78 | None 79 | } 80 | } 81 | 82 | /// Returns the number of elements that this iterator will iterate over. The size is 83 | /// precise. 84 | fn size_hint(&self) -> (usize, Option) { 85 | (self.len(), Some(self.len())) 86 | } 87 | 88 | /// Returns the exact number of elements that this iterator would iterate over. Does not 89 | /// call `next` internally. 90 | fn count(self) -> usize 91 | where 92 | Self: Sized, 93 | { 94 | self.len() 95 | } 96 | 97 | /// Returns the last element of the iterator. 
Does not call `next` internally. 98 | fn last(self) -> Option 99 | where 100 | Self: Sized, 101 | { 102 | if self.is_iter_empty() { 103 | return None; 104 | } 105 | 106 | let upper = self.upper_iter.last().unwrap() - self.back_index.unwrap() - 1; 107 | let lower = self 108 | .vec 109 | .get_bits_unchecked(self.back_index.unwrap() * self.lower_len, self.lower_len); 110 | Some(((upper as u64) << self.lower_len) | lower) 111 | } 112 | 113 | /// Returns the nth element of the iterator. Does not call `next` internally, but advances 114 | /// the iterator by `n` elements. 115 | fn nth(&mut self, n: usize) -> Option { 116 | self.advance_by(n).ok()?; 117 | self.next() 118 | } 119 | 120 | /// Returns the minimum remaining element of the iterator. 121 | /// Operates in constant time, because Elias-Fano vectors are sorted. 122 | fn min(mut self) -> Option 123 | where 124 | Self: Sized, 125 | Self::Item: Ord, 126 | { 127 | self.next() 128 | } 129 | 130 | /// Returns the maximum remaining element of the iterator. Operates in constant time, 131 | /// because Elias-Fano vectors are sorted. 132 | fn max(self) -> Option 133 | where 134 | Self: Sized, 135 | Self::Item: Ord, 136 | { 137 | self.last() 138 | } 139 | } 140 | 141 | impl $(<$life>)? std::iter::ExactSizeIterator for $name $(<$life>)? { 142 | fn len(&self) -> usize { 143 | // intentionally overflowing calculations to avoid branches on empty iterator 144 | (*self.back_index.as_ref().unwrap_or(&usize::MAX)).wrapping_sub(self.index).wrapping_add(1) 145 | } 146 | } 147 | 148 | impl $(<$life>)? std::iter::FusedIterator for $name $(<$life>)? {} 149 | 150 | impl $(<$life>)? std::iter::DoubleEndedIterator for $name $(<$life>)? 
{ 151 | fn next_back(&mut self) -> Option { 152 | if let Some(upper) = self.upper_iter.next_back() { 153 | let index_back = self.back_index.unwrap(); 154 | let upper = upper - index_back - 1; 155 | let lower = self 156 | .vec 157 | .get_bits_unchecked(index_back * self.lower_len, self.lower_len); 158 | if *self.back_index.as_ref().unwrap() == 0 { 159 | self.back_index = None; 160 | } else { 161 | self.back_index = Some(self.back_index.unwrap() - 1); 162 | } 163 | Some((((upper as u64) << self.lower_len) | lower) + self.universe_zero) 164 | } else { 165 | None 166 | } 167 | } 168 | 169 | fn nth_back(&mut self, n: usize) -> Option { 170 | self.advance_back_by(n).ok()?; 171 | self.next_back() 172 | } 173 | } 174 | }; 175 | } 176 | 177 | macro_rules! impl_ef_iterator { 178 | ($own:ident, $bor:ident) => { 179 | #[doc = concat!("An owning iterator for `", stringify!($type), "`.")] 180 | #[doc = concat!("This struct is created by the `into_iter` trait implementation of `", stringify!($type), "`.")] 181 | #[derive(Clone, Debug)] 182 | pub struct $own { 183 | upper_iter: crate::bit_vec::fast_rs_vec::SelectIntoIter, 184 | vec: crate::bit_vec::BitVec, 185 | index: usize, 186 | // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by 187 | // a call to next_back()). It can be Some(..) 
even if the iterator is empty 188 | back_index: Option, 189 | lower_len: usize, 190 | universe_zero: u64, 191 | } 192 | 193 | impl $own { 194 | #[must_use] 195 | fn new(vec: crate::elias_fano::EliasFanoVec) -> Self { 196 | if vec.is_empty() { 197 | return Self { 198 | upper_iter: vec.upper_vec.into_iter1(), 199 | vec: vec.lower_vec, 200 | index: 0, 201 | back_index: None, 202 | lower_len: vec.lower_len, 203 | universe_zero: vec.universe_zero, 204 | }; 205 | } 206 | 207 | let last = vec.len - 1; 208 | Self { 209 | upper_iter: vec.upper_vec.into_iter1(), 210 | vec: vec.lower_vec, 211 | index: 0, 212 | back_index: Some(last), 213 | lower_len: vec.lower_len, 214 | universe_zero: vec.universe_zero, 215 | } 216 | } 217 | } 218 | 219 | impl EliasFanoVec { 220 | #[doc = concat!("Returns an iterator over the elements of `", stringify!($type), "`.")] 221 | #[must_use] 222 | pub fn iter(&self) -> $bor<'_> { 223 | $bor::new(self) 224 | } 225 | } 226 | 227 | #[doc = concat!("A borrowing iterator for `", stringify!($type), "`.")] 228 | #[doc = concat!("This struct is created by the `iter` method of `", stringify!($type), "`.")] 229 | #[derive(Clone, Debug)] 230 | pub struct $bor<'a> { 231 | upper_iter: crate::bit_vec::fast_rs_vec::SelectIter<'a, false>, 232 | vec: &'a crate::bit_vec::BitVec, 233 | index: usize, 234 | // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by 235 | // a call to next_back()). It can be Some(..) 
even if the iterator is empty 236 | back_index: Option, 237 | lower_len: usize, 238 | universe_zero: u64, 239 | } 240 | 241 | impl<'a> $bor<'a> { 242 | #[must_use] 243 | fn new(vec: &'a crate::elias_fano::EliasFanoVec) -> Self { 244 | if vec.is_empty() { 245 | return Self { 246 | upper_iter: vec.upper_vec.iter1(), 247 | vec: &vec.lower_vec, 248 | index: 0, 249 | back_index: None, 250 | lower_len: vec.lower_len, 251 | universe_zero: vec.universe_zero, 252 | }; 253 | } 254 | 255 | let last = vec.len - 1; 256 | Self { 257 | upper_iter: vec.upper_vec.iter1(), 258 | vec: &vec.lower_vec, 259 | index: 0, 260 | back_index: Some(last), 261 | lower_len: vec.lower_len, 262 | universe_zero: vec.universe_zero, 263 | } 264 | } 265 | } 266 | 267 | crate::util::impl_into_iterator_impls!(EliasFanoVec, $own, $bor); 268 | 269 | crate::util::gen_ef_iter_impl!($own, into_iter1); 270 | 271 | crate::util::gen_ef_iter_impl!('a, $bor, iter1); 272 | }; 273 | } 274 | 275 | pub(crate) use gen_ef_iter_impl; 276 | pub(crate) use impl_ef_iterator; 277 | -------------------------------------------------------------------------------- /src/util/general_iter.rs: -------------------------------------------------------------------------------- 1 | // This macro generates the implementations for the iterator trait and relevant other traits for the 2 | // vector types. 3 | macro_rules! gen_vector_iter_impl { 4 | ($($life:lifetime, )? $name:ident, $type:ty, $item:ty, $get_unchecked:ident, $get:ident) => { 5 | impl $(<$life>)? $name $(<$life>)? { 6 | #[must_use] 7 | fn new(vec: $(&$life)? $type) -> Self { 8 | if vec.is_empty() { 9 | return Self { 10 | vec, 11 | index: 0, 12 | back_index: None, 13 | }; 14 | } 15 | 16 | let last = vec.len() - 1; 17 | Self { 18 | vec, 19 | index: 0, 20 | back_index: Some(last), 21 | } 22 | } 23 | 24 | /// Advances the iterator by `n` elements. Returns an error if the iterator does not have 25 | /// enough elements left. Does not call `next` internally. 
26 | /// This method is currently being added to the iterator trait, see 27 | /// [this issue](https://github.com/rust-lang/rust/issues/77404). 28 | /// As soon as it is stabilized, this method will be removed and replaced with a custom 29 | /// implementation in the iterator impl. 30 | fn advance_by(&mut self, n: usize) -> Result<(), std::num::NonZeroUsize> { 31 | if n == 0 { 32 | return Ok(()); 33 | } 34 | 35 | if Some(self.index + n - 1) > self.back_index { 36 | if Some(self.index) > self.back_index { 37 | Err(std::num::NonZeroUsize::new(n).unwrap()) 38 | } else { 39 | Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&usize::MAX).wrapping_sub(self.index).wrapping_add(1))).unwrap()) 40 | } 41 | } else { 42 | self.index += n; 43 | Ok(()) 44 | } 45 | } 46 | 47 | /// Advances the back iterator by `n` elements. Returns an error if the iterator does not have 48 | /// enough elements left. Does not call `next` internally. 49 | /// This method is currently being added to the iterator trait, see 50 | /// [this issue](https://github.com/rust-lang/rust/issues/77404). 51 | /// As soon as it is stabilized, this method will be removed and replaced with a custom 52 | /// implementation in the iterator impl. 
53 | fn advance_back_by(&mut self, n: usize) -> Result<(), std::num::NonZeroUsize> { 54 | if n == 0 { 55 | return Ok(()); 56 | } 57 | 58 | // special case this, because otherwise back_index might be None and we would panic 59 | if self.is_iter_empty() { 60 | return Err(std::num::NonZeroUsize::new(n).unwrap()); 61 | } 62 | 63 | // since the cursors point to unconsumed items, we need to add 1 64 | let remaining = *self.back_index.as_ref().unwrap() - self.index + 1; 65 | if remaining < n { 66 | return Err(std::num::NonZeroUsize::new(n - remaining).unwrap()); 67 | } 68 | self.back_index = if self.back_index >= Some(n) { self.back_index.map(|b| b - n) } else { None }; 69 | Ok(()) 70 | } 71 | 72 | fn is_iter_empty(&self) -> bool { 73 | // this is legal because Ord is behaving as expected on Option 74 | Some(self.index) > self.back_index 75 | } 76 | } 77 | 78 | impl $(<$life>)? Iterator for $name $(<$life>)? { 79 | type Item = $item; 80 | 81 | fn next(&mut self) -> Option { 82 | if self.is_iter_empty() { 83 | return None; 84 | } 85 | self.vec.$get(self.index).map(|v| { 86 | self.index += 1; 87 | v 88 | }) 89 | } 90 | 91 | /// Returns the number of elements that this iterator will iterate over. The size is 92 | /// precise. 93 | fn size_hint(&self) -> (usize, Option) { 94 | (self.len(), Some(self.len())) 95 | } 96 | 97 | /// Returns the exact number of elements that this iterator would iterate over. Does not 98 | /// call `next` internally. 99 | fn count(self) -> usize 100 | where 101 | Self: Sized, 102 | { 103 | self.len() 104 | } 105 | 106 | /// Returns the last element of the iterator. Does not call `next` internally. 107 | fn last(self) -> Option 108 | where 109 | Self: Sized, 110 | { 111 | if self.is_iter_empty() { 112 | return None; 113 | } 114 | 115 | Some(self.vec.$get_unchecked(*self.back_index.as_ref().unwrap())) 116 | } 117 | 118 | /// Returns the nth element of the iterator. Does not call `next` internally, but advances 119 | /// the iterator by `n` elements. 
120 | fn nth(&mut self, n: usize) -> Option { 121 | self.advance_by(n).ok()?; 122 | self.next() 123 | } 124 | } 125 | 126 | impl $(<$life>)? std::iter::ExactSizeIterator for $name $(<$life>)? { 127 | fn len(&self) -> usize { 128 | // intentionally overflowing calculations to avoid branches on empty iterator 129 | (*self.back_index.as_ref().unwrap_or(&usize::MAX)).wrapping_sub(self.index).wrapping_add(1) 130 | } 131 | } 132 | 133 | impl $(<$life>)? std::iter::FusedIterator for $name $(<$life>)? {} 134 | 135 | impl $(<$life>)? std::iter::DoubleEndedIterator for $name $(<$life>)? { 136 | fn next_back(&mut self) -> Option { 137 | if Some(self.index) > self.back_index { 138 | return None; 139 | } 140 | self.vec.$get(*self.back_index.as_ref().unwrap()).map(|v| { 141 | if *self.back_index.as_ref().unwrap() == 0 { 142 | self.back_index = None; 143 | } else { 144 | self.back_index = Some(self.back_index.unwrap() - 1); 145 | } 146 | v 147 | }) 148 | } 149 | 150 | fn nth_back(&mut self, n: usize) -> Option { 151 | self.advance_back_by(n).ok()?; 152 | self.next_back() 153 | } 154 | } 155 | }; 156 | } 157 | 158 | /// Internal macro to implement iterators for the vector types. 159 | /// The macro accepts the name of the data structure as its first mandatory argument. 160 | /// It then expects the identifiers for the iterator types 161 | /// It generates three `IntoIterator` implementations for the vector type. 162 | /// 163 | /// It expects that the iterator type has a constructor named `new` that takes only a 164 | /// reference to / value of the data structure and returns an iterator. 165 | /// 166 | /// This macro is used by all vector types including `EliasFanoVec` 167 | /// 168 | /// The macro generates the following items: 169 | /// - An `impl` block for `VecType` that implements `IntoIterator` for `VecType`. 170 | /// - An `impl` block for `&VecType` that implements `IntoIterator` for `&VecType`. 
171 | /// - An `impl` block for `&mut VecType` that implements `IntoIterator` for `&mut VecType`. 172 | macro_rules! impl_into_iterator_impls { 173 | ($type:ty, $own:ident, $bor:ident) => { 174 | crate::util::impl_into_iterator_impls! { $type, $own, $bor, u64 } 175 | }; 176 | ($type:ty, $own:ident, $bor:ident, $element_type:ty) => { 177 | impl IntoIterator for $type { 178 | type Item = $element_type; 179 | type IntoIter = $own; 180 | 181 | fn into_iter(self) -> Self::IntoIter { 182 | $own::new(self) 183 | } 184 | } 185 | 186 | impl<'a> IntoIterator for &'a $type { 187 | type Item = $element_type; 188 | type IntoIter = $bor<'a>; 189 | 190 | fn into_iter(self) -> Self::IntoIter { 191 | $bor::new(self) 192 | } 193 | } 194 | 195 | // we allow into iter on mutable references for ease of use, 196 | // but an iter_mut() function on an immutable data structure would be nonsensical 197 | #[allow(clippy::into_iter_without_iter)] 198 | impl<'a> IntoIterator for &'a mut $type { 199 | type Item = $element_type; 200 | type IntoIter = $bor<'a>; 201 | 202 | fn into_iter(self) -> Self::IntoIter { 203 | $bor::new(self) 204 | } 205 | } 206 | }; 207 | } 208 | 209 | /// Internal macro to implement iterators for vector types. 210 | /// This macro accepts more patterns than it should, but it isn't exported. 211 | /// The macro accepts the name of the vector type as its first mandatory argument. 212 | /// It then expects two identifiers for the two iterator types. 213 | /// 214 | /// It then optionally accepts two identifiers for the getter functions that should be used, and 215 | /// a type for the return value of the getter functions. 216 | /// If not provided, it defaults to `get_unchecked` and `get` as the function names and `u64` as 217 | /// the return type. 218 | /// 219 | /// If the optional parameters are supplied, it also accepts an optional token "special", which 220 | /// generates the iterators but not the `iter` and `into_iter` functions. 
221 | /// This way the macro can be used to generate specialized iterators that are constructed 222 | /// differently. 223 | /// 224 | /// The macro expects the vector type to implement a function called `len()`. 225 | /// 226 | /// This macro is not used for the `EliasFanoVec`, because that exploits internal structure for faster 227 | /// iteration, while this macro just calls `get()` repeatedly 228 | /// 229 | /// The macro generates the following items: 230 | /// - A struct named `VecTypeIter` that implements `Iterator` for `VecType`. 231 | /// - A struct named `VecTypeRefIter` that implements `Iterator` for `&VecType` and `$mut VecType`. 232 | macro_rules! impl_vector_iterator { 233 | ($type:ty, $own:ident, $bor:ident) => { impl_vector_iterator! { $type, $own, $bor, get_unchecked, get, u64 } }; 234 | ($type:ty, $own:ident, $bor:ident, $get_unchecked:ident, $get:ident, $return_type:ty, special) => { 235 | #[doc = concat!("An owning iterator for `", stringify!($type), "`.")] 236 | #[derive(Clone, Debug)] 237 | pub struct $own { 238 | vec: $type, 239 | index: usize, 240 | // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by 241 | // a call to next_back()). It can be Some(..) even if the iterator is empty 242 | back_index: Option, 243 | } 244 | 245 | #[doc = concat!("A borrowing iterator for `", stringify!($type), "`.")] 246 | #[derive(Clone, Debug)] 247 | pub struct $bor<'a> { 248 | vec: &'a $type, 249 | index: usize, 250 | // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by 251 | // a call to next_back()). It can be Some(..) 
even if the iterator is empty 252 | back_index: Option<usize>, 253 | } 254 | 255 | crate::util::gen_vector_iter_impl!($own, $type, $return_type, $get_unchecked, $get); 256 | 257 | crate::util::gen_vector_iter_impl!('a, $bor, $type, $return_type, $get_unchecked, $get); 258 | }; 259 | ($type:ty, $own:ident, $bor:ident, $get_unchecked:ident, $get:ident, $return_type:ty) => { 260 | impl_vector_iterator! { $type, $own, $bor, $get_unchecked, $get, $return_type, special } 261 | 262 | impl $type { 263 | #[doc = concat!("Returns an iterator over the elements of `", stringify!($type), "`.")] 264 | #[doc = concat!("The iterator returns `", stringify!($return_type), "` elements.")] 265 | #[must_use] 266 | pub fn iter(&self) -> $bor<'_> { 267 | $bor::new(self) 268 | } 269 | } 270 | 271 | crate::util::impl_into_iterator_impls!($type, $own, $bor, $return_type); 272 | } 273 | } 274 | 275 | pub(crate) use gen_vector_iter_impl; 276 | pub(crate) use impl_into_iterator_impls; 277 | pub(crate) use impl_vector_iterator; 278 | -------------------------------------------------------------------------------- /src/util/mod.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod elias_fano_iter; 2 | pub(crate) mod general_iter; 3 | pub(crate) mod pdep; 4 | pub(crate) mod unroll; 5 | 6 | // reexport all macros at toplevel for convenience 7 | pub(crate) use elias_fano_iter::gen_ef_iter_impl; 8 | pub(crate) use elias_fano_iter::impl_ef_iterator; 9 | pub(crate) use general_iter::gen_vector_iter_impl; 10 | pub(crate) use general_iter::impl_into_iterator_impls; 11 | pub(crate) use general_iter::impl_vector_iterator; 12 | pub(crate) use unroll::unroll; 13 | -------------------------------------------------------------------------------- /src/util/pdep.rs: -------------------------------------------------------------------------------- 1 | //! Parallel bits deposit intrinsics for all platforms. 2 | //! 
Uses the `PDEP` instruction on `x86`/`x86_64` platforms with the `bmi2` feature enabled. 3 | 4 | // bit manipulation generally doesn't care about sign, so the caller is aware of the consequences 5 | #![allow(clippy::cast_sign_loss)] 6 | #![allow(clippy::cast_possible_wrap)] 7 | 8 | // This file is part of the `bitintr` crate and is licensed under the terms of the MIT license. 9 | // Since this crate is dual-licensed, you may choose to use this file under either the MIT license 10 | // or the Apache License, Version 2.0, at your option (in compliance with the terms of the MIT license). 11 | // 12 | // Since the `bitintr` crate is abandoned, and the version on crates.io is outdated, 13 | // the contents of this file are copied from the `bitintr` crate from the files 14 | // `src/pdep.rs`, `src/macros.rs` and `src/lib.rs` at commit `6c49e01`. 15 | // The code is functionally identical to the original code, with only minor edits to make it 16 | // self-contained and update some documentation. 17 | // None of the utils here are publicly exposed. 18 | 19 | mod arch { 20 | #[cfg(all(target_arch = "x86", target_feature = "bmi2"))] 21 | pub use core::arch::x86::*; 22 | 23 | #[cfg(all(target_arch = "x86_64", target_feature = "bmi2"))] 24 | pub use core::arch::x86_64::*; 25 | } 26 | 27 | /// Parallel bits deposit 28 | pub trait Pdep { 29 | /// Parallel bits deposit. 30 | /// 31 | /// Scatter contiguous low order bits of `x` to the result at the positions 32 | /// specified by the `mask`. 33 | /// 34 | /// All other bits (bits not set in the `mask`) of the result are set to 35 | /// zero. 36 | /// 37 | /// **Keywords**: Parallel bits deposit, scatter bits. 38 | /// 39 | /// # Instructions 40 | /// 41 | /// - [`PDEP`](http://www.felixcloutier.com/x86/PDEP.html): 42 | /// - Description: Parallel bits deposit. 43 | /// - Architecture: x86. 44 | /// - Instruction set: BMI2. 45 | /// - Registers: 32/64 bit. 
46 | /// ``` 47 | fn pdep(self, mask: Self) -> Self; 48 | } 49 | 50 | macro_rules! impl_all { 51 | ($impl_macro:ident: $($id:ident),*) => { 52 | $( 53 | $impl_macro!($id); 54 | )* 55 | } 56 | } 57 | 58 | macro_rules! cfg_if { 59 | // match if/else chains with a final `else` 60 | ($( 61 | if #[cfg($($meta:meta),*)] { $($it:item)* } 62 | ) else * else { 63 | $($it2:item)* 64 | }) => { 65 | cfg_if! { 66 | @__items 67 | () ; 68 | $( ( ($($meta),*) ($($it)*) ), )* 69 | ( () ($($it2)*) ), 70 | } 71 | }; 72 | 73 | // match if/else chains lacking a final `else` 74 | ( 75 | if #[cfg($($i_met:meta),*)] { $($i_it:item)* } 76 | $( 77 | else if #[cfg($($e_met:meta),*)] { $($e_it:item)* } 78 | )* 79 | ) => { 80 | cfg_if! { 81 | @__items 82 | () ; 83 | ( ($($i_met),*) ($($i_it)*) ), 84 | $( ( ($($e_met),*) ($($e_it)*) ), )* 85 | ( () () ), 86 | } 87 | }; 88 | 89 | // Internal and recursive macro to emit all the items 90 | // 91 | // Collects all the negated cfgs in a list at the beginning and after the 92 | // semicolon is all the remaining items 93 | (@__items ($($not:meta,)*) ; ) => {}; 94 | (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => { 95 | // Emit all items within one block, applying an approprate #[cfg]. The 96 | // #[cfg] will require all `$m` matchers specified and must also negate 97 | // all previous matchers. 98 | cfg_if! { @__apply cfg(all($($m,)* not(any($($not),*)))), $($it)* } 99 | 100 | // Recurse to emit all other items in `$rest`, and when we do so add all 101 | // our `$m` matchers to the list of `$not` matchers as future emissions 102 | // will have to negate everything we just matched as well. 103 | cfg_if! { @__items ($($not,)* $($m,)*) ; $($rest)* } 104 | }; 105 | 106 | // Internal macro to Apply a cfg attribute to a list of items 107 | (@__apply $m:meta, $($it:item)*) => { 108 | $(#[$m] $it)* 109 | }; 110 | } 111 | 112 | macro_rules! 
pdep_impl { 113 | ($ty:ty) => { 114 | #[inline] 115 | fn pdep_(value: $ty, mut mask: $ty) -> $ty { 116 | let mut res = 0; 117 | let mut bb: $ty = 1; 118 | loop { 119 | if mask == 0 { 120 | break; 121 | } 122 | if (value & bb) != 0 { 123 | res |= mask & mask.wrapping_neg(); 124 | } 125 | mask &= mask - 1; 126 | bb = bb.wrapping_add(bb); 127 | } 128 | res 129 | } 130 | }; 131 | ($ty:ty, $intr:ident) => { 132 | cfg_if! { 133 | if #[cfg(all( 134 | any(target_arch = "x86", target_arch = "x86_64"), 135 | target_feature = "bmi2" 136 | ))] { 137 | #[inline] 138 | #[target_feature(enable = "bmi2")] 139 | unsafe fn pdep_(value: $ty, mask: $ty) -> $ty { 140 | crate::util::pdep::arch::$intr( 141 | value as _, 142 | mask as _, 143 | ) as _ 144 | } 145 | } else { 146 | pdep_impl!($ty); 147 | } 148 | 149 | } 150 | }; 151 | } 152 | 153 | macro_rules! impl_pdep { 154 | ($id:ident $(,$args:ident)*) => { 155 | impl Pdep for $id { 156 | #[inline] 157 | #[allow(unused_unsafe)] 158 | fn pdep(self, mask: Self) -> Self { 159 | pdep_impl!($id $(,$args)*); 160 | // UNSAFETY: this is always safe, because 161 | // the unsafe `#[target_feature]` function 162 | // is only generated when the feature is 163 | // statically-enabled at compile-time. 164 | unsafe { pdep_(self, mask) } 165 | } 166 | } 167 | } 168 | } 169 | 170 | impl_all!(impl_pdep: u8, u16, i8, i16); 171 | 172 | cfg_if! { 173 | if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { 174 | impl_pdep!(u32, _pdep_u32); 175 | impl_pdep!(i32, _pdep_u32); 176 | cfg_if! { 177 | if #[cfg(target_arch = "x86_64")] { 178 | impl_pdep!(u64, _pdep_u64); 179 | impl_pdep!(i64, _pdep_u64); 180 | } else { 181 | impl_all!(impl_pdep: i64, u64); 182 | } 183 | } 184 | } else { 185 | impl_all!(impl_pdep: u32, i32, i64, u64); 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /src/util/unroll.rs: -------------------------------------------------------------------------------- 1 | //! 
Unroll a loop a fixed number of times. This is a macro that performs manual loop unrolling, 2 | //! because LLVM is sometimes too conservative to do it itself. 3 | //! We only use it in hyper-optimized code paths like ``rank``. 4 | 5 | macro_rules! unroll { 6 | (1, |$i:ident = {$e:expr}| $s:stmt, $inc:expr) => { let mut $i: usize = $e; $s }; 7 | (2, |$i:ident = {$e:expr}| $s:stmt, $inc:expr) => { unroll!(1, |$i = {$e}| $s, $inc); $inc; $s }; 8 | (3, |$i:ident = {$e:expr}| $s:stmt, $inc:expr) => { unroll!(2, |$i = {$e}| $s, $inc); $inc; $s }; 9 | (4, |$i:ident = {$e:expr}| $s:stmt, $inc:expr) => { unroll!(3, |$i = {$e}| $s, $inc); $inc; $s }; 10 | (5, |$i:ident = {$e:expr}| $s:stmt, $inc:expr) => { unroll!(4, |$i = {$e}| $s, $inc); $inc; $s }; 11 | (6, |$i:ident = {$e:expr}| $s:stmt, $inc:expr) => { unroll!(5, |$i = {$e}| $s, $inc); $inc; $s }; 12 | (7, |$i:ident = {$e:expr}| $s:stmt, $inc:expr) => { unroll!(6, |$i = {$e}| $s, $inc); $inc; $s }; 13 | (8, |$i:ident = {$e:expr}| $s:stmt, $inc:expr) => { unroll!(7, |$i = {$e}| $s, $inc); $inc; $s }; 14 | } 15 | 16 | // export the macro to the crate 17 | pub(crate) use unroll; 18 | --------------------------------------------------------------------------------