├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── rank_select.rs ├── build.rs └── src ├── constants.rs ├── enum_code.rs ├── lib.rs ├── rank_acceleration.rs ├── rsdict_fuzz.rs └── test_helpers.rs /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | /hfuzz_target/ 13 | /hfuzz_workspace/ 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rsdict" 3 | version = "0.0.8" 4 | authors = ["Sujay Jayakar "] 5 | description = "Fast static rank and select data structure" 6 | repository = "https://github.com/sujayakar/rsdict" 7 | homepage = "https://github.com/sujayakar/rsdict" 8 | readme = "README.md" 9 | license = "MIT/Apache-2.0" 10 | keywords = ["rank", "select", "succinct"] 11 | categories = ["data-structures"] 12 | edition = "2018" 13 | 14 | [profile.release] 15 | overflow-checks = true 16 | debug-assertions = true 17 | debug = true 18 | 19 | [features] 20 | default = [] 21 | fuzz = ["afl"] 22 | simd = [] 23 | 24 | [dependencies.afl] 25 | version = "0.13.3" 26 | optional = true 27 | 28 | [dev-dependencies] 29 | quickcheck = "1.0.3" 30 | quickcheck_macros = "1.0.0" 31 | criterion = "0.5" 32 | rand = "0.8" 33 | succinct = "0.5.2" 34 | 35 | [[bench]] 36 | name = "rank_select" 37 | harness = false 38 | 39 | [[test]] 40 | name = "rsdict_fuzz" 41 | path = "src/rsdict_fuzz.rs" 42 | required-features = ["fuzz"] 43 | harness = false 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Sujay Jayakar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RsDict: Fast rank/select over bitmaps
2 | Rank and select are two useful operations on bitmaps for building more sophisticated data
3 | structures. First, the *rank* at a given index `i` counts the number of set bits left of `i`. For
4 | example, a sparse array can be represented as a dense array of the values present and a bitmap
5 | indicating which indices are present. Then, rank provides a function from an index into the sparse
6 | array to an index into the dense one.
7 | 
8 | *Select* is the inverse of rank, where `select(B, k)` returns the index of the `k`th set bit. To make
9 | the two true inverses of each other, we use zero-indexing for select (so `select(B, 0)` returns the index of the first
10 | bit set in `B`) and rank only counts indices strictly to the left of `i`. From our previous example,
11 | `select` allows going from an index in the dense array to the original sparse array.
12 | 
13 | This data structure implements these two operations efficiently on top of an append-only bitmap. It's
14 | an implementation of [Navarro and Providel, "Fast, Small, Simple Rank/Select On
15 | Bitmaps"](https://users.dcc.uchile.cl/~gnavarro/ps/sea12.1.pdf), with heavy inspiration from a [Go
16 | implementation](https://github.com/hillbig/rsdic). The underlying bitmap is stored in compressed
17 | form, so long runs of zeros and ones do not take up much space. The indices for rank and select add
18 | about 28% overhead over the uncompressed bitmap.
19 | 
20 | For more examples of how to use rank and select to build succinct data structures, see Navarro's book
21 | on [Compact Data
22 | Structures](https://www.cambridge.org/core/books/compact-data-structures/68A5983E6F1176181291E235D0B7EB44)
23 | for an overview.
24 | 
25 | ## Implementation notes
26 | This library is mostly a port of the Go implementation with a few additional optimizations.
27 | 
28 | ### SSE acceleration for rank
29 | With the nightly-only `simd` feature and a CPU with SSSE3 support, the final step of rank is computed
30 | in a few steps without any loops. Turning this feature on improves the `rsdict::rank` benchmark by
31 | about 40% on my computer. See `rank_acceleration.rs` for more details.
32 | 
33 | ### Optimized routines for rank and select within a `u64`
34 | With a CPU that supports `popcnt`, computing rank within a small block of 64 bits will use this
35 | instruction to efficiently count the number of bits set. Select uses an adapted version of [an
36 | algorithm from Daniel Lemire's
37 | blog](https://lemire.me/blog/2018/02/21/iterating-over-set-bits-quickly/) that uses `tzcnt` to
38 | quickly skip over runs of trailing zeros.
39 | 
40 | ### Compact binomial coefficient lookup table
41 | Encoding and decoding blocks of the compressed bitmap requires computing the binomial coefficient
42 | `B(n, k)` where `0 <= k <= n <= 64`. Computing this on-the-fly is too expensive, so we store a
43 | precomputed lookup table of the coefficients. However, we exploit the symmetry of `B` in `k` to
44 | store only half the values. See `build.rs` for more details.
45 | 
46 | ## Performance
47 | Here are some results from running the benchmark on my 2018 MacBook Pro with `-C target-cpu=native`.
48 | ``` 49 | rsdict::rank time: [10.330 us 10.488 us 10.678 us] 50 | Found 4 outliers among 100 measurements (4.00%) 51 | 4 (4.00%) high mild 52 | 53 | jacobson::rank time: [17.958 us 18.335 us 18.740 us] 54 | Found 6 outliers among 100 measurements (6.00%) 55 | 1 (1.00%) high mild 56 | 5 (5.00%) high severe 57 | 58 | rank9::rank time: [6.8907 us 7.0768 us 7.2940 us] 59 | Found 1 outliers among 100 measurements (1.00%) 60 | 1 (1.00%) high severe 61 | 62 | rsdict::select0 time: [37.124 us 37.505 us 37.991 us] 63 | Found 3 outliers among 100 measurements (3.00%) 64 | 3 (3.00%) high severe 65 | 66 | rsdict::select1 time: [29.782 us 29.918 us 30.067 us] 67 | Found 7 outliers among 100 measurements (7.00%) 68 | 5 (5.00%) high mild 69 | 2 (2.00%) high severe 70 | 71 | rank9::binsearch::select0 72 | time: [229.64 us 231.54 us 233.87 us] 73 | Found 5 outliers among 100 measurements (5.00%) 74 | 2 (2.00%) high mild 75 | 3 (3.00%) high severe 76 | 77 | rank9::binsearch::select1 78 | time: [253.69 us 255.84 us 258.19 us] 79 | Found 9 outliers among 100 measurements (9.00%) 80 | 4 (4.00%) high mild 81 | 5 (5.00%) high severe 82 | ``` 83 | So for rank queries, this implementation is faster than `succinct-rs`'s Jacobson and slightly slower 84 | than its Rank9. However for select queries, it's *much* faster than doing binary search over these 85 | rank structures, so consider using this library if select is an important operation for your algorithm. 86 | 87 | ## Testing 88 | We use QuickCheck for testing data structure invariants. In addition, there's basic AFL fuzz integration 89 | to find interesting test cases using program coverage. Install [cargo-afl](https://github.com/rust-fuzz/afl.rs) 90 | and run the `rsdict_fuzz` binary with the `fuzz` feature set. 91 | ``` 92 | $ cargo install afl 93 | $ cargo afl build --release --test rsdict_fuzz --features fuzz 94 | 95 | # Create some starting bitsets within target/fuzz/in and create an empty directory target/fuzz/out. 
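# For example (illustrative commands, not from the original README):
$ mkdir -p target/fuzz/in target/fuzz/out
$ printf 'some seed input' > target/fuzz/in/seed1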
96 | $ cargo afl fuzz -i target/fuzz/in -o target/fuzz/out target/release/rsdict_fuzz
97 | ```
98 | 
--------------------------------------------------------------------------------
/benches/rank_select.rs:
--------------------------------------------------------------------------------
1 | use criterion::{black_box, criterion_group, criterion_main, Criterion};
2 | use rand::rngs::StdRng;
3 | use rand::{Rng, SeedableRng};
4 | use rsdict::RsDict;
5 | use succinct::bit_vec::{BitVecPush, BitVector};
6 | use succinct::rank::{JacobsonRank, Rank9, RankSupport};
7 | use succinct::select::{BinSearchSelect, Select0Support, Select1Support};
8 | 
9 | const NUM_BITS: usize = 1_000_000;
10 | const SEED: u64 = 88004802264174740;
11 | 
12 | fn random_bits(len: usize) -> BitVector<u64> {
13 |     let mut rng = StdRng::seed_from_u64(SEED);
14 |     let mut bv = BitVector::with_capacity(len as u64);
15 |     for _ in 0..len {
16 |         bv.push_bit(rng.gen());
17 |     }
18 |     bv
19 | }
20 | 
21 | fn random_indices(count: usize, range: usize) -> Vec<usize> {
22 |     let mut rng = StdRng::seed_from_u64(SEED);
23 |     (0..count).map(|_| rng.gen_range(0..range)).collect()
24 | }
25 | 
26 | fn bench_one_rank<T, F, G>(c: &mut Criterion, name: &str, create: F, rank: G)
27 | where
28 |     F: FnOnce(BitVector<u64>) -> T,
29 |     G: Fn(&T, u64) -> u64,
30 | {
31 |     let r = create(random_bits(NUM_BITS));
32 |     let indices = random_indices(1000, NUM_BITS);
33 |     c.bench_function(name, |b| {
34 |         b.iter(|| {
35 |             for &ix in &indices {
36 |                 rank(&r, black_box(ix as u64));
37 |             }
38 |         })
39 |     });
40 | }
41 | 
42 | fn bench_rank(c: &mut Criterion) {
43 |     bench_one_rank(
44 |         c,
45 |         "rsdict::rank",
46 |         |bits| {
47 |             let mut rs_dict = RsDict::with_capacity(NUM_BITS);
48 |             for b in bits.iter() {
49 |                 rs_dict.push(b);
50 |             }
51 |             rs_dict
52 |         },
53 |         |r, i| r.rank(i, true),
54 |     );
55 |     bench_one_rank(c, "jacobson::rank", JacobsonRank::new, |r, i| {
56 |         r.rank(i, true)
57 |     });
58 |     bench_one_rank(c, "rank9::rank", Rank9::new, |r, i| r.rank(i, true));
59 | }
60 | 
61 | fn bench_one_select<T, F, G, H>(c: &mut Criterion, name: &str, create: F, select0: G, select1: H)
62 | where
63 |     F: Fn(BitVector<u64>) -> T,
64 |     G: Fn(&T, u64) -> Option<u64>,
65 |     H: Fn(&T, u64) -> Option<u64>,
66 | {
67 |     let bits = random_bits(NUM_BITS);
68 |     let num_set = bits.iter().filter(|&b| b).count();
69 |     let r = create(bits);
70 |     let indices = random_indices(1000, num_set);
71 | 
72 |     c.bench_function(&format!("{}::select0", name), |b| {
73 |         b.iter(|| {
74 |             for &ix in &indices {
75 |                 select0(&r, black_box(ix as u64));
76 |             }
77 |         })
78 |     });
79 |     c.bench_function(&format!("{}::select1", name), |b| {
80 |         b.iter(|| {
81 |             for &ix in &indices {
82 |                 select1(&r, black_box(ix as u64));
83 |             }
84 |         })
85 |     });
86 | }
87 | 
88 | fn bench_select(c: &mut Criterion) {
89 |     bench_one_select(
90 |         c,
91 |         "rsdict",
92 |         |bits| {
93 |             let mut rs_dict = RsDict::with_capacity(NUM_BITS);
94 |             for b in bits.iter() {
95 |                 rs_dict.push(b);
96 |             }
97 |             rs_dict
98 |         },
99 |         |r, i| r.select0(i),
100 |         |r, i| r.select1(i),
101 |     );
102 |     bench_one_select(
103 |         c,
104 |         "rank9::binsearch",
105 |         |b| BinSearchSelect::new(Rank9::new(b)),
106 |         |r, i| r.select0(i),
107 |         |r, i| r.select1(i),
108 |     );
109 | }
110 | 
111 | criterion_group!(benches, bench_rank, bench_select);
112 | criterion_main!(benches);
113 | 
--------------------------------------------------------------------------------
/build.rs:
--------------------------------------------------------------------------------
1 | use std::cmp;
2 | use std::env;
3 | use std::fs;
4 | use std::path::Path;
5 | 
6 | 
// We want to precompute a table for binomial coefficients ahead of 7 | // time, since computing them on the fly is expensive. First, we 8 | // can build the table using the recurrence: 9 | // 10 | // B(n, n) = 1 11 | // B(n, k) = B(n - 1, k - 1) + B(n - 1, k) 12 | // 13 | // Here's the first few rows, where n is the row number (starting at zero), and 14 | // k is the column number (also starting at zero). 15 | // 16 | // 1 17 | // 1 1 18 | // 1 2 1 19 | // 1 3 3 1 20 | // 1 4 6 4 1 21 | // ... 22 | // 23 | // We can concatenate the rows into a flat array. Then, computing B(n, k) 24 | // involves finding the start of the nth row, and then looking up the kth 25 | // element in that row. The ith row has length i + 1, so then the nth row starts 26 | // at 1 + 2 + ... + n, which is n * (n + 1) / 2. 27 | // 28 | // However, note that each row in the table above is symmetric: B(n, k) = 29 | // B(n, n - k). So, we can cut our space usage in half by only storing the 30 | // first half of each row. 31 | // 32 | // 1 33 | // 1 34 | // 1 2 35 | // 1 3 36 | // 1 4 6 37 | // ... 38 | // 39 | // We need to be able to compute the length of a row and also efficiently compute 40 | // the beginning of each row in our array, the sum of the previous rows' lengths. 41 | // Previously, row i had length i + 1, and now it has length ceil((i + 1) / 2). 42 | // We can then use the identity `ceil((n + 1) / m) = floor(n / m) + 1` to 43 | // simplify this to `i // 2 + 1`, where `//` is integer division. 44 | // 45 | // Then, the start of row `n` is `\sum_{i=0}^{n-1} {i // 2 + 1}`, which we'd 46 | // like to reduce to something closed-form. Here's the first few values: 47 | // 48 | // n: 0 1 2 3 4 5 ... 49 | // row_len: 1 1 2 2 3 3 ... 50 | // row_start: 0 1 2 4 6 9 ... 51 | // 52 | // Let's assume `n` is even. Then, note that summing the `row_len`s to the left 53 | // is just `2 * (1 + 2 + ... + n/2)`. Then, we have 54 | // 55 | // row_start(2m) = 2 * (1 + 2 + ... 
+ m)
56 | //                   = 2 * m * (m + 1) / 2
57 | //                   = m * (m + 1)
58 | //
59 | // Then, if `n` is odd, we need to add `row_len(n - 1)`
60 | //
61 | // row_start(2m + 1) = m * (m + 1) + row_len(2m)
62 | //                   = m * (m + 1) + (m + 1)
63 | //                   = (m + 1) * (m + 1)
64 | //
65 | // Now, we can combine the two cases:
66 | //
67 | // row_start(n) = (n / 2 + n % 2) * (n / 2 + 1)
68 | //
69 | fn row_start(n: usize) -> usize {
70 |     let (q, r) = (n / 2, n % 2);
71 |     (q + r) * (q + 1)
72 | }
73 | 
74 | fn row_len(n: usize) -> usize {
75 |     n / 2 + 1
76 | }
77 | 
78 | fn lookup(row: &[u64], n: usize, k: usize) -> u64 {
79 |     row[cmp::min(k, n - k)]
80 | }
81 | 
82 | fn main() {
83 |     let out_dir = env::var_os("OUT_DIR").expect("Failed to get output directory");
84 |     let dst_path = Path::new(&out_dir).join("binomial.rs");
85 | 
86 |     let mut table = vec![];
87 | 
88 |     // Base case for n = 0.
89 |     table.push(1u64);
90 | 
91 |     for n in 1..65usize {
92 |         // Base case for k = 0
93 |         table.push(1);
94 |         for k in 1..row_len(n) {
95 |             let prev_start = row_start(n - 1);
96 |             let prev_row = &table[prev_start..(prev_start + row_len(n - 1))];
97 |             let val = lookup(prev_row, n - 1, k - 1) + lookup(prev_row, n - 1, k);
98 |             table.push(val);
99 |         }
100 |     }
101 | 
102 |     let code = format!("pub const COEFFICIENT_TABLE: &[u64; {}] = &{:?};", table.len(), table);
103 | 
104 |     fs::write(&dst_path, code).expect("Failed to write binomial coefficient table");
105 |     println!("cargo:rerun-if-changed=build.rs");
106 | }
107 | 
--------------------------------------------------------------------------------
/src/constants.rs:
--------------------------------------------------------------------------------
1 | pub const SMALL_BLOCK_SIZE: u64 = 64;
2 | pub const LARGE_BLOCK_SIZE: u64 = 1024;
3 | pub const SELECT_BLOCK_SIZE: u64 = 4096;
4 | pub const SMALL_BLOCK_PER_LARGE_BLOCK: u64 = LARGE_BLOCK_SIZE / SMALL_BLOCK_SIZE;
5 | 
--------------------------------------------------------------------------------
/src/enum_code.rs:
--------------------------------------------------------------------------------
1 | use super::constants::SMALL_BLOCK_SIZE;
2 | use std::cmp;
3 | 
4 | mod binomial {
5 |     // Load in the binomial coefficient table generated by `build.rs`.
6 |     include!(concat!(env!("OUT_DIR"), "/binomial.rs"));
7 | }
8 | 
9 | fn binomial_coefficient(n: u8, k: u8) -> u64 {
10 |     use self::binomial::COEFFICIENT_TABLE;
11 |     debug_assert!(n <= 64 && k <= 64);
12 |     debug_assert!(k <= n);
13 | 
14 |     // See `build.rs` for an explanation of how the table is indexed.
15 |     let (q, r) = (n as usize / 2, n as usize % 2);
16 |     let row_start = (q + r) * (q + 1);
17 |     let k = cmp::min(k, n - k) as usize;
18 |     COEFFICIENT_TABLE[row_start + k]
19 | }
20 | 
21 | // Let's say we're coding a bitstring of `n` bits, where `k` of them are set. There are `n` choose
22 | // `k` different bitstrings of this form, so we'd like to map our bitstring to an integer in the
23 | // range
24 | //
25 | //    [0, binomial_coefficient(n, k)).
26 | //
27 | // If `n == k == 0`, then `binomial_coefficient(0, 0) = 1`, so we must return zero as our coded
28 | // value. Let's assume that `n > 0`, so the bitstring is nonempty, with a first "head" bit and
29 | // the remaining `n - 1` tail bits. We compute our code inductively on the tail bits. We'd like to
30 | // transform this coded value in some way to be able to also recover the head bit when decoding.
31 | //
32 | // If `n == k` or `k == 0`, the bitstring must be all ones or zeros, respectively.
So, we know 33 | // upfront what the head bit must be, so we don't need to emit any new information, and we can 34 | // return the tail's coded value. 35 | // 36 | // If `0 < k < n`, the head bit may either be zero or one. If it's zero, the range of the coded 37 | // tail is `[0, binomial_coefficient(n - 1, k))`, since there are `k` bits in the tail. If the 38 | // head bit is set, the tail's range is `[0, binomial_coefficient(n - 1, k - 1))`. We can code 39 | // our head bit by gluing these two ranges together, creating a final value in 40 | // `[0, binomial_coefficient(n - 1, k) + binomial_coefficient(n - 1, k - 1))`. 41 | // 42 | // 43 | // [ binomial_coefficient(n - 1, k) ][ binomial_coefficient(n - 1, k - 1) ] 44 | // head bit is zero ^ head bit is one 45 | // zero_case_count 46 | // 47 | // Then, `zero_case_count` indicates the start of the region where the head bit is one or, 48 | // alternatively, the number of different tail bitstrings where the head bit is zero. If `n == k`, 49 | // `zero_case_count` must be zero since it's impossible for the first bit to be zero. 50 | // 51 | fn zero_case_count(n: u8, k: u8) -> u64 { 52 | if n == k { 53 | 0 54 | } else { 55 | binomial_coefficient(n - 1, k) 56 | } 57 | } 58 | 59 | pub fn encode(value: u64, class: u8) -> (u8, u64) { 60 | debug_assert_eq!(value.count_ones() as u8, class); 61 | let code_len = ENUM_CODE_LENGTH[class as usize]; 62 | 63 | // Fast path: return the integer unchanged if we're using all of our bits. 64 | if code_len == SMALL_BLOCK_SIZE as u8 { 65 | return (code_len, value); 66 | } 67 | 68 | let mut code = 0u64; 69 | let mut k = class; 70 | for i in 0..(SMALL_BLOCK_SIZE as u8) { 71 | let n = SMALL_BLOCK_SIZE as u8 - i; 72 | if (value >> i) & 1 != 0 { 73 | code += zero_case_count(n, k); 74 | k -= 1; 75 | } 76 | } 77 | (code_len, code) 78 | } 79 | 80 | #[cfg(test)] 81 | pub fn decode(mut code: u64, class: u8) -> u64 { 82 | if ENUM_CODE_LENGTH[class as usize] == SMALL_BLOCK_SIZE as u8 { 83 | return code; 84 | } 85 | let mut value = 0u64; 86 | let mut k = class; 87 | for i in 0..(SMALL_BLOCK_SIZE as u8) { 88 | let n = SMALL_BLOCK_SIZE as u8 - i; 89 | let z = zero_case_count(n, k); 90 | if code >= z { 91 | value |= 1 << i; 92 | code -= z; 93 | k -= 1; 94 | } 95 | } 96 | value 97 | } 98 | 99 | pub fn decode_bit(mut code: u64, class: u8, pos: u64) -> bool { 100 | if ENUM_CODE_LENGTH[class as usize] == SMALL_BLOCK_SIZE as u8 { 101 | return (code >> pos) & 1 != 0; 102 | } 103 | let mut k = class; 104 | for i in 0..(pos as u8) { 105 | let n = SMALL_BLOCK_SIZE as u8 - i; 106 | let z = zero_case_count(n, k); 107 | if code >= z { 108 | code -= z; 109 | k -= 1; 110 | } 111 | } 112 | 113 | let n = SMALL_BLOCK_SIZE - pos; 114 | code >= zero_case_count(n as u8, k) 115 | } 116 | 117 | #[inline(always)] 118 | fn rank_impl(mut code: u64, class: u8, pos: u64) -> u64 { 119 | if ENUM_CODE_LENGTH[class as usize] == SMALL_BLOCK_SIZE as u8 { 120 | return (code & ((1 << pos) - 1)).count_ones() as u64; 121 | } 122 | let mut cur_rank = class; 123 | for i in 0..pos { 124 | let n = SMALL_BLOCK_SIZE - i; 125 | let z = zero_case_count(n as u8, cur_rank); 126 | if code >= z { 127 | code -= z; 128 | cur_rank -= 1; 129 | } 130 | } 131 | (class - cur_rank) as u64 132 | } 133 | 134 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 135 | #[target_feature(enable = "popcnt")] 136 | unsafe fn rank_with_popcount(code: u64, class: u8, pos: u64) -> u64 { 137 | rank_impl(code, class, pos) 138 | } 139 | 140 | pub fn rank(code: u64, class: u8, pos: u64) -> u64 { 
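    // Runtime dispatch: use the `popcnt`-specialized copy of `rank_impl`
    // above when the CPU supports the instruction.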
141 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 142 | { 143 | if is_x86_feature_detected!("popcnt") { 144 | return unsafe { rank_with_popcount(code, class, pos) }; 145 | } 146 | } 147 | rank_impl(code, class, pos) 148 | } 149 | 150 | // Adapted from https://lemire.me/blog/2018/02/21/iterating-over-set-bits-quickly/ 151 | pub fn select1_raw(mut code: u64, mut rank: u64) -> u64 { 152 | debug_assert!(rank < code.count_ones() as u64); 153 | for _ in 0..64 { 154 | let t = code & code.wrapping_neg(); 155 | if rank == 0 { 156 | return code.trailing_zeros() as u64; 157 | } 158 | rank -= 1; 159 | code ^= t; 160 | } 161 | debug_assert!(false, "select1_raw past end of codeword"); 162 | 0 163 | } 164 | 165 | pub fn select1(mut code: u64, class: u8, mut rank: u64) -> u64 { 166 | if ENUM_CODE_LENGTH[class as usize] == SMALL_BLOCK_SIZE as u8 { 167 | return select1_raw(code, rank); 168 | } 169 | let mut k = class; 170 | for i in 0..SMALL_BLOCK_SIZE { 171 | let n = SMALL_BLOCK_SIZE - i; 172 | let z = zero_case_count(n as u8, k as u8); 173 | if code >= z { 174 | if rank == 0 { 175 | return i; 176 | } 177 | rank -= 1; 178 | code -= z; 179 | k -= 1; 180 | } 181 | } 182 | debug_assert!(false, "select1 past end of codeword"); 183 | 0 184 | } 185 | 186 | pub fn select0(mut code: u64, class: u8, mut rank: u64) -> u64 { 187 | if ENUM_CODE_LENGTH[class as usize] == SMALL_BLOCK_SIZE as u8 { 188 | return select1_raw(!code, rank); 189 | } 190 | let mut k = class as usize; 191 | for i in 0..SMALL_BLOCK_SIZE { 192 | let n = SMALL_BLOCK_SIZE - i; 193 | let z = zero_case_count(n as u8, k as u8); 194 | if code >= z { 195 | code -= z; 196 | k -= 1; 197 | } else { 198 | if rank == 0 { 199 | return i; 200 | } 201 | rank -= 1; 202 | } 203 | } 204 | debug_assert!(false, "select0 past end of codeword"); 205 | 0 206 | } 207 | 208 | #[cfg(test)] 209 | mod tests { 210 | use super::{binomial_coefficient, decode, decode_bit, encode, rank, select0, select1}; 211 | use crate::test_helpers::hash_u64; 212 | use std::collections::HashMap; 213 | use succinct::broadword; 214 | 215 | fn check_value(value: u64) -> bool { 216 | let class = value.count_ones() as u8; 217 | let (_, code) = encode(value, class); 218 | 219 | let decoded = decode(code, class) == value; 220 | let decode_bit = (0..64).all(|i| { 221 | let computed = decode_bit(code, class, i); 222 | let expected = (value >> i) & 1 != 0; 223 | computed == expected 224 | }); 225 | let rank = (0..64).all(|i| { 226 | let computed = rank(code, class, i); 227 | let expected = (value & ((1 << i) - 1)).count_ones() as u64; 228 | computed == expected 229 | }); 230 | let select0 = (0..(64 - class) as u64).all(|i| { 231 | let computed = select0(code, class, i) as usize; 232 | let expected = broadword::select1_raw(i as usize, !value); 233 | computed == expected 234 | }); 235 | let select1 = (0..class as u64).all(|i| { 236 | let computed = select1(code, class, i) as usize; 237 | let expected = broadword::select1_raw(i as usize, value); 238 | computed == expected 239 | }); 240 | 241 | decoded && decode_bit && rank && select0 && select1 242 | } 243 | 244 | #[test] 245 | fn test_enum_code() { 246 | for i in 0..64 { 247 | assert!(check_value(std::u64::MAX << i)); 248 | } 249 | assert!(check_value(0)); 250 | } 251 | 252 | #[quickcheck] 253 | fn qc_enum_code(value: u64) -> bool { 254 | check_value(value) 255 | } 256 | 257 | #[quickcheck] 258 | fn qc_enum_code_hashed(value: u64) -> bool { 259 | check_value(hash_u64(value)) 260 | } 261 | 262 | #[test] 263 | fn 
test_binomial_coefficient_table() { 264 | fn lookup(table: &mut HashMap<(u8, u8), u64>, n: u8, k: u8) -> u64 { 265 | if k == 0 || k == n { 266 | return 1; 267 | } 268 | if let Some(&v) = table.get(&(n, k)) { 269 | return v; 270 | } 271 | let v = lookup(table, n - 1, k - 1) + lookup(table, n - 1, k); 272 | table.insert((n, k), v); 273 | v 274 | } 275 | let mut table = HashMap::new(); 276 | for n in 0..=64 { 277 | for k in 0..=n { 278 | assert_eq!(binomial_coefficient(n, k), lookup(&mut table, n, k)); 279 | } 280 | } 281 | } 282 | } 283 | 284 | // Precomputed number of bits it takes to represent a block of 64 bits where k 285 | // of them are set: ceil(log(binomial_coefficient(64, k))) for k in [0, 64]. 286 | // However, note that once the code length is sufficiently long (> 46), we just 287 | // set it to 64 to hit the fast paths above. It's not worth using a variable 288 | // length code to save a few bits when it makes computing `rank` and `select` 289 | // much more expensive. 290 | pub const ENUM_CODE_LENGTH: &[u8; 65] = &[ 291 | 0, 6, 11, 16, 20, 23, 27, 30, 33, 35, 38, 40, 42, 44, 46, 64, 64, 64, 64, 64, 64, 64, 64, 64, 292 | 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 293 | 64, 64, 46, 44, 42, 40, 38, 35, 33, 30, 27, 23, 20, 16, 11, 6, 0, 294 | ]; 295 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! 'RsDict' data structure that supports both rank and select over a bitmap. 2 | //! 3 | //! This crate is an implementation of [Navarro and Providel, "Fast, Small, 4 | //! Simple Rank/Select On 5 | //! Bitmaps"](https://users.dcc.uchile.cl/~gnavarro/ps/sea12.1.pdf), with heavy 6 | //! inspiration from a [Go implementation](https://github.com/hillbig/rsdic). 7 | //! 8 | //! ``` 9 | //! use rsdict::RsDict; 10 | //! 11 | //! let mut r = RsDict::new(); 12 | //! r.push(false); 13 | //! r.push(true); 14 | //! r.push(true); 15 | //! r.push(false); 16 | //! 17 | //! // There's one bit set to the left of index 2. 18 | //! assert_eq!(r.rank(2, true), 1); 19 | //! 20 | //! // The index of the second (zero-indexed as 1) bit is 3. 21 | //! assert_eq!(r.select(1, false), Some(3)); 22 | //! ``` 23 | //! 24 | //! # Implementation notes 25 | //! First, we store the bitmap in compressed form. Each block of 64 bits is 26 | //! stored with a variable length code, where the length is determined by the 27 | //! number of bits set in the block (its "class"). Then, we store the classes 28 | //! (i.e. the number of bits set per block) in a separate array, allowing us to 29 | //! iterate forward from a pointer into the variable length buffer. 30 | //! 31 | //! To allow efficient indexing, we then break up the input into 32 | //! `LARGE_BLOCK_SIZE` blocks and store a pointer into the variable length 33 | //! buffer per block. As with other rank structures, we also store a 34 | //! precomputed rank from the beginning of the large block. 35 | //! 36 | //! Finally, we store precomputed indices for selection in separate arrays. For 37 | //! every `SELECT_BLOCK_SIZE`th bit, we maintain a pointer to the large block 38 | //! this bit falls in. We also do the same for zeros. 39 | //! 40 | //! Then, we can compute ranks by consulting the large block rank and then 41 | //! iterating over the small block classes before our desired position. Once 42 | //! we've found the boundary small block, we can then decode it and compute the 43 | //! 
rank within the block. The choice of variable length code allows computing
44 | //! its internal rank without decoding the entire block.
45 | //!
46 | //! Select works similarly where we start with the large block indices, skip
47 | //! over as many small blocks as possible, and then select within a small
48 | //! block. As with rank, we're able to select within a small block directly.
49 | 
50 | #![cfg_attr(feature = "simd", feature(portable_simd))]
51 | 
52 | #[cfg(test)]
53 | extern crate quickcheck;
54 | #[cfg(test)]
55 | #[macro_use(quickcheck)]
56 | extern crate quickcheck_macros;
57 | 
58 | use std::cmp::Ordering;
59 | use std::mem;
60 | 
61 | mod constants;
62 | mod enum_code;
63 | 
64 | mod rank_acceleration;
65 | 
66 | #[cfg(test)]
67 | mod test_helpers;
68 | 
69 | use self::constants::{
70 |     LARGE_BLOCK_SIZE, SELECT_BLOCK_SIZE, SMALL_BLOCK_PER_LARGE_BLOCK, SMALL_BLOCK_SIZE,
71 | };
72 | use self::enum_code::ENUM_CODE_LENGTH;
73 | 
74 | /// Data structure for efficiently computing both rank and select queries
75 | #[derive(Debug, Clone)]
76 | pub struct RsDict {
77 |     len: u64,
78 |     num_ones: u64,
79 |     num_zeros: u64,
80 | 
81 |     // Small block metadata (stored every SMALL_BLOCK_SIZE bits):
82 |     // * number of set bits (the "class") for the small block
83 |     // * index within a class for each small block; note that the indexes are
84 |     //   variable length (see `ENUM_CODE_LENGTH`), so there isn't direct access
85 |     //   for a particular small block.
86 |     sb_classes: Vec<u8>,
87 |     sb_indices: VarintBuffer,
88 | 
89 |     // Large block metadata (stored every LARGE_BLOCK_SIZE bits):
90 |     // * pointer into variable-length `bits` for the block start
91 |     // * cached rank at the block start
92 |     large_blocks: Vec<LargeBlock>,
93 | 
94 |     // Select acceleration:
95 |     // `select_{one,zero}_inds` store the (offset / LARGE_BLOCK_SIZE) of each
96 |     // SELECT_BLOCK_SIZE'th bit.
97 |     select_one_inds: Vec<u64>,
98 |     select_zero_inds: Vec<u64>,
99 | 
100 |     // Current in-progress small block we're appending to
101 |     last_block: LastBlock,
102 | }
103 | 
104 | impl RsDict {
105 |     /// Create a dictionary from a bitset, specified as an iterator of 64-bit blocks. This function
106 |     /// is equivalent to pushing each bit one at a time but is much faster.
107 |     pub fn from_blocks(blocks: impl Iterator<Item = u64>) -> Self {
108 |         #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
109 |         {
110 |             if is_x86_feature_detected!("popcnt") {
111 |                 return unsafe { Self::from_blocks_popcount(blocks) };
112 |             }
113 |         }
114 |         Self::from_blocks_impl(blocks)
115 |     }
116 | 
117 |     /// Return the size of the heap allocations associated with the `RsDict`.
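    /// (This counts only the owned buffers; it does not include
    /// `mem::size_of::<RsDict>()` itself.)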
118 |     pub fn heap_size(&self) -> usize {
119 |         self.sb_classes.capacity() * mem::size_of::<u8>()
120 |             + self.sb_indices.heap_size()
121 |             + self.large_blocks.capacity() * mem::size_of::<LargeBlock>()
122 |             + self.select_one_inds.capacity() * mem::size_of::<u64>()
123 |             + self.select_zero_inds.capacity() * mem::size_of::<u64>()
124 |     }
125 | 
126 |     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
127 |     #[target_feature(enable = "popcnt")]
128 |     unsafe fn from_blocks_popcount(blocks: impl Iterator<Item = u64>) -> Self {
129 |         Self::from_blocks_impl(blocks)
130 |     }
131 | 
132 |     #[inline(always)]
133 |     fn from_blocks_impl(blocks: impl Iterator<Item = u64>) -> Self {
134 |         let (_, hint) = blocks.size_hint();
135 |         let hint = hint.unwrap_or(0);
136 | 
137 |         let mut large_blocks = Vec::with_capacity(hint / LARGE_BLOCK_SIZE as usize);
138 |         let mut select_one_inds = Vec::with_capacity(hint / SELECT_BLOCK_SIZE as usize);
139 |         let mut select_zero_inds = Vec::with_capacity(hint / SELECT_BLOCK_SIZE as usize);
140 |         let mut sb_classes = Vec::with_capacity(hint / SMALL_BLOCK_SIZE as usize);
141 |         let mut sb_indices = VarintBuffer::with_capacity(hint);
142 |         let mut last_block = LastBlock::new();
143 | 
144 |         let mut num_ones = 0;
145 |         let mut num_zeros = 0;
146 | 
147 |         let mut iter = blocks.enumerate().peekable();
148 | 
149 |         while let Some((i, block)) = iter.next() {
150 |             let sb_class = block.count_ones() as u8;
151 | 
152 |             if i as u64 % SMALL_BLOCK_PER_LARGE_BLOCK == 0 {
153 |                 let lblock = LargeBlock {
154 |                     rank: num_ones,
155 |                     pointer: sb_indices.len() as u64,
156 |                 };
157 |                 large_blocks.push(lblock);
158 |             }
159 | 
160 |             // If we're on the last block, write to `last_block` rather than
161 |             // pushing onto the `VarintBuffer`.
162 |             if iter.peek().is_none() {
163 |                 last_block.bits = block;
164 |                 last_block.num_ones = sb_class as u64;
165 |                 last_block.num_zeros = 64 - sb_class as u64;
166 |             } else {
167 |                 sb_classes.push(sb_class);
168 |                 let (code_len, code) = enum_code::encode(block, sb_class);
169 |                 sb_indices.push(code_len as usize, code);
170 |             }
171 | 
172 |             let lb_start = i as u64 * SMALL_BLOCK_SIZE / LARGE_BLOCK_SIZE;
173 | 
174 |             // We want to see if there's any j in [num_ones, num_ones + sb_class) such
175 |             // that j % SELECT_BLOCK_SIZE = 0. We can do this arithmetically by
176 |             // comparing two divisors:
177 |             //
178 |             // 1. (num_ones - 1) / SELECT_BLOCK_SIZE and
179 |             // 2. (num_ones + sb_class - 1) / SELECT_BLOCK_SIZE.
180 |             //
181 |             // If they're not equal, there must be a multiple of SELECT_BLOCK_SIZE in
182 |             // the interval [num_ones, num_ones + sb_class). To see why, consider
183 |             // the case where sb_class > 0 and SELECT_BLOCK_SIZE divides num_ones.
184 |             // Then, the first divisor's numerator is one less than a multiple, and
185 |             // the second one must be greater than or equal to it. Similarly, if the
186 |             // last value num_ones + sb_class - 1 is a multiple, then the first divisor
187 |             // must be less than the second. Then, since sb_class < SELECT_BLOCK_SIZE,
188 |             // the same argument holds for any divisor in the middle.
189 |             //
190 |             // Finally, since we're working with unsigned integers, add SELECT_BLOCK_SIZE
191 |             // to both numerators so we don't ever underflow when subtracting one.
192 |             let start = num_ones + SELECT_BLOCK_SIZE - 1;
193 |             let end = num_ones + SELECT_BLOCK_SIZE + sb_class as u64 - 1;
194 |             if start / SELECT_BLOCK_SIZE != end / SELECT_BLOCK_SIZE {
195 |                 select_one_inds.push(lb_start);
196 |             }
197 | 
198 |             // Now do the same for the zero indices.
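            // (A worked example of the same check, for zeros: with
            // SELECT_BLOCK_SIZE = 4096, num_zeros = 4095, and 2 zeros in this
            // block, start = 4095 + 4096 - 1 = 8190 and end = 8190 + 2 = 8192;
            // 8190 / 4096 = 1 but 8192 / 4096 = 2, so j = 4096 falls in the
            // interval and we record a select index for this large block.)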
199 | let start = num_zeros + SELECT_BLOCK_SIZE - 1; 200 | let end = num_zeros + SELECT_BLOCK_SIZE + (64 - sb_class as u64) - 1; 201 | if start / SELECT_BLOCK_SIZE != end / SELECT_BLOCK_SIZE { 202 | select_zero_inds.push(lb_start); 203 | } 204 | 205 | num_ones += sb_class as u64; 206 | num_zeros += 64 - sb_class as u64; 207 | } 208 | 209 | let num_sb = sb_classes.len(); 210 | let align = SMALL_BLOCK_PER_LARGE_BLOCK as usize; 211 | sb_classes.reserve((num_sb + align - 1) / align * align); 212 | 213 | Self { 214 | large_blocks, 215 | select_one_inds, 216 | select_zero_inds, 217 | sb_classes, 218 | sb_indices, 219 | 220 | len: num_ones + num_zeros, 221 | num_ones, 222 | num_zeros, 223 | 224 | last_block, 225 | } 226 | } 227 | 228 | /// Create a new `RsDict` with zero capacity. 229 | pub fn new() -> Self { 230 | Self::with_capacity(0) 231 | } 232 | 233 | /// Create a new `RsDict` with the given capacity preallocated. 234 | pub fn with_capacity(n: usize) -> Self { 235 | Self { 236 | large_blocks: Vec::with_capacity(n / LARGE_BLOCK_SIZE as usize), 237 | select_one_inds: Vec::with_capacity(n / SELECT_BLOCK_SIZE as usize), 238 | select_zero_inds: Vec::with_capacity(n / SELECT_BLOCK_SIZE as usize), 239 | sb_classes: Vec::with_capacity(n / SMALL_BLOCK_SIZE as usize), 240 | sb_indices: VarintBuffer::with_capacity(n), 241 | 242 | len: 0, 243 | num_ones: 0, 244 | num_zeros: 0, 245 | 246 | last_block: LastBlock::new(), 247 | } 248 | } 249 | 250 | /// Non-inclusive rank: Count the number of `bit` values left of `pos`. Panics if `pos` is 251 | /// out-of-bounds. 252 | pub fn rank(&self, pos: u64, bit: bool) -> u64 { 253 | if pos >= self.len { 254 | panic!("Out of bounds position: {} >= {}", pos, self.len); 255 | } 256 | // If we're in the last block, count the number of ones set after our 257 | // bit in the last block and remove that from the global count. 258 | if self.is_last_block(pos) { 259 | let trailing_ones = self.last_block.count_suffix(pos % SMALL_BLOCK_SIZE); 260 | return rank_by_bit(self.num_ones - trailing_ones, pos, bit); 261 | } 262 | 263 | // Start with the rank from our position's large block. 264 | let lblock = pos / LARGE_BLOCK_SIZE; 265 | let LargeBlock { 266 | mut pointer, 267 | mut rank, 268 | } = self.large_blocks[lblock as usize]; 269 | 270 | // Add in the ranks (i.e. the classes) per small block up to our 271 | // position's small block. 272 | let sblock_start = (lblock * SMALL_BLOCK_PER_LARGE_BLOCK) as usize; 273 | let sblock = (pos / SMALL_BLOCK_SIZE) as usize; 274 | let (class_sum, length_sum) = 275 | rank_acceleration::scan_block(&self.sb_classes, sblock_start, sblock); 276 | rank += class_sum; 277 | pointer += length_sum; 278 | 279 | // If we aren't on a small block boundary, add in the rank within the small block. 280 | if pos % SMALL_BLOCK_SIZE != 0 { 281 | let sb_class = self.sb_classes[sblock]; 282 | let code = self.read_sb_index(pointer, ENUM_CODE_LENGTH[sb_class as usize]); 283 | rank += enum_code::rank(code, sb_class, pos % SMALL_BLOCK_SIZE); 284 | } 285 | 286 | rank_by_bit(rank, pos, bit) 287 | } 288 | 289 | /// Query the `pos`th bit (zero-indexed) of the underlying bit and the number of set bits to the 290 | /// left of `pos` in a single operation. This method is faster than calling `get_bit(pos)` and 291 | /// `rank(pos, true)` separately. 
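    ///
    /// A minimal sketch of the combined query:
    ///
    /// ```
    /// use rsdict::RsDict;
    /// let mut r = RsDict::new();
    /// r.push(true);
    /// r.push(false);
    /// r.push(true);
    /// // The bit at index 2 is set, and one set bit lies to its left.
    /// assert_eq!(r.bit_and_one_rank(2), (true, 1));
    /// ```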
292 |     pub fn bit_and_one_rank(&self, pos: u64) -> (bool, u64) {
293 |         if pos >= self.len {
294 |             panic!("Out of bounds position: {} >= {}", pos, self.len);
295 |         }
296 |         if self.is_last_block(pos) {
297 |             let sb_pos = pos % SMALL_BLOCK_SIZE;
298 |             let bit = self.last_block.get_bit(sb_pos);
299 |             let after_rank = self.last_block.count_suffix(sb_pos);
300 |             return (bit, self.num_ones - after_rank);
301 |         }
302 |         let lblock = pos / LARGE_BLOCK_SIZE;
303 |         let sblock = (pos / SMALL_BLOCK_SIZE) as usize;
304 |         let sblock_start = (lblock * SMALL_BLOCK_PER_LARGE_BLOCK) as usize;
305 |         let LargeBlock {
306 |             mut pointer,
307 |             mut rank,
308 |         } = self.large_blocks[lblock as usize];
309 |         for &sb_class in &self.sb_classes[sblock_start..sblock] {
310 |             pointer += ENUM_CODE_LENGTH[sb_class as usize] as u64;
311 |             rank += sb_class as u64;
312 |         }
313 |         let sb_class = self.sb_classes[sblock];
314 |         let code_length = ENUM_CODE_LENGTH[sb_class as usize];
315 |         let code = self.read_sb_index(pointer, code_length);
316 | 
317 |         rank += enum_code::rank(code, sb_class, pos % SMALL_BLOCK_SIZE);
318 |         let bit = enum_code::decode_bit(code, sb_class, pos % SMALL_BLOCK_SIZE);
319 |         (bit, rank)
320 |     }
321 | 
322 |     /// Inclusive rank: Count the number of `bit` values at indices less than or equal to
323 |     /// `pos`. Panics if `pos` is out-of-bounds.
324 |     pub fn inclusive_rank(&self, pos: u64, bit: bool) -> u64 {
325 |         let (pos_bit, one_rank) = self.bit_and_one_rank(pos);
326 |         rank_by_bit(one_rank, pos, bit) + if pos_bit == bit { 1 } else { 0 }
327 |     }
328 | 
329 |     /// Compute the position of the `rank`th instance of `bit` (zero-indexed), returning `None` if
330 |     /// there are not `rank + 1` instances of `bit` in the array.
331 |     pub fn select(&self, rank: u64, bit: bool) -> Option<u64> {
332 |         if bit {
333 |             self.select1(rank)
334 |         } else {
335 |             self.select0(rank)
336 |         }
337 |     }
338 | 
339 |     /// Specialized version of [`RsDict::select`] for finding positions of zeros.
340 |     pub fn select0(&self, rank: u64) -> Option<u64> {
341 |         if rank >= self.num_zeros {
342 |             return None;
343 |         }
344 |         // How many zeros are there *excluding* the last block?
345 |         let prefix_num_zeros = self.num_zeros - self.last_block.num_zeros;
346 | 
347 |         // Our rank must be in the last block.
348 |         if rank >= prefix_num_zeros {
349 |             let lb_rank = (rank - prefix_num_zeros) as u8;
350 |             return Some(self.last_block_ind() + self.last_block.select0(lb_rank));
351 |         }
352 | 
353 |         // First, use the select pointer to jump forward to a large block and
354 |         // then walk forward over the large blocks until we pass our rank.
355 |         let select_ind = (rank / SELECT_BLOCK_SIZE) as usize;
356 |         let lb_start = self.select_zero_inds[select_ind] as usize;
357 |         let mut lblock = None;
358 |         for (i, large_block) in self.large_blocks[lb_start..].iter().enumerate() {
359 |             let lb_ix = (lb_start + i) as u64;
360 |             let lb_rank = lb_ix * LARGE_BLOCK_SIZE - large_block.rank;
361 |             if rank < lb_rank {
362 |                 lblock = Some(lb_ix - 1);
363 |                 break;
364 |             }
365 |         }
366 |         let lblock = lblock.unwrap_or(self.large_blocks.len() as u64 - 1);
367 |         let large_block = &self.large_blocks[lblock as usize];
368 | 
369 |         // Next, iterate over the small blocks, using their cached class to
370 |         // subtract out our rank.
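        // (`lblock * LARGE_BLOCK_SIZE - large_block.rank` below is the number
        // of zeros before this large block, since `rank` caches the number of
        // ones at the block start.)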
371 |         let sb_start = (lblock * SMALL_BLOCK_PER_LARGE_BLOCK) as usize;
372 |         let mut pointer = large_block.pointer;
373 |         let mut remaining = rank - (lblock * LARGE_BLOCK_SIZE - large_block.rank);
374 |         for (i, &sb_class) in self.sb_classes[sb_start..].iter().enumerate() {
375 |             let sb_zeros = (SMALL_BLOCK_SIZE as u8 - sb_class) as u64;
376 |             let code_length = ENUM_CODE_LENGTH[sb_class as usize];
377 | 
378 |             // Our desired rank is within this block.
379 |             if remaining < sb_zeros {
380 |                 let code = self.read_sb_index(pointer, code_length);
381 |                 let sb_rank = (sb_start + i) as u64 * SMALL_BLOCK_SIZE;
382 |                 let block_rank = enum_code::select0(code, sb_class, remaining);
383 |                 return Some(sb_rank + block_rank);
384 |             }
385 | 
386 |             // Otherwise, subtract out this block and continue.
387 |             remaining -= sb_zeros;
388 |             pointer += code_length as u64;
389 |         }
390 |         panic!("Ran out of small blocks when iterating over rank");
391 |     }
392 | 
393 |     /// Specialized version of [`RsDict::select`] for finding positions of ones.
394 |     pub fn select1(&self, rank: u64) -> Option<u64> {
395 |         if rank >= self.num_ones {
396 |             return None;
397 |         }
398 | 
399 |         let prefix_num_ones = self.num_ones - self.last_block.num_ones;
400 |         if rank >= prefix_num_ones {
401 |             let lb_rank = (rank - prefix_num_ones) as u8;
402 |             return Some(self.last_block_ind() + self.last_block.select1(lb_rank));
403 |         }
404 | 
405 |         let select_ind = (rank / SELECT_BLOCK_SIZE) as usize;
406 |         let lb_start = self.select_one_inds[select_ind] as usize;
407 |         let mut lblock = None;
408 |         for (i, large_block) in self.large_blocks[lb_start..].iter().enumerate() {
409 |             if rank < large_block.rank {
410 |                 lblock = Some((lb_start + i - 1) as u64);
411 |                 break;
412 |             }
413 |         }
414 |         let lblock = lblock.unwrap_or(self.large_blocks.len() as u64 - 1);
415 |         let large_block = &self.large_blocks[lblock as usize];
416 | 
417 |         let sb_start = (lblock * SMALL_BLOCK_PER_LARGE_BLOCK) as usize;
418 |         let mut pointer = large_block.pointer;
419 |         let mut remaining = rank - large_block.rank;
420 |         for (i, &sb_class) in self.sb_classes[sb_start..].iter().enumerate() {
421 |             let sb_ones = sb_class as u64;
422 |             let code_length = ENUM_CODE_LENGTH[sb_class as usize];
423 | 
424 |             if remaining < sb_ones {
425 |                 let code = self.read_sb_index(pointer, code_length);
426 |                 let sb_rank = (sb_start + i) as u64 * SMALL_BLOCK_SIZE;
427 |                 let block_rank = enum_code::select1(code, sb_class, remaining);
428 |                 return Some(sb_rank + block_rank);
429 |             }
430 | 
431 |             remaining -= sb_ones;
432 |             pointer += code_length as u64;
433 |         }
434 |         panic!("Ran out of small blocks when iterating over rank");
435 |     }
436 | 
437 |     /// Return the length of the underlying bitmap.
438 |     pub fn len(&self) -> usize {
439 |         self.len as usize
440 |     }
441 | 
442 |     /// Return whether the underlying bitmap is empty.
443 |     pub fn is_empty(&self) -> bool {
444 |         self.len == 0
445 |     }
446 | 
447 |     /// Count the number of set bits in the underlying bitmap.
448 |     pub fn count_ones(&self) -> usize {
449 |         self.num_ones as usize
450 |     }
451 | 
452 |     /// Count the number of unset bits in the underlying bitmap.
453 |     pub fn count_zeros(&self) -> usize {
454 |         self.num_zeros as usize
455 |     }
456 | 
457 |     /// Push a bit at the end of the underlying bitmap.
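    ///
    /// A minimal usage sketch:
    ///
    /// ```
    /// use rsdict::RsDict;
    /// let mut r = RsDict::new();
    /// r.push(true);
    /// r.push(false);
    /// assert_eq!(r.len(), 2);
    /// assert_eq!(r.count_ones(), 1);
    /// ```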
458 |     pub fn push(&mut self, bit: bool) {
459 |         if self.len % SMALL_BLOCK_SIZE == 0 {
460 |             self.write_block();
461 |         }
462 |         if bit {
463 |             self.last_block.set_one(self.len % SMALL_BLOCK_SIZE);
464 |             if self.num_ones % SELECT_BLOCK_SIZE == 0 {
465 |                 self.select_one_inds.push(self.len / LARGE_BLOCK_SIZE);
466 |             }
467 |             self.num_ones += 1;
468 |         } else {
469 |             self.last_block.set_zero(self.len % SMALL_BLOCK_SIZE);
470 |             if self.num_zeros % SELECT_BLOCK_SIZE == 0 {
471 |                 self.select_zero_inds.push(self.len / LARGE_BLOCK_SIZE);
472 |             }
473 |             self.num_zeros += 1;
474 |         }
475 |         self.len += 1;
476 |     }
477 | 
478 |     /// Query the `pos`th bit (zero-indexed) of the underlying bitmap.
479 |     pub fn get_bit(&self, pos: u64) -> bool {
480 |         if self.is_last_block(pos) {
481 |             return self.last_block.get_bit(pos % SMALL_BLOCK_SIZE);
482 |         }
483 |         let lblock = pos / LARGE_BLOCK_SIZE;
484 |         let sblock = (pos / SMALL_BLOCK_SIZE) as usize;
485 |         let sblock_start = (lblock * SMALL_BLOCK_PER_LARGE_BLOCK) as usize;
486 |         let mut pointer = self.large_blocks[lblock as usize].pointer;
487 |         for &sb_class in &self.sb_classes[sblock_start..sblock] {
488 |             pointer += ENUM_CODE_LENGTH[sb_class as usize] as u64;
489 |         }
490 |         let sb_class = self.sb_classes[sblock];
491 |         let code_length = ENUM_CODE_LENGTH[sb_class as usize];
492 |         let code = self.read_sb_index(pointer, code_length);
493 |         enum_code::decode_bit(code, sb_class, pos % SMALL_BLOCK_SIZE)
494 |     }
495 | 
496 |     /// Iterate over the bits in the bitset.
497 |     pub fn iter(&self) -> impl Iterator<Item = bool> + '_ {
498 |         struct RsDictIter<'a> {
499 |             rsdict: &'a RsDict,
500 |             pos: u64,
501 |         }
502 |         impl<'a> Iterator for RsDictIter<'a> {
503 |             type Item = bool;
504 | 
505 |             fn next(&mut self) -> Option<Self::Item> {
506 |                 if self.pos >= self.rsdict.len {
507 |                     return None;
508 |                 }
509 |                 // TODO: We could optimize this to read in a block once rather than decoding a bit
510 |                 // at a time.
511 |                 let out = self.rsdict.get_bit(self.pos);
512 |                 self.pos += 1;
513 |                 Some(out)
514 |             }
515 |         }
516 |         RsDictIter {
517 |             rsdict: self,
518 |             pos: 0,
519 |         }
520 |     }
521 | 
522 |     fn write_block(&mut self) {
523 |         if self.len > 0 {
524 |             let block = mem::replace(&mut self.last_block, LastBlock::new());
525 | 
526 |             let sb_class = block.num_ones as u8;
527 |             self.sb_classes.push(sb_class);
528 | 
529 |             // To avoid indexing past the end of our allocation when
530 |             // scanning through a large block, reserve some extra space to
531 |             // ensure that we always have a full large block in
532 |             // `sb_classes`.
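            // (The expression below rounds `num_sb` up to a whole number of
            // large blocks' worth of small-block classes before reserving.)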
533 |             let num_sb = self.sb_classes.len();
534 |             let align = SMALL_BLOCK_PER_LARGE_BLOCK as usize;
535 |             self.sb_classes
536 |                 .reserve((num_sb + align - 1) / align * align);
537 | 
538 |             let (code_len, code) = enum_code::encode(block.bits, sb_class);
539 |             self.sb_indices.push(code_len as usize, code);
540 |         }
541 |         if self.len % LARGE_BLOCK_SIZE == 0 {
542 |             let lblock = LargeBlock {
543 |                 rank: self.num_ones,
544 |                 pointer: self.sb_indices.len() as u64,
545 |             };
546 |             self.large_blocks.push(lblock);
547 |         }
548 |     }
549 | 
550 |     fn last_block_ind(&self) -> u64 {
551 |         if self.len == 0 {
552 |             return 0;
553 |         }
554 |         ((self.len - 1) / SMALL_BLOCK_SIZE) * SMALL_BLOCK_SIZE
555 |     }
556 | 
557 |     fn is_last_block(&self, pos: u64) -> bool {
558 |         pos >= self.last_block_ind()
559 |     }
560 | 
561 |     fn read_sb_index(&self, ptr: u64, code_len: u8) -> u64 {
562 |         self.sb_indices.get(ptr as usize, code_len as usize)
563 |     }
564 | }
565 | 
566 | impl PartialEq for RsDict {
567 |     fn eq(&self, rhs: &Self) -> bool {
568 |         self.iter().eq(rhs.iter())
569 |     }
570 | }
571 | 
572 | impl Eq for RsDict {}
573 | 
574 | impl PartialOrd for RsDict {
575 |     fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
576 |         self.iter().partial_cmp(rhs.iter())
577 |     }
578 | }
579 | 
580 | impl Ord for RsDict {
581 |     fn cmp(&self, rhs: &Self) -> Ordering {
582 |         self.iter().cmp(rhs.iter())
583 |     }
584 | }
585 | 
586 | #[derive(Clone, Debug, Eq, PartialEq)]
587 | struct LargeBlock {
588 |     pointer: u64,
589 |     rank: u64,
590 | }
591 | 
592 | #[derive(Clone, Debug, Eq, PartialEq)]
593 | struct VarintBuffer {
594 |     buf: Vec<u64>,
595 |     len: usize,
596 | }
597 | 
598 | impl VarintBuffer {
599 |     fn with_capacity(bits: usize) -> Self {
600 |         Self {
601 |             buf: Vec::with_capacity(bits / 64),
602 |             len: 0,
603 |         }
604 |     }
605 | 
606 |     fn push(&mut self, num_bits: usize, value: u64) {
607 |         debug_assert!(num_bits <= 64);
608 |         if num_bits == 0 {
609 |             return;
610 |         }
611 |         let (block, offset) = (self.len / 64, self.len % 64);
612 |         if self.buf.len() == block || offset + num_bits > 64 {
613 |             self.buf.push(0);
614 |         }
615 |         self.buf[block] |= value << offset;
616 |         if offset + num_bits > 64 {
617 |             self.buf[block + 1] |= value >> (64 - offset);
618 |         }
619 |         self.len += num_bits;
620 |     }
621 | 
622 |     fn get(&self, index: usize, num_bits: usize) -> u64 {
623 |         debug_assert!(num_bits <= 64);
624 |         if num_bits == 0 {
625 |             return 0;
626 |         }
627 |         let (block, offset) = (index / 64, index % 64);
628 |         let mask = 1u64
629 |             .checked_shl(num_bits as u32)
630 |             .unwrap_or(0)
631 |             .wrapping_sub(1);
632 |         let mut ret = (self.buf[block] >> offset) & mask;
633 |         if offset + num_bits > 64 {
634 |             ret |= self.buf[block + 1] << (64 - offset);
635 |         }
636 |         ret & mask
637 |     }
638 | 
639 |     fn heap_size(&self) -> usize {
640 |         self.buf.capacity() * mem::size_of::<u64>()
641 |     }
642 | 
643 |     fn len(&self) -> usize {
644 |         self.len
645 |     }
646 | }
647 | 
648 | #[derive(Clone, Debug, Eq, PartialEq)]
649 | struct LastBlock {
650 |     bits: u64,
651 |     num_ones: u64,
652 |     num_zeros: u64,
653 | }
654 | 
655 | impl LastBlock {
656 |     fn new() -> Self {
657 |         LastBlock {
658 |             bits: 0,
659 |             num_ones: 0,
660 |             num_zeros: 0,
661 |         }
662 |     }
663 | 
664 |     fn select0(&self, rank: u8) -> u64 {
665 |         debug_assert!(rank < self.num_zeros as u8);
666 |         enum_code::select1_raw(!self.bits, rank as u64)
667 |     }
668 | 
669 |     fn select1(&self, rank: u8) -> u64 {
670 |         debug_assert!(rank < self.num_ones as u8);
671 |         enum_code::select1_raw(self.bits, rank as u64)
672 |     }
673 | 
674 |     // Count the number of bits set at
indices i >= pos
675 |     fn count_suffix(&self, pos: u64) -> u64 {
676 |         (self.bits >> pos).count_ones() as u64
677 |     }
678 | 
679 |     fn get_bit(&self, pos: u64) -> bool {
680 |         (self.bits >> pos) & 1 == 1
681 |     }
682 | 
683 |     // Only call one of `set_one` or `set_zero` for any `pos`.
684 |     fn set_one(&mut self, pos: u64) {
685 |         self.bits |= 1 << pos;
686 |         self.num_ones += 1;
687 |     }
688 |     fn set_zero(&mut self, _pos: u64) {
689 |         self.num_zeros += 1;
690 |     }
691 | }
692 | 
693 | fn rank_by_bit(x: u64, n: u64, b: bool) -> u64 {
694 |     if b {
695 |         x
696 |     } else {
697 |         n - x
698 |     }
699 | }
700 | 
701 | #[cfg(test)]
702 | mod tests {
703 |     use super::RsDict;
704 |     use crate::test_helpers::hash_u64;
705 | 
706 |     fn hash_u64_blocks(blocks: &[u64]) -> Vec<bool> {
707 |         let mut bits = Vec::with_capacity(blocks.len() * 64);
708 |         let to_pop = blocks.get(0).unwrap_or(&0) % 64;
709 |         for block in blocks {
710 |             for i in 0..4 {
711 |                 let block = hash_u64(block.wrapping_add(i));
712 |                 if block % 2 != 0 {
713 |                     for j in 0..64 {
714 |                         let bit = (block >> j) & 1 != 0;
715 |                         bits.push(bit);
716 |                     }
717 |                 }
718 |             }
719 |         }
720 |         for _ in 0..to_pop {
721 |             bits.pop();
722 |         }
723 |         bits
724 |     }
725 | 
726 |     fn check_rsdict(bits: &[bool]) {
727 |         let mut rs_dict = RsDict::with_capacity(bits.len());
728 |         for &bit in bits {
729 |             rs_dict.push(bit);
730 |         }
731 | 
732 |         // Check that rank(i) matches our naively computed ranks for all indices.
733 |         let mut one_rank = 0;
734 |         let mut zero_rank = 0;
735 |         for (i, &inp_bit) in bits.iter().enumerate() {
736 |             assert_eq!(rs_dict.rank(i as u64, false), zero_rank);
737 |             assert_eq!(rs_dict.rank(i as u64, true), one_rank);
738 |             if inp_bit {
739 |                 one_rank += 1;
740 |             } else {
741 |                 zero_rank += 1;
742 |             }
743 |         }
744 | 
745 |         // Check `select(r)` for ranks "in bounds" within the bitvector against
746 |         // our naively computed ranks.
747 |         let mut one_rank = 0;
748 |         let mut zero_rank = 0;
749 |         for (i, &inp_bit) in bits.iter().enumerate() {
750 |             if inp_bit {
751 |                 assert_eq!(rs_dict.select(one_rank as u64, true), Some(i as u64));
752 |                 one_rank += 1;
753 |             } else {
754 |                 assert_eq!(rs_dict.select(zero_rank as u64, false), Some(i as u64));
755 |                 zero_rank += 1;
756 |             }
757 |         }
758 |         // Check all of the "out of bounds" ranks up until `bits.len()`.
759 |         for r in (one_rank + 1)..bits.len() {
760 |             assert_eq!(rs_dict.select(r as u64, true), None);
761 |         }
762 |         for r in (zero_rank + 1)..bits.len() {
763 |             assert_eq!(rs_dict.select(r as u64, false), None);
764 |         }
765 | 
766 |         // Check that we can query all of the bits back out.
767 |         for (i, &bit) in bits.iter().enumerate() {
768 |             assert_eq!(rs_dict.get_bit(i as u64), bit);
769 |         }
770 | 
771 |         // Check our combined bit and rank method.
772 |         let mut one_rank = 0;
773 |         for (i, &bit) in bits.iter().enumerate() {
774 |             let (rs_bit, rs_rank) = rs_dict.bit_and_one_rank(i as u64);
775 |             assert_eq!((rs_bit, rs_rank), (bit, one_rank));
776 |             if bit {
777 |                 one_rank += 1;
778 |             }
779 |         }
780 | 
781 |         // Check that iteration matches.
782 |         assert!(bits.iter().cloned().eq(rs_dict.iter()));
783 | 
784 |         // Check that equality is reflexive.
785 |         assert_eq!(rs_dict, rs_dict)
786 |     }
787 | 
788 |     #[quickcheck]
789 |     fn qc_from_blocks(blocks: Vec<u64>) {
790 |         let bits = hash_u64_blocks(&blocks);
791 |         let mut rs_dict = RsDict::with_capacity(bits.len());
792 |         for &bit in &bits {
793 |             rs_dict.push(bit);
794 |         }
795 |         let blocks = (0..(bits.len() / 64)).map(|i| {
796 |             let mut block = 0u64;
797 |             for j in 0..64 {
798 |                 if bits[i * 64 + j] {
799 |                     block |= 1 << j;
800 |                 }
801 |             }
802 |             block
803 |         });
804 |         let mut block_rs_dict = RsDict::from_blocks(blocks);
805 |         for i in (bits.len() / 64 * 64)..bits.len() {
806 |             block_rs_dict.push(bits[i]);
807 |         }
808 | 
809 |         assert_eq!(rs_dict.len, block_rs_dict.len);
810 |         assert_eq!(rs_dict.num_ones, block_rs_dict.num_ones);
811 |         assert_eq!(rs_dict.num_zeros, block_rs_dict.num_zeros);
812 |         assert_eq!(rs_dict.sb_classes, block_rs_dict.sb_classes);
813 |         assert_eq!(rs_dict.sb_indices, block_rs_dict.sb_indices);
814 |         assert_eq!(rs_dict.large_blocks, block_rs_dict.large_blocks);
815 |         assert_eq!(rs_dict.select_one_inds, block_rs_dict.select_one_inds);
816 |         assert_eq!(rs_dict.select_zero_inds, block_rs_dict.select_zero_inds);
817 |         assert_eq!(rs_dict.last_block, block_rs_dict.last_block);
818 |     }
819 | 
820 |     // Ask quickcheck to generate blocks of 64 bits so we get test
821 |     // coverage for ranges spanning multiple small blocks.
822 |     #[quickcheck]
823 |     fn qc_rsdict(blocks: Vec<u64>) {
824 |         check_rsdict(&hash_u64_blocks(&blocks));
825 |     }
826 | 
827 |     #[test]
828 |     fn test_large_rsdicts() {
829 |         check_rsdict(&[true; 65]);
830 |         check_rsdict(&[true; 1025]);
831 |         check_rsdict(&[true; 3121]);
832 |         check_rsdict(&[true; 3185]);
833 |         check_rsdict(&[true; 4097]);
834 |         check_rsdict(&[true; 8193]);
835 | 
836 |         check_rsdict(&[false; 65]);
837 |         check_rsdict(&[false; 1025]);
838 |         check_rsdict(&[false; 3121]);
839 |         check_rsdict(&[false; 3185]);
840 |         check_rsdict(&[false; 4097]);
841 |         check_rsdict(&[false; 8193]);
842 | 
843 |         let alternating = &mut [false; 8193];
844 |         for i in 0..8193 {
845 |             alternating[i] = i % 2 == 0;
846 |         }
847 |         check_rsdict(alternating);
848 |     }
849 | 
850 |     #[test]
851 |     fn test_ordering() {
852 |         let r1 = RsDict::from_blocks([0u64].iter().cloned());
853 |         let r2 = RsDict::from_blocks([1u64].iter().cloned());
854 | 
855 |         assert_ne!(r1, r2);
856 |         assert!(r1 < r2);
857 |     }
858 | }
859 | 
--------------------------------------------------------------------------------
/src/rank_acceleration.rs:
--------------------------------------------------------------------------------
1 | use crate::enum_code::ENUM_CODE_LENGTH;
2 | 
3 | fn scan_block_naive(classes: &[u8], start: usize, end: usize) -> (u64, u64) {
4 |     let mut class_sum = 0;
5 |     let mut length_sum = 0;
6 |     for &class in &classes[start..end] {
7 |         class_sum += class as u64;
8 |         length_sum += ENUM_CODE_LENGTH[class as usize] as u64;
9 |     }
10 |     (class_sum, length_sum)
11 | }
12 | 
13 | #[cfg(not(all(feature = "simd", target_arch = "x86_64")))]
14 | pub fn scan_block(classes: &[u8], start: usize, end: usize) -> (u64, u64) { scan_block_naive(classes, start, end) }
15 | 
16 | #[cfg(all(feature = "simd", target_arch = "x86_64"))]
17 | mod accelerated {
18 |     use super::scan_block_naive;
19 |     use crate::enum_code::ENUM_CODE_LENGTH;
20 |     use std::arch::x86_64::{__m128i, _mm_sad_epu8, _mm_setzero_si128};
21 |     use std::simd::{num::SimdUint, u64x2, u8x16, Simd};
22 |     use std::simd::prelude::SimdOrd;
23 |     use std::slice;
24 |     use std::u64;
25 | 
26 |     // Scan a prefix of a large block of small block classes, returning the
27 |     // sum of the classes
and their total encoded length.
28 |     //
29 |     // Preconditions:
30 |     // * start <= end <= start + 16
31 |     // * classes[start] must be 128-bit aligned
32 |     //
33 |     // Returns:
34 |     // * class_sum: classes[start..end].sum()
35 |     // * length_sum: classes[start..end].map(|i| ENUM_CODE_LENGTH[i]).sum()
36 |     pub fn scan_block(classes: &[u8], start: usize, end: usize) -> (u64, u64) {
37 |         if is_x86_feature_detected!("ssse3") {
38 |             unsafe { scan_block_ssse3(classes, start, end) }
39 |         } else {
40 |             scan_block_naive(classes, start, end)
41 |         }
42 |     }
43 | 
44 |     #[target_feature(enable = "ssse3")]
45 |     unsafe fn scan_block_ssse3(classes: &[u8], start: usize, end: usize) -> (u64, u64) {
46 |         // Step 1: Load the classes into a u8x16. Our approach here is to do a
47 |         // single load and then mask off the elements past `len`. This is unsafe
48 |         // since we're potentially reading past the end of the slice, but we're
49 |         // masking off the extraneous elements before processing them.
50 |         let len = end - start;
51 |         debug_assert!(len <= 16);
52 | 
53 |         // Step 1a: Start with all bits on, shift to turn off the lowest 8n bits,
54 |         // and then negate to have the lowest 8n bits on.
55 |         let lo_shift = len as u32 * 8;
56 |         let lo_mask = !u64::MAX.checked_shl(lo_shift).unwrap_or(0);
57 |         // Step 1b: Do the same for the remaining 8 bytes.
58 |         let hi_shift = len.saturating_sub(8) as u32 * 8;
59 |         let hi_mask = !u64::MAX.checked_shl(hi_shift).unwrap_or(0);
60 |         let ix_mask: u8x16 = core::mem::transmute(u64x2::from([lo_mask, hi_mask]));
61 | 
62 |         let classes = {
63 |             let start = classes.as_ptr().offset(start as isize);
64 |             let block = slice::from_raw_parts(start, 16);
65 | 
66 |             let block = u8x16::from_slice(block);
67 |             block & ix_mask
68 |         };
69 | 
70 |         // Step 2: We want to be able to pack the `ENUM_CODE_LENGTH` table of 65
71 |         // entries into a single u8x16 vector. We can do this with two insights:
72 |         //
73 |         // 1) The table is symmetric, so we only need to store half of it if we can
74 |         //    transform the indices.
75 |         // 2) The table "caps" out at 64 for most of the range in the middle, which
76 |         //    is the length of the 15th element. If we just truncate indices greater
77 |         //    than 15 (after reflection), we'll not change the value.
78 |         //
79 |         // Putting this together, we have f(i) = min(i, 64 - i, 15) such that
80 |         //
81 |         //    ENUM_CODE_LENGTH[i] == ENUM_CODE_LENGTH[f(i)] for i in [0, 64].
82 |         //
83 |         let indices = classes.simd_min(u8x16::splat(64) - classes).simd_min(u8x16::splat(15));
84 |         let enum_code_vector: u8x16 = u8x16::from([
85 |             ENUM_CODE_LENGTH[0], ENUM_CODE_LENGTH[1], ENUM_CODE_LENGTH[2], ENUM_CODE_LENGTH[3],
86 |             ENUM_CODE_LENGTH[4], ENUM_CODE_LENGTH[5], ENUM_CODE_LENGTH[6], ENUM_CODE_LENGTH[7],
87 |             ENUM_CODE_LENGTH[8], ENUM_CODE_LENGTH[9], ENUM_CODE_LENGTH[10], ENUM_CODE_LENGTH[11],
88 |             ENUM_CODE_LENGTH[12], ENUM_CODE_LENGTH[13], ENUM_CODE_LENGTH[14], ENUM_CODE_LENGTH[15],
89 |         ]);
90 | 
91 |         // Step 3: This is the real magic. Now that we've packed our table into
92 |         // a vector and transformed our classes into indices into this packed vector,
93 |         // we can use `pshufb` to index into our table in parallel.
94 |         let code_lengths = Simd::swizzle_dyn(enum_code_vector, indices);
95 | 
96 |         // Step 4: Compute our sums and return.
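        // (Each lane is at most 64, and `_mm_sad_epu8` below widens while
        // summing, so the totals, at most 16 * 64 = 1024, cannot overflow.)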
97 |         let class_sum = sum_u8x16(classes);
98 |         let length_sum = sum_u8x16(code_lengths);
99 | 
100 |         (class_sum, length_sum)
101 |     }
102 | 
103 |     // If `std::simd` ever supports `psadbw` directly, that would be a
104 |     // great way to sum a u8x16 into a u64x2 in a single SSE2 instruction.
105 |     unsafe fn sum_u8x16(xs: u8x16) -> u64 {
106 |         let zero_m128: __m128i = _mm_setzero_si128();
107 |         let xs_m128: __m128i = __m128i::from(xs);
108 |         let sum_m128 = _mm_sad_epu8(zero_m128, xs_m128);
109 |         u64x2::from(sum_m128).reduce_sum()
110 |     }
111 | }
112 | 
113 | #[cfg(all(feature = "simd", target_arch = "x86_64"))]
114 | pub use self::accelerated::scan_block;
115 | 
--------------------------------------------------------------------------------
/src/rsdict_fuzz.rs:
--------------------------------------------------------------------------------
1 | use rsdict::RsDict;
2 | use succinct::rank::RankSupport;
3 | use succinct::select::SelectSupport;
4 | 
5 | fn main() {
6 |     loop {
7 |         afl::fuzz!(|data: &[u8]| {
8 |             let mut bits = Vec::with_capacity(data.len() * 8);
9 |             for byte in data {
10 |                 for i in 0..8 {
11 |                     bits.push(byte & (1 << i) != 0);
12 |                 }
13 |             }
14 |             let mut blocks = Vec::with_capacity(bits.len() / 64);
15 |             for chunk in bits.chunks_exact(64) {
16 |                 let mut block = 0;
17 |                 for (i, &bit) in chunk.iter().enumerate() {
18 |                     if bit {
19 |                         block |= 1 << i;
20 |                     }
21 |                 }
22 |                 blocks.push(block);
23 |             }
24 | 
25 |             let mut from_bits = RsDict::new();
26 |             for &bit in &bits {
27 |                 from_bits.push(bit);
28 |             }
29 | 
30 |             let mut from_blocks = RsDict::from_blocks(blocks.into_iter());
31 |             for &bit in &bits[(bits.len() / 64 * 64)..] {
32 |                 from_blocks.push(bit);
33 |             }
34 | 
35 |             let mut one_rank = 0;
36 |             let mut zero_rank = 0;
37 | 
38 |             for (i, &bit) in bits.iter().enumerate() {
39 |                 for r in &[&from_bits, &from_blocks] {
40 |                     assert_eq!(r.get_bit(i as u64), bit);
41 | 
42 |                     assert_eq!(r.rank(i as u64, false), zero_rank);
43 |                     assert_eq!(r.rank(i as u64, true), one_rank);
44 | 
45 |                     if bit {
46 |                         assert_eq!(r.select(one_rank as u64, true), Some(i as u64));
47 |                     } else {
48 |                         assert_eq!(r.select(zero_rank as u64, false), Some(i as u64));
49 |                     }
50 |                 }
51 |                 if bit {
52 |                     one_rank += 1;
53 |                 } else {
54 |                     zero_rank += 1;
55 |                 }
56 |             }
57 | 
58 |             for r in &[&from_bits, &from_blocks] {
59 |                 for rank in (one_rank + 1)..bits.len() as u64 {
60 |                     assert_eq!(r.select(rank, true), None);
61 |                 }
62 |                 for rank in (zero_rank + 1)..bits.len() as u64 {
63 |                     assert_eq!(r.select(rank, false), None);
64 |                 }
65 |             }
66 |         });
67 |     }
68 | }
69 | 
--------------------------------------------------------------------------------
/src/test_helpers.rs:
--------------------------------------------------------------------------------
1 | use std::collections::hash_map::DefaultHasher;
2 | use std::hash::Hasher;
3 | 
4 | // QuickCheck doesn't generate uniform integer input, so let's hash
5 | // the blocks before turning them into a bitset.
6 | pub fn hash_u64(x: u64) -> u64 {
7 |     let mut h = DefaultHasher::new();
8 |     h.write_u64(x);
9 |     h.finish()
10 | }
--------------------------------------------------------------------------------
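
As a closing illustration of the sparse-array pattern described in the README, here is a small sketch that uses only the public `RsDict` API shown above (`push`, `rank`, `select`); the concrete values are illustrative only:

```
use rsdict::RsDict;

fn main() {
    // A sparse array with values at indices 2, 5, and 9 of a 12-slot space:
    // store the present values densely, plus a presence bitmap.
    let dense = vec!["a", "b", "c"];
    let mut present = RsDict::new();
    for i in 0..12u64 {
        present.push(i == 2 || i == 5 || i == 9);
    }

    // `rank` maps a sparse index to its slot in the dense array...
    assert_eq!(dense[present.rank(5, true) as usize], "b");

    // ...and `select` maps a dense slot back to the sparse index.
    assert_eq!(present.select(2, true), Some(9));
}
```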