├── src
    ├── internal_data_structure.rs
    ├── lib.rs
    ├── fid
    │   ├── block.rs
    │   ├── chunk.rs
    │   ├── fid_iter.rs
    │   ├── blocks.rs
    │   ├── chunks.rs
    │   └── fid_impl.rs
    ├── internal_data_structure
    │   ├── popcount_table.rs
    │   └── raw_bit_vector.rs
    └── fid.rs
├── .gitignore
├── CHANGELOG.md
├── ci
    └── pr-check-fix.sh
├── .github
    └── workflows
    │   └── clippy.yml
├── README.tpl
├── LICENSE-MIT
├── Cargo.toml
├── .travis.yml
├── tests
    └── test.rs
├── benches
    └── bench.rs
├── README.md
└── LICENSE-APACHE


/src/internal_data_structure.rs:
--------------------------------------------------------------------------------
1 | pub mod popcount_table;
2 | pub mod raw_bit_vector;
3 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | #![doc = include_str!("../README.md")]
2 | 
3 | pub use fid::Fid;
4 | 
5 | pub mod fid;
6 | mod internal_data_structure;
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /target
 2 | **/*.rs.bk
 3 | Cargo.lock
 4 | 
 5 | # local cargo settings
 6 | /.cargo
 7 | 
 8 | # used for debugging with VSCode
 9 | src/main.rs
10 | 


--------------------------------------------------------------------------------
/src/fid/block.rs:
--------------------------------------------------------------------------------
 1 | use super::Block;
 2 | 
 3 | impl super::Block {
 4 |     /// Constructor.
 5 |     pub fn new(value: u16, length: u8) -> Block {
 6 |         Block { value, length }
 7 |     }
 8 | 
 9 |     /// Returns a content (total rank to go) of the block.
10 |     pub fn value(&self) -> u16 {
11 |         self.value
12 |     }
13 | 
14 |     /// Returns size of the block.
15 |     pub fn length(&self) -> u8 {
16 |         self.length
17 |     }
18 | }
19 | 


--------------------------------------------------------------------------------
/src/fid/chunk.rs:
--------------------------------------------------------------------------------
 1 | use super::{Blocks, Chunk};
 2 | use crate::internal_data_structure::raw_bit_vector::RawBitVector;
 3 | 
 4 | impl super::Chunk {
 5 |     /// Constructor.
 6 |     pub fn new(value: u64, length: u16, rbv: &RawBitVector, i_chunk: u64) -> Chunk {
 7 |         let blocks = Blocks::new(rbv, i_chunk, length);
 8 |         Chunk {
 9 |             value,
10 |             blocks,
11 |         }
12 |     }
13 | 
14 |     /// Returns the content of the chunk.
15 |     pub fn value(&self) -> u64 {
16 |         self.value
17 |     }
18 | }
19 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | All notable changes to this project will be documented in this file.
 3 | 
 4 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 5 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 6 | 
 7 | ## [Unreleased]
 8 | 
 9 | ## [v0.2.0] - 2023-04-15
10 | 
11 | ### Added
12 | 
13 | - Added serde, made rayon optional, introduced GitHub actions CI, fixed many code smells. ([#25](https://github.com/laysakura/fid-rs/pull/25))
14 | - feature: Make Fid cloneable. ([#26](https://github.com/laysakura/fid-rs/pull/26))
15 | 
16 | ## [v0.1.1] - 2019-04-26
17 | 
18 | Just fixed docs.rs URL.
19 | 
20 | ## [v0.1.0] - 2019-04-25
21 | Initial release.
22 | 
23 | [Unreleased]: https://github.com/laysakura/fid-rs/compare/v0.2.0...HEAD
24 | [v0.2.0]: <https://github.com/laysakura/fid-rs/compare/v0.1.1...v0.2.0>
25 | [v0.1.1]: <https://github.com/laysakura/fid-rs/compare/v0.1.0...v0.1.1>
26 | [v0.1.0]: https://github.com/laysakura/fid-rs/compare/48fe478...v0.1.0
27 | 


--------------------------------------------------------------------------------
/ci/pr-check-fix.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -eux
 3 | 
 4 | travis_terminate() {
 5 |     set +e
 6 |     pkill -9 -P $$ &> /dev/null || true
 7 |     exit $1
 8 | }
 9 | 
rustup component add rustfmt
cargo readme > /dev/null || cargo install cargo-readme  # skip if already available

## Auto commit & push by CI
(
    cd "$(mktemp -d)"

    # The clone URL embeds GITHUB_TOKEN. Pause xtrace (`set -x`) so that the
    # expanded command line is not echoed into the build log.
    set +x
    git clone "https://${GITHUB_TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git"
    set -x

    cd fid-rs
    git checkout "${TRAVIS_PULL_REQUEST_BRANCH}"

    committed=0

    ### README.md from src/lib.rs
    cargo readme > README.md
    git add README.md
    # `git commit` fails when nothing changed; as the left-hand side of `&&`
    # that failure does not trigger `set -e`.
    git commit -m 'cargo readme > README.md' && committed=1

    ### cargo fmt
    cargo fmt --all
    git add -A
    git commit -m 'cargo fmt --all' && committed=1

    ### git push
    git push origin "${TRAVIS_PULL_REQUEST_BRANCH}"

    ### Stop build if anything updated in remote
    if [ "$committed" -eq 1 ]; then
        travis_terminate 1
    fi
)
38 | 


--------------------------------------------------------------------------------
/.github/workflows/clippy.yml:
--------------------------------------------------------------------------------
 1 | name: Clippy
 2 | 
 3 | 
 4 | on:
 5 |   push:
 6 |     branches: ["master"]
 7 |   pull_request:
 8 |     branches: ["master"]
 9 | 
10 | env:
11 |   CARGO_TERM_COLOR: always
12 | 
13 | jobs:
14 |   clippy:
15 |     runs-on: ubuntu-latest
16 | 
17 |     steps:
18 |       - uses: actions/checkout@v2
19 |       - name: Install Clippy
20 |         run:
21 |           rustup toolchain install nightly --component clippy
22 |       - name: Set up Rust
23 |         uses: actions-rs/toolchain@v1
24 |         with:
25 |           toolchain: nightly
26 |           override: true
27 |       - name: Run clippy
28 |         run: cargo clippy --all-features
29 |       - name: Run clippy without rayon
30 |         run: cargo clippy --no-default-features --features="serde"
31 |       - name: Run tests
32 |         run: cargo test --all-features
33 |       - name: Run tests without rayon
34 |         run: cargo test --no-default-features --features="serde"
35 |       - name: Run tests release
36 |         run: cargo test --release --all-features
37 | 


--------------------------------------------------------------------------------
/README.tpl:
--------------------------------------------------------------------------------
 1 | # {{crate}}
 2 | 
 3 | {{readme}}
 4 | 
 5 | ## Versions
 6 | fid-rs uses [semantic versioning](http://semver.org/spec/v2.0.0.html).
 7 | 
 8 | Since the current major version is _0_, a minor version update might involve breaking public API changes (although this is carefully avoided).
 9 | 
10 | ## Rust Version Supports
11 | 
12 | fid-rs is continuously tested with these Rust versions in Travis CI:
13 | 
14 | - 1.33.0
15 | - Latest stable version
16 | - Beta version
17 | - Nightly build
18 | 
19 | So it is expected to work with Rust 1.33.0 and any newer version.
20 | 
21 | Older versions may also work, but are not tested or guaranteed.
22 | 
23 | ## Contributing
24 | 
25 | Pull requests of any kind are appreciated.
26 | 
27 | ### Guidelines
28 | 
29 | - `README.md` is generated by the `$ cargo readme` command. Do not manually update `README.md`; instead, edit `src/lib.rs` and then run `$ cargo readme > README.md`.
30 | - Travis CI automatically does the following commit & push to your pull-requests:
31 |     - `$ cargo readme > README.md`
32 |     - `$ cargo fmt --all`
33 | 
34 | ## License
35 | 
36 | {{license}}
37 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2019 Sho Nakatani
 2 | 
 3 | Permission is hereby granted, free of charge, to any
 4 | person obtaining a copy of this software and associated
 5 | documentation files (the "Software"), to deal in the
 6 | Software without restriction, including without
 7 | limitation the rights to use, copy, modify, merge,
 8 | publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software
10 | is furnished to do so, subject to the following
11 | conditions:
12 | 
13 | The above copyright notice and this permission notice
14 | shall be included in all copies or substantial portions
15 | of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 | DEALINGS IN THE SOFTWARE.
26 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "fid-rs"
 3 | version = "0.2.0"
 4 | authors = ["Sho Nakatani <lay.sakura@gmail.com>"]
 5 | description = "High performance FID (Fully Indexable Dictionary) library"
 6 | readme = "README.md"
 7 | license = "MIT OR Apache-2.0"
 8 | repository = "https://github.com/laysakura/fid-rs"
 9 | homepage = "https://github.com/laysakura/fid-rs"
10 | keywords = ["fid", "succinct-bit-vector"] # up to 5 keywords, each keyword should have <= 20 chars
11 | categories = ["compression", "data-structures"]
12 | edition = "2018"
13 | 
14 | [dependencies]
15 | # Rayon is an optional feature, which is enabled by default.
16 | # It is used to create the Chunks collection in parallel.
17 | rayon = { version = "1.5", optional = true }
18 | # Serde is another optional feature, which can be enabled by setting `serde` feature.
19 | # It is used to serialize and deserialize the FID structure.
20 | serde = { version = "1.0", optional = true, features = ["derive"] }
21 | mem_dbg = {version = "0.1.4", optional = true}
22 | 
23 | [dev-dependencies]
24 | criterion = "0.5"
25 | rand = "0.8"
26 | 
27 | [features]
28 | default = ["rayon"]
29 | 
30 | [[bench]]
31 | name = "bench"
32 | harness = false
33 | 


--------------------------------------------------------------------------------
/src/fid/fid_iter.rs:
--------------------------------------------------------------------------------
 1 | use super::{Fid, FidIter};
 2 | 
 3 | impl<'iter> Fid {
 4 |     /// Creates an iterator over FID's bit vector.
 5 |     ///
 6 |     /// # Examples
 7 |     /// ```
 8 |     /// use fid_rs::Fid;
 9 |     ///
10 |     /// let fid = Fid::from("1010_1010");
11 |     /// for (i, bit) in fid.iter().enumerate() {
12 |     ///     assert_eq!(bit, fid[i as u64]);
13 |     /// }
14 |     /// ```
15 |     pub fn iter(&'iter self) -> FidIter<'iter> {
16 |         FidIter { fid: self, i: 0 }
17 |     }
18 | }
19 | 
20 | impl<'iter> Iterator for FidIter<'iter> {
21 |     type Item = bool;
22 |     fn next(&mut self) -> Option<Self::Item> {
23 |         if self.i >= self.fid.len() {
24 |             None
25 |         } else {
26 |             self.i += 1;
27 |             Some(self.fid[self.i - 1])
28 |         }
29 |     }
30 | }
31 | 
#[cfg(test)]
mod iter_success_tests {
    use crate::Fid;

    /// Every bit yielded by `iter()` must equal the bit found by indexing at the same position.
    #[test]
    fn iter() {
        let fid = Fid::from("1010_1010");
        for (index, bit) in (0u64..).zip(fid.iter()) {
            assert_eq!(bit, fid[index]);
        }
    }
}
44 | 
45 | #[cfg(test)]
46 | mod iter_failure_tests {
47 |     // Nothing to test
48 | }
49 | 


--------------------------------------------------------------------------------
/src/fid/blocks.rs:
--------------------------------------------------------------------------------
 1 | use super::{Block, Blocks, Chunks};
 2 | use crate::internal_data_structure::raw_bit_vector::RawBitVector;
 3 | 
 4 | impl super::Blocks {
 5 |     /// Constructor.
 6 |     pub fn new(rbv: &RawBitVector, i_chunk: u64, this_chunk_size: u16) -> Blocks {
 7 |         let n = rbv.len();
 8 |         let chunk_size = Chunks::calc_chunk_size(n);
 9 |         let block_size = Blocks::calc_block_size(n);
10 |         let blocks_cnt = this_chunk_size / block_size as u16
11 |             + if this_chunk_size % block_size as u16 == 0 {
12 |                 0
13 |             } else {
14 |                 1
15 |             };
16 | 
17 |         let mut blocks: Vec<Block> = Vec::with_capacity(blocks_cnt as usize);
18 |         for i_block in 0..(blocks_cnt as usize) {
19 |             let i_rbv = i_chunk * chunk_size as u64 + i_block as u64 * block_size as u64;
20 |             assert!(i_rbv < n);
21 | 
22 |             let this_block_size: u8 = if n - i_rbv >= block_size as u64 {
23 |                 block_size
24 |             } else {
25 |                 (n - i_rbv) as u8
26 |             };
27 | 
28 |             let block_rbv = rbv.clone_sub(i_rbv, this_block_size as u64);
29 |             let popcount_in_block = block_rbv.popcount() as u16;
30 |             let block = Block::new(
31 |                 popcount_in_block
32 |                     + if i_block == 0 {
33 |                         0
34 |                     } else {
35 |                         let block_left = &blocks[i_block - 1];
36 |                         block_left.value()
37 |                     },
38 |                 this_block_size,
39 |             );
40 |             blocks.push(block);
41 |         }
42 | 
43 |         Blocks { blocks, blocks_cnt }
44 |     }
45 | 
46 |     /// Returns i-th block.
47 |     ///
48 |     /// # Panics
49 |     /// When _`i` >= `self.blocks_cnt()`_.
50 |     pub fn access(&self, i: u64) -> &Block {
51 |         assert!(
52 |             i <= self.blocks_cnt as u64,
53 |             "i = {} must be smaller then {} (self.blocks_cnt())",
54 |             i,
55 |             self.blocks_cnt,
56 |         );
57 |         &self.blocks[i as usize]
58 |     }
59 | 
60 |     /// Returns size of 1 block: _(log N) / 2_
61 |     pub fn calc_block_size(n: u64) -> u8 {
62 |         let lg2 = (n as f64).log2() as u8;
63 |         let sz = lg2 / 2;
64 |         if sz == 0 {
65 |             1
66 |         } else {
67 |             sz
68 |         }
69 |     }
70 | }
71 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: rust
 2 | branches:
 3 |   only:
 4 |     - master
 5 |     - /^v[0-9]/  # tag
 6 | cache: cargo
 7 | 
 8 | stages:
 9 |   - pr-check-fix
10 |   - test
11 |   - doc-bench-deploy
12 | 
13 | # test stage
14 | rust:
15 |   - 1.33.0  # pinned stable Rust release
16 |   - stable
17 |   #- beta
18 |   #- nightly
19 | os:
20 |   - linux
21 |   - osx
22 | script:
23 |   - cargo build --release --verbose --all
24 |   - cargo test --release --verbose --all
25 | 
26 | jobs:
27 |   include:
28 |     - stage: pr-check-fix
29 |       rust: stable
30 |       os: linux
31 |       script: ./ci/pr-check-fix.sh
32 | 
33 |     - stage: doc-bench-deploy
34 |       rust: stable
35 |       os: linux
36 | 
37 |       # gnuplot for Criterion HTML Report
38 |       addons:
39 |         apt:
40 |           packages:
41 |             - gnuplot-nox
42 | 
43 |       script:
44 |         - cargo doc
45 |         - cargo bench --all
46 |         ## Move criterion's HTML report into doc/ dir in order to be uploaded in github.io
47 |         - rm -rf target/doc/criterion && mv target/criterion target/doc/
48 | 
49 |       deploy:
50 |         # GitHub Pages
51 |         - provider: pages
52 |           skip_cleanup: true
53 |           github_token: "$GITHUB_TOKEN"
54 |           local_dir: target/doc
55 |           on:
56 |             branch: master
57 |         # crates.io
58 |         - provider: cargo
59 |           token:
60 |             secure: T1PLtSay+QeZphz3UjOn1Pn7q0ojNbPMpzxsKimj0ZFtHe09w4mQYASB1hMr55hb7NvTme/cpPxU5KRj2DFs/UpdqoWqgAyuL6NVPTPnJglE9ZXTXPjGXbr3tiH9b24/xQw3Z3j01bzyW1VRHLRQJXzyDk9ykaN/GIm8hxH15wyVT9x5AqqQ6eT83d1LgkgQFkIAcvMaSJ/+MOYKrOeRL3olIH/zLqvkPJVs7zZm1U0Z1aqMJFwDuOlLpV4Tc9485+Gk0gOz04AuBDWOaQcAy4WICKhIK/d6jI2oYptPrkKZmTwGydtOMDdWpOR4TocrdldPdEe8CothZH6k1i52Q4rl0aU2TgVGJRL+qMy0sAlBkRvvtasC9viJg2UFjL6m5Kdbor9xadj8VpSZtq89TTIMHB8WHQJvJwrOlJZk4w8UYhGV5c5EOYrRii00o4VFy2k7bzMWV55SKJXPdDvcVYLBrbTFbXAJtLDQPZWr+w0YfTMP7oxTRR9FzscVDjvR7sszYI8epfpukjmOX32XDXmz0Y2h43Gux+rRqg7eIdjxQR4pgP5wDdxQsf7UZeUwQPPbTPuCwEgf0WJ1uGVgQ0aa0ka8YI2QClor+20io+ZnPdfIX25X9Ggl/Qj4w2/wAan2T7mhMLu3KAkomP0LUDZ+vhRcZbcpgDzU182etMQ=
61 |           on:
62 |             tags: true
63 | 
64 | env:
65 |   global:
66 |     secure: R96BQcuzhTALH8I0HeP6ZhIsCpAc6X4vr9jmM7euIq7s0ztueZqx2Gorycpi9zfFsLPWvDOU3HNEiL7TRROsPWouOEfS+Sn52KISmYRSamv+wwoIe3c/SBxxLXFudWjxI9qGnXbsIZCWObQvvMGQ7a5yi+uRyrqn45Jgj0oXCvhLowo09CDz56PBud967J2BkU3JF+FV82QYFn/82fu5WfDRSxX3Br2owGyU0DyZ4KZTJ+JFmiVSehpOkzVY0sxCtFvGVNFLJEH0xvj5ARWiRofLz1riJYM4yG9q4XBYxUgeJRLJ8OKnusxySncRi1+GFppusX7EScjMRiHVLM3X0s5roalVC7vToYhaWHv2WH//R60nH5txsQtFZirkbAbZuPqTgsT/KPn1xNMNj7dhaD/T3xCOUXn0nAy61ufVlOUFfkPnAhbU4vm3Pjs5xGCpaOQAgr/qnaAR1BcHRarMjwzI1VCqOVjP3M9yYNR/fo6Zfm/ExhOTLSYxiN4XzrEQO3vvcjc1x/ex1V1n2Th4qwV6tvU7GMcyoDHqFxgVI5NW0hUmuDEpgS1dIvaTgrhUq2Mt9vz+BX5P7VbxyJ9Cmervf2BjKbzUVNNvYnswzU/N5GMX8FWe33jf9RyOMRkHlvEiw1ssC+BM+E+CJoX4vZ/AE3ek3C/sA2RaGiA2TOE=
67 | 


--------------------------------------------------------------------------------
/src/internal_data_structure/popcount_table.rs:
--------------------------------------------------------------------------------
  1 | #[cfg(feature = "serde")]
  2 | use serde::{Deserialize, Serialize};
  3 | 
  4 | #[cfg(feature = "mem_dbg")]
  5 | use mem_dbg::{MemDbg, MemSize};
  6 | 
  7 | /// Cache table of `popcount` results.
  8 | #[derive(Clone, Debug)]
  9 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 10 | #[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))]
 11 | pub struct PopcountTable {
 12 |     bit_length: u8,
 13 | 
 14 |     /// `table[target_num] == target_num.popcount()`
 15 |     table: Vec<u8>,
 16 | }
 17 | 
 18 | impl PopcountTable {
 19 |     /// Constructor.
 20 |     ///
 21 |     /// Time-complexity:  `O(bit_length)` (Assuming `u64::count_ones()` takes `O(1)`)
 22 |     /// Space-complexity: `O(bit_length)`
 23 |     ///
 24 |     /// `bit_length` must be in [1, 64].
 25 |     ///
 26 |     /// # Panics
 27 |     /// When `bit_length` is out of [1, 64].
 28 |     pub fn new(bit_length: u8) -> PopcountTable {
 29 |         assert!(
 30 |             (1..=64).contains(&bit_length),
 31 |             "bit_length (= {}) must be in [1, 64]",
 32 |             bit_length
 33 |         );
 34 | 
 35 |         let table = (0..=(1 << bit_length) - 1)
 36 |             .map(|target: u64| target.count_ones() as u8)
 37 |             .collect();
 38 |         PopcountTable { bit_length, table }
 39 |     }
 40 | 
 41 |     /// Returns the same value as `target.count_ones()` in `O(1)`.
 42 |     ///
 43 |     /// # Panics
 44 |     /// When `target` is out of [0, 2^ `self.bit_length` ).
 45 |     pub fn popcount(&self, target: u64) -> u8 {
 46 |         assert!(
 47 |             target <= ((1 << self.bit_length) - 1),
 48 |             "target = {} must be < 2^{}, while PopcountTable::bit_length = {}",
 49 |             target,
 50 |             self.bit_length,
 51 |             self.bit_length
 52 |         );
 53 | 
 54 |         self.table[target as usize]
 55 |     }
 56 | }
 57 | 
 58 | #[cfg(test)]
 59 | mod new_success_tests {
 60 |     // well-tested in popcount_success_tests
 61 | }
 62 | 
 63 | #[cfg(test)]
 64 | mod new_failure_tests {
 65 |     use super::PopcountTable;
 66 | 
 67 |     #[test]
 68 |     #[should_panic]
 69 |     fn new_0() {
 70 |         let _ = PopcountTable::new(0);
 71 |     }
 72 | 
 73 |     #[test]
 74 |     #[should_panic]
 75 |     fn new_65() {
 76 |         let _ = PopcountTable::new(65);
 77 |     }
 78 | }
 79 | 
 80 | #[cfg(test)]
 81 | mod popcount_success_tests {
 82 |     use super::PopcountTable;
 83 |     use std::ops::RangeInclusive;
 84 | 
 85 |     macro_rules! parameterized_tests {
 86 |         ($($name:ident: $value:expr,)*) => {
 87 |         $(
 88 |             #[test]
 89 |             fn $name() {
 90 |                 let bit_length = $value;
 91 |                 let tbl = PopcountTable::new(bit_length);
 92 | 
 93 |                 let range: RangeInclusive<u64> = 0..= ((1 << bit_length) - 1);
 94 |                 for target in range {
 95 |                     assert_eq!(tbl.popcount(target), target.count_ones() as u8);
 96 |                 }
 97 |             }
 98 |         )*
 99 |         }
100 |     }
101 | 
102 |     parameterized_tests! {
103 |         bit_length1: 1,
104 |         bit_length2: 2,
105 |         bit_length4: 4,
106 |         bit_length8: 8,
107 |         bit_length16: 16,
108 |         // wants to test 32, 64 but takes too long time
109 | 
110 |         bit_length15: 15,
111 |         bit_length17: 17,
112 |     }
113 | }
114 | 
#[cfg(test)]
mod popcount_failure_tests {
    use super::PopcountTable;

    /// Looks up the smallest value that does not fit in `bit_length` bits; expected to panic.
    fn lookup_first_out_of_range(bit_length: u8) {
        let table = PopcountTable::new(bit_length);
        let _ = table.popcount(1 << bit_length);
    }

    // Generates one `#[should_panic]` test per `name: bit_length` pair.
    macro_rules! parameterized_tests {
        ($($case:ident: $bits:expr,)*) => {
        $(
            #[test]
            #[should_panic]
            fn $case() {
                lookup_first_out_of_range($bits);
            }
        )*
        }
    }

    parameterized_tests! {
        bit_length1: 1,
        bit_length2: 2,
        bit_length4: 4,
        bit_length8: 8,
        bit_length16: 16,

        bit_length15: 15,
        bit_length17: 17,
    }
}
144 | 


--------------------------------------------------------------------------------
/tests/test.rs:
--------------------------------------------------------------------------------
  1 | use fid_rs::Fid;
  2 | 
  3 | #[test]
  4 | fn from_str() {
  5 |     let fid = Fid::from("01");
  6 |     assert_eq!(fid[0], false);
  7 |     assert_eq!(fid[1], true);
  8 | }
  9 | 
 10 | #[test]
 11 | fn fuzzing_test() {
 12 |     let samples = 10000;
 13 | 
 14 |     fn access_from_bit_string(s: &str, i: u64) -> bool {
 15 |         s.chars().collect::<Vec<char>>()[i as usize] == '1'
 16 |     }
 17 | 
 18 |     fn rank_from_bit_string(s: &str, i: u64) -> u64 {
 19 |         let chs = s.chars().collect::<Vec<char>>();
 20 |         let mut rank: u64 = 0;
 21 |         for j in 0..=i as usize {
 22 |             if chs[j] == '1' {
 23 |                 rank += 1
 24 |             };
 25 |         }
 26 |         rank
 27 |     }
 28 | 
 29 |     fn rank0_from_bit_string(s: &str, i: u64) -> u64 {
 30 |         let chs = s.chars().collect::<Vec<char>>();
 31 |         let mut rank0: u64 = 0;
 32 |         for j in 0..=i as usize {
 33 |             if chs[j] == '0' {
 34 |                 rank0 += 1
 35 |             };
 36 |         }
 37 |         rank0
 38 |     }
 39 | 
 40 |     fn select_from_bit_string(s: &str, num: u64) -> Option<u64> {
 41 |         if num == 0 {
 42 |             return Some(0);
 43 |         }
 44 | 
 45 |         let mut cnt: u64 = 0;
 46 |         for (i, ch) in s.chars().enumerate() {
 47 |             if ch == '1' {
 48 |                 cnt += 1;
 49 |             }
 50 |             if cnt == num {
 51 |                 return Some(i as u64);
 52 |             }
 53 |         }
 54 |         None
 55 |     }
 56 | 
 57 |     fn select0_from_bit_string(s: &str, num: u64) -> Option<u64> {
 58 |         if num == 0 {
 59 |             return Some(0);
 60 |         }
 61 | 
 62 |         let mut cnt: u64 = 0;
 63 |         for (i, ch) in s.chars().enumerate() {
 64 |             if ch == '0' {
 65 |                 cnt += 1;
 66 |             }
 67 |             if cnt == num {
 68 |                 return Some(i as u64);
 69 |             }
 70 |         }
 71 |         None
 72 |     }
 73 | 
 74 |     for _ in 0..samples {
 75 |         let s = &format!("{:b}", rand::random::<u128>());
 76 |         eprintln!("build(): bit vec = \"{}\"", s);
 77 | 
 78 |         let fid = Fid::from(s.as_str());
 79 | 
 80 |         for i in 0..s.len() {
 81 |             eprintln!("[] op: bit vec = \"{}\", i = {}, ", s, i);
 82 |             assert_eq!(
 83 |                 fid[i as u64],
 84 |                 access_from_bit_string(s, i as u64),
 85 |                 "bit vec = \"{}\", i={}, Index<u64>()={}, access_from_bit_string={}",
 86 |                 s,
 87 |                 i,
 88 |                 fid[i as u64],
 89 |                 access_from_bit_string(s, i as u64)
 90 |             );
 91 | 
 92 |             eprintln!("rank(): bit vec = \"{}\", i = {}, ", s, i);
 93 |             assert_eq!(
 94 |                 fid.rank(i as u64),
 95 |                 rank_from_bit_string(s, i as u64),
 96 |                 "bit vec = \"{}\", i={}, Fid::rank()={}, rank_from_bit_string={}",
 97 |                 s,
 98 |                 i,
 99 |                 fid.rank(i as u64),
100 |                 rank_from_bit_string(s, i as u64)
101 |             );
102 | 
103 |             let num = i as u64;
104 |             eprintln!("select(): bit vec = \"{}\", num = {}, ", s, num);
105 |             assert_eq!(
106 |                 fid.select(num),
107 |                 select_from_bit_string(s, num),
108 |                 "bit vec = \"{}\", num={}, Fid::select()={:?}, select_from_bit_string={:?}",
109 |                 s,
110 |                 num,
111 |                 fid.select(num),
112 |                 select_from_bit_string(s, num)
113 |             );
114 | 
115 |             eprintln!("rank0(): bit vec = \"{}\", i = {}, ", s, i);
116 |             assert_eq!(
117 |                 fid.rank0(i as u64),
118 |                 rank0_from_bit_string(s, i as u64),
119 |                 "bit vec = \"{}\", i={}, Fid::rank0()={}, rank0_from_bit_string={}",
120 |                 s,
121 |                 i,
122 |                 fid.rank0(i as u64),
123 |                 rank0_from_bit_string(s, i as u64)
124 |             );
125 | 
126 |             let num = i as u64;
127 |             eprintln!("select0(): bit vec = \"{}\", num = {}, ", s, num);
128 |             assert_eq!(
129 |                 fid.select0(num),
130 |                 select0_from_bit_string(s, num),
131 |                 "bit vec = \"{}\", num={}, Fid::select0()={:?}, select0_from_bit_string={:?}",
132 |                 s,
133 |                 num,
134 |                 fid.select0(num),
135 |                 select0_from_bit_string(s, num)
136 |             );
137 |         }
138 |     }
139 | }
140 | 


--------------------------------------------------------------------------------
/benches/bench.rs:
--------------------------------------------------------------------------------
  1 | #[macro_use]
  2 | extern crate criterion;
  3 | 
  4 | use criterion::Criterion;
  5 | use std::time::Duration;
  6 | 
  7 | fn c() -> Criterion {
  8 |     Criterion::default()
  9 |         .sample_size(10) // must be >= 10 for Criterion v0.3
 10 |         .warm_up_time(Duration::from_secs(1))
 11 |         .with_plots()
 12 | }
 13 | 
 14 | fn git_hash() -> String {
 15 |     use std::process::Command;
 16 |     let output = Command::new("git")
 17 |         .args(&["rev-parse", "--short", "HEAD"])
 18 |         .output()
 19 |         .unwrap();
 20 |     String::from(String::from_utf8(output.stdout).unwrap().trim())
 21 | }
 22 | 
 23 | mod fid {
 24 |     use criterion::{BatchSize, Criterion};
 25 |     use fid_rs::Fid;
 26 | 
 27 |     const NS: [u64; 5] = [1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20];
 28 | 
 29 |     pub fn from_str_benchmark(_: &mut Criterion) {
 30 |         super::c().bench_function_over_inputs(
 31 |             &format!(
 32 |                 "[{}] Fid::from(\"00...(repeated N-times)\")",
 33 |                 super::git_hash()
 34 |             ),
 35 |             |b, &&n| {
 36 |                 b.iter_batched(
 37 |                     || String::from_utf8(vec!['0' as u8; n as usize]).unwrap(),
 38 |                     |s| Fid::from(s.as_str()),
 39 |                     BatchSize::SmallInput,
 40 |                 )
 41 |             },
 42 |             &NS,
 43 |         );
 44 |     }
 45 | 
 46 |     pub fn from_slice_benchmark(_: &mut Criterion) {
 47 |         super::c().bench_function_over_inputs(
 48 |             &format!("[{}] Fid::from(&[false; N])", super::git_hash()),
 49 |             |b, &&n| {
 50 |                 b.iter_batched(
 51 |                     || vec![false; n as usize],
 52 |                     |v| Fid::from(&v[..]),
 53 |                     BatchSize::SmallInput,
 54 |                 )
 55 |             },
 56 |             &NS,
 57 |         );
 58 |     }
 59 | 
 60 |     pub fn rank_benchmark(_: &mut Criterion) {
 61 |         let times = 1_000_000;
 62 | 
 63 |         super::c().bench_function_over_inputs(
 64 |             &format!("[{}] Fid::rank(N) {} times", super::git_hash(), times),
 65 |             move |b, &&n| {
 66 |                 b.iter_batched(
 67 |                     || {
 68 |                         let v = vec![false; n as usize];
 69 |                         Fid::from(&v[..])
 70 |                     },
 71 |                     |fid| {
 72 |                         // iter_batched() does not properly time `routine` time when `setup` time is far longer than `routine` time.
 73 |                         // Tested function takes too short compared to build(). So loop many times.
 74 |                         for _ in 0..times {
 75 |                             assert_eq!(fid.rank(n - 1), 0);
 76 |                         }
 77 |                     },
 78 |                     BatchSize::SmallInput,
 79 |                 )
 80 |             },
 81 |             &NS,
 82 |         );
 83 |     }
 84 | 
 85 |     pub fn select_benchmark(_: &mut Criterion) {
 86 |         let times = 1_000;
 87 | 
 88 |         super::c().bench_function_over_inputs(
 89 |             &format!("[{}] Fid::select(N) {} times", super::git_hash(), times),
 90 |             move |b, &&n| {
 91 |                 b.iter_batched(
 92 |                     || {
 93 |                         let v = vec![true; n as usize];
 94 |                         Fid::from(&v[..])
 95 |                     },
 96 |                     |fid| {
 97 |                         // iter_batched() does not properly time `routine` time when `setup` time is far longer than `routine` time.
 98 |                         // Tested function takes too short compared to build(). So loop many times.
 99 |                         for _ in 0..times {
100 |                             assert_eq!(fid.select(n - 1), Some(n - 2));
101 |                         }
102 |                     },
103 |                     BatchSize::SmallInput,
104 |                 )
105 |             },
106 |             &NS,
107 |         );
108 |     }
109 | 
110 |     pub fn rank0_benchmark(_: &mut Criterion) { // Benchmarks `Fid::rank0()` once per input size `n` taken from `NS`.
111 |         let times = 1_000_000; // number of rank0() calls per run of the routine closure
112 | 
113 |         super::c().bench_function_over_inputs(
114 |             &format!("[{}] Fid::rank0(N) {} times", super::git_hash(), times),
115 |             move |b, &&n| {
116 |                 b.iter_batched(
117 |                     || {
118 |                         let v = vec![false; n as usize]; // setup closure: all-'0' input of length n
119 |                         Fid::from(&v[..])
120 |                     },
121 |                     |fid| {
122 |                         // iter_batched() does not time `routine` accurately when `setup` takes far longer than `routine`.
123 |                         // A single rank0() call is far shorter than building the Fid, so call it `times` times.
124 |                         for _ in 0..times {
125 |                             assert_eq!(fid.rank0(n - 1), n); // every bit is '0', so range [0, n - 1] holds n '0's
126 |                         }
127 |                     },
128 |                     BatchSize::SmallInput,
129 |                 )
130 |             },
131 |             &NS,
132 |         );
133 |     }
134 | 
135 |     pub fn select0_benchmark(_: &mut Criterion) { // Benchmarks `Fid::select0()` once per input size `n` taken from `NS`.
136 |         let times = 1_000; // number of select0() calls per run of the routine closure
137 | 
138 |         super::c().bench_function_over_inputs(
139 |             &format!("[{}] Fid::select0(N) {} times", super::git_hash(), times),
140 |             move |b, &&n| {
141 |                 b.iter_batched(
142 |                     || {
143 |                         let v = vec![false; n as usize]; // setup closure: all-'0' input of length n
144 |                         Fid::from(&v[..])
145 |                     },
146 |                     |fid| {
147 |                         // iter_batched() does not time `routine` accurately when `setup` takes far longer than `routine`.
148 |                         // A single select0() call is far shorter than building the Fid, so call it `times` times.
149 |                         for _ in 0..times {
150 |                             assert_eq!(fid.select0(n - 1), Some(n - 2)); // every bit is '0', so [0, n - 2] is the shortest range holding n - 1 '0's
151 |                         }
152 |                     },
153 |                     BatchSize::SmallInput,
154 |                 )
155 |             },
156 |             &NS,
157 |         );
158 |     }
159 | }
160 | 
161 | criterion_group!( // Groups every benchmark function of the `fid` module under the name `benches`.
162 |     benches,
163 |     fid::from_str_benchmark,
164 |     fid::from_slice_benchmark,
165 |     fid::rank_benchmark,
166 |     fid::select_benchmark,
167 |     fid::rank0_benchmark,
168 |     fid::select0_benchmark,
169 | );
170 | criterion_main!(benches); // Entry point of the bench binary: runs the `benches` group.
171 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # fid-rs
  2 | 
  3 | High performance FID (Fully Indexable Dictionary) library.
  4 | 
  5 | [Master API Docs](https://laysakura.github.io/fid-rs/fid_rs/)
  6 | |
  7 | [Released API Docs](https://docs.rs/crate/fid-rs)
  8 | |
  9 | [Benchmark Results](https://laysakura.github.io/fid-rs/criterion/report/)
 10 | |
 11 | [Changelog](https://github.com/laysakura/fid-rs/blob/master/CHANGELOG.md)
 12 | 
 13 | [![GitHub Actions Status](https://github.com/laysakura/fid-rs/actions/workflows/clippy.yml/badge.svg)](https://github.com/laysakura/fid-rs/actions)
 14 | [![Travis Status](https://travis-ci.com/laysakura/fid-rs.svg?branch=master)](https://travis-ci.com/laysakura/fid-rs)
 15 | [![Crates.io Version](https://img.shields.io/crates/v/fid-rs.svg)](https://crates.io/crates/fid-rs)
 16 | [![Crates.io Downloads](https://img.shields.io/crates/d/fid-rs.svg)](https://crates.io/crates/fid-rs)
 17 | [![Minimum rustc version](https://img.shields.io/badge/rustc-1.33+-lightgray.svg)](https://github.com/laysakura/fid-rs#rust-version-supports)
 18 | [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/laysakura/fid-rs/blob/master/LICENSE-MIT)
 19 | [![License: Apache 2.0](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](https://github.com/laysakura/fid-rs/blob/master/LICENSE-APACHE)
 20 | 
 21 | ## Quickstart
 22 | 
 23 | To use fid-rs, add the following to your `Cargo.toml` file:
 24 | 
 25 | ```toml
 26 | [dependencies]
 27 | fid-rs = "0.1"  # NOTE: Replace with the latest minor version.
 28 | ```
 29 | 
 30 | ### Usage Overview
 31 | 
 32 | ```rust
 33 | use fid_rs::Fid;
 34 | 
 35 | let fid = Fid::from("0100_1");  // Tips: Fid::from::<&str>() ignores '_'.
 36 | 
 37 | // Basic operations ---------------------
 38 | assert_eq!(fid[0], false);  // [0]1001; 0th bit is '0' (false)
 39 | assert_eq!(fid[1], true);   // 0[1]001; 1st bit is '1' (true)
 40 | assert_eq!(fid[4], true);   // 0100[1]; 4th bit is '1' (true)
 41 | 
 42 | assert_eq!(fid.rank(0), 0);  // [0]1001; Range [0, 0] has no '1'
 43 | assert_eq!(fid.rank(3), 1);  // [0100]1; Range [0, 3] has 1 '1'
 44 | assert_eq!(fid.rank(4), 2);  // [01001]; Range [0, 4] has 2 '1's
 45 | 
 46 | assert_eq!(fid.select(0), Some(0)); // []01001; Minimum i where range [0, i] has 0 '1's is i=0
 47 | assert_eq!(fid.select(1), Some(1)); // 0[1]001; Minimum i where range [0, i] has 1 '1's is i=1
 48 | assert_eq!(fid.select(2), Some(4)); // 0100[1]; Minimum i where range [0, i] has 2 '1's is i=4
 49 | assert_eq!(fid.select(3), None);    // There is no i where range [0, i] has 3 '1's
 50 | 
 51 | // rank0, select0 -----------------------
 52 | assert_eq!(fid.rank0(0), 1);  // [0]1001; Range [0, 0] has 1 '0'
 53 | assert_eq!(fid.rank0(3), 3);  // [0100]1; Range [0, 3] has 3 '0's
 54 | assert_eq!(fid.rank0(4), 3);  // [01001]; Range [0, 4] has 3 '0's
 55 | 
 56 | assert_eq!(fid.select0(0), Some(0)); // []01001; Minimum i where range [0, i] has 0 '0's is i=0
 57 | assert_eq!(fid.select0(1), Some(0)); // [0]1001; Minimum i where range [0, i] has 1 '0's is i=0
 58 | assert_eq!(fid.select0(2), Some(2)); // 01[0]01; Minimum i where range [0, i] has 2 '0's is i=2
 59 | assert_eq!(fid.select0(4), None);    // There is no i where range [0, i] has 4 '0's
 60 | ```
 61 | 
 62 | ### Constructors
 63 | 
 64 | ```rust
 65 | use fid_rs::Fid;
 66 | 
 67 | // Most human-friendly way: Fid::from::<&str>()
 68 | let fid = Fid::from("0100_1");
 69 | 
 70 | // Complex construction in simple way: Fid::from::<&[bool]>()
 71 | let mut arr = [false; 5];
 72 | arr[1] = true;
 73 | arr[4] = true;
 74 | let fid = Fid::from(&arr[..]);
 75 | ```
 76 | 
 77 | ### Iterator
 78 | 
 79 | ```rust
 80 | use fid_rs::Fid;
 81 | 
 82 | let fid = Fid::from("0100_1");
 83 | 
 84 | for bit in fid.iter() {
 85 |     println!("{}", bit);
 86 | }
 87 | // =>
 88 | // false
 89 | // true
 90 | // false
 91 | // false
 92 | // true
 93 | ```
 94 | 
 95 | ### Utility Methods
 96 | 
 97 | ```rust
 98 | use fid_rs::Fid;
 99 | 
100 | let fid = Fid::from("0100_1");
101 | 
102 | assert_eq!(fid.len(), 5);
103 | ```
104 | 
105 | ## Features
106 | 
107 | - **Arbitrary length support with minimum working memory**: fid-rs provides FIDs of virtually _arbitrary size_. It is carefully designed to use as little memory as possible.
108 | - **Parallel build of FID**: Build operations (`Fid::from()`) take _O(N)_ time. They are parallelized and achieve nearly optimal scale-out.
109 | - **No memory copy while/after build operations**: After internally creating bit vector representation, any operation does not do memory copy.
110 | - **Latest benchmark results are always accessible**: fid-rs is continuously benchmarked in Travis CI using [Criterion.rs](https://crates.io/crates/criterion). Graphical benchmark results are published [here](https://laysakura.github.io/fid-rs/criterion/report/).
111 | 
112 | ### Complexity
113 | 
114 | When the length of a `Fid` is _N_:
115 | 
116 | | Operation | Time-complexity | Space-complexity |
117 | |-----------|-----------------|------------------|
118 | | [Fid::from::<&str>()](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#implementations) | _O(N)_ | _N + o(N)_ |
119 | | [Fid::from::<&[bool]>()](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#implementations) | _O(N)_ | _N + o(N)_ |
120 | | [Index&lt;u64&gt;](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#impl-Index<u64>) | _O(1)_ | _0_ |
121 | | [Fid::rank()](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#method.rank) | _O(1)_ | _O(1)_ |
122 | | [Fid::rank0()](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#method.rank0) | _O(1)_ | _O(1)_ |
123 | | [Fid::select()](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#method.select) | _O(log N)_ | _O(1)_ |
124 | | [Fid::select0()](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#method.select0) | _O(log N)_ | _O(1)_ |
125 | 
126 | (Actually, `select()`'s time-complexity can be _O(1)_ with complex implementation but fid-rs, like many other libraries, uses binary search of `rank()`'s result).
127 | 
128 | ## Versions
129 | fid-rs uses [semantic versioning](http://semver.org/spec/v2.0.0.html).
130 | 
131 | Since current major version is _0_, minor version update might involve breaking public API change (although it is carefully avoided).
132 | 
133 | ## Rust Version Supports
134 | 
135 | fid-rs is continuously tested with these Rust versions in Travis CI:
136 | 
137 | - 1.33.0
138 | - Latest stable version
139 | - Beta version
140 | - Nightly build
141 | 
142 | So it is expected to work with Rust 1.33.0 and any newer version.
143 | 
144 | Older versions may also work, but are not tested or guaranteed.
145 | 
146 | ## Contributing
147 | 
148 | Pull requests of any kind are appreciated.
149 | 
150 | ### Guidelines
151 | 
152 | - `README.md` is generated by the `$ cargo readme` command. Do not update `README.md` manually; edit `src/lib.rs` and then run `$ cargo readme > README.md`.
153 | - Travis CI automatically does the following commit & push to your pull-requests:
154 |     - `$ cargo readme > README.md`
155 |     - `$ cargo fmt --all`
156 | 
157 | ## License
158 | 
159 | MIT OR Apache-2.0
160 | 


--------------------------------------------------------------------------------
/src/fid.rs:
--------------------------------------------------------------------------------
  1 | mod block;
  2 | mod blocks;
  3 | mod chunk;
  4 | mod chunks;
  5 | mod fid_impl;
  6 | mod fid_iter;
  7 | 
  8 | use super::internal_data_structure::popcount_table::PopcountTable;
  9 | 
 10 | #[cfg(feature = "serde")]
 11 | use serde::{Deserialize, Serialize};
 12 | 
 13 | #[cfg(feature = "mem_dbg")]
 14 | use mem_dbg::{MemDbg, MemSize};
 15 | 
 16 | /// FID (Fully Indexable Dictionary).
 17 | ///
 18 | /// This struct can handle bit sequences of virtually **arbitrary length**.
 19 | ///
 20 | /// In fact, _N_ (FID's length) is designed to be limited to: _N <= 2^64_.<br>
 21 | /// It should be enough for almost all use cases, since binary data of length _2^64_ consumes _2^21 = 2,097,152_ TB (terabytes), which is hard to handle even with state-of-the-art computer architecture.
 22 | ///
 23 | /// # Implementation detail
 24 | /// [Index&lt;u64&gt;](#impl-Index<u64>)'s implementation is trivial.
 25 | ///
 26 | /// [select()](#method.select) just uses binary search of `rank()` results.
 27 | ///
 28 | /// [rank()](#method.rank)'s implementation is standard but non-trivial.
 29 | /// So the implementation of _rank()_ is explained here.
 30 | ///
 31 | /// ## [rank()](#method.rank)'s implementation
 32 | /// Say you have the following bit vector.
 33 | ///
 34 | /// ```text
 35 | /// 00001000 01000001 00000100 11000000 00100000 00000101 10100000 00010000 001 ; (N=67)
 36 | /// ```
 37 | ///
 38 | /// Answer _rank(48)_ in _O(1)_ time-complexity and _o(N)_ space-complexity.
 39 | ///
 40 | /// Naively, you can count the number of '1' from left to right.
 41 | /// You will find _rank(48) == 10_ but it took _O(N)_ time-complexity.
 42 | ///
 43 | /// To reduce time-complexity to _O(1)_, you can use the _memoization_ technique.<br>
 44 | /// Of course, you can memoize the results of _rank(i)_ for every _i ([0, N-1])_.
 45 | ///
 46 | /// ```text
 47 | /// Bit vector;   0  0  0  0  1  0  0  0  0  1  0  0  0  0  0  1  0  0  0  0  0  1  0  0  1  1  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  1  0  1  [1]  0  1  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  1 ; (N=67)
 48 | /// Memo rank(i); 0  0  0  0  1  1  1  1  1  2  2  2  2  2  2  3  3  3  3  3  3  4  4  4  5  6  6  6  6  6  6  6  6  6  7  7  7  7  7  7  7  7  7  7  7  8  8  9  10  10 11 11 11 11 11 11 11 11 11 12 12 12 12 12 12 12 13
 49 | /// ```
 50 | ///
 51 | /// From this memo, you can answer _rank(48) == 10_ in constant time, although space-complexity for this memo is _O(N) > o(N)_.
 52 | ///
 53 | /// To reduce the space-complexity of memoization, we divide the bit vector into **Chunk**s and **Block**s.
 54 | ///
 55 | /// ```text
 56 | /// Bit vector; 00001000 01000001 00000100 11000000 00100000 00000101 [1]0100000 00010000 001  ; (N=67)
 57 | /// Chunk;     |                  7                    |                13                  |  ; (size = (log N)^2 = 36)
 58 | /// Block;     |0 |1 |1  |2 |2 |3  |3 |4 |6  |6 |6  |7 |0 |0  |0 |2 |4    |4 |4  |5 |5 |5  |6| ; (size = (log N) / 2 = 3)
 59 | /// ```
 60 | ///
 61 | /// - A **Chunk** has size of _(log N)^2_. Its value is _rank(<u>index of the last bit of the chunk</u>)_.
 62 | /// - A **Block** has size of _(log N) / 2_. A chunk has many blocks. Block's value is the number of '1's in _[<u>index of the first bit of the chunk the block belongs to</u>, <u>index of the last bit of the block</u>]_ (note that the value is reset to 0 at the first bit of a chunk).
 63 | ///
 64 | /// Now you want to answer _rank(48)_. 48-th bit is in the 2nd chunk, and in the 5th block in the chunk.<br>
 65 | /// So the _rank(48)_ is at least:
 66 | ///
 67 | ///   _<u>7 (value of 1st chunk)</u> + <u>2 (value of 4th block in the 2nd chunk)</u>_
 68 | ///
 69 | /// Then, focus on 3 bits in 5th block in the 2nd chunk; `[1]01`.<br>
 70 | /// As you can see, only 1 '1' is included up to 48-th bit (`101` has 2 '1's but 2nd '1' is 50-th bit, irrelevant to _rank(48)_).
 71 | ///
 72 | /// Therefore, the _rank(48)_ is calculated as:
 73 | ///
 74 | ///   _<u>7 (value of 1st chunk)</u> + <u>2 (value of 4th block in the 2nd chunk)</u> + <u>1 ('1's in 5th block up to 48-th bit)</u>_
 75 | ///
 76 | /// OK. That's all... Wait!<br>
 77 | /// _rank()_ must be in _O(1)_ time-complexity.
 78 | ///
 79 | /// - _<u>7 (value of 1st chunk)</u>_: _O(1)_ if you store chunk value in array structure.
 80 | /// - _<u>2 (value of 4th block in the 2nd chunk)</u>_: Same as above.
 81 | /// - _<u>1 ('1's in 5th block up to 48-th bit)</u>_: **_O(<u>length of block</u>) = O(log N)_** !
 82 | ///
 83 | /// Counting '1's in a block must also be _O(1)_, while using _o(N)_ space.<br>
 84 | /// We use **Table** for this purpose.
 85 | ///
 86 | /// | Block content | Number of '1's in block |
 87 | /// |---------------|-------------------------|
 88 | /// | `000`         | 0                       |
 89 | /// | `001`         | 1                       |
 90 | /// | `010`         | 1                       |
 91 | /// | `011`         | 2                       |
 92 | /// | `100`         | 1                       |
 93 | /// | `101`         | 2                       |
 94 | /// | `110`         | 2                       |
 95 | /// | `111`         | 3                       |
 96 | ///
 97 | /// This table is constructed in `build()`. So we can find the number of '1's in block in _O(1)_ time.<br>
 98 | /// Note that the table's keys are _O(log N)_ bits long (one block), so it has _O(√N) = o(N)_ entries.
 99 | ///
100 | /// In summary:
101 | ///
102 | ///   _rank() = (value of left chunk) + (value of left block) + (value of table keyed by inner block bits)_.
103 | #[derive(Clone, Debug)]
104 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
105 | #[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))]
106 | pub struct Fid {
107 |     /// Raw data of the bit sequence, stored as bytes.
108 |     byte_vec: Vec<u8>,
109 | 
110 |     /// Length of the bit sequence, in bits.
111 |     bit_len: u64,
112 | 
113 |     /// Chunks; each one holds the total popcount of _[0, <u>last bit of the chunk</u>]_.
114 |     ///
115 |     /// A chunk's value is _2^64_ at most (when every bit is '1' in a bit vector of length _2^64_).
116 |     /// Each chunk is further divided into blocks.
117 |     chunks: Chunks,
118 | 
119 |     /// Table to calculate inner-block `rank()` in _O(1)_.
120 |     table: PopcountTable,
121 | }
122 | 
123 | pub struct FidIter<'iter> { // Iteration state over a borrowed `Fid`; the iteration logic itself lives outside this file.
124 |     fid: &'iter Fid, // the `Fid` being traversed
125 |     i: u64, // NOTE(review): presumably the index of the next bit to yield — confirm in fid_iter.rs
126 | }
127 | 
128 | /// Collection of `Chunk`s covering a whole bit vector.
129 | #[derive(Clone, Debug)]
130 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
131 | #[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))]
132 | struct Chunks {
133 |     chunks: Vec<Chunk>, // chunks, in the order they appear in the bit vector
134 |     chunks_cnt: u64, // number of chunks
135 | }
136 | 
137 | /// Total popcount of _[0, <u>last bit of the chunk</u>]_ of a bit vector.
138 | ///
139 | /// A chunk's value is _2^64_ at most (when every bit is '1' in a Fid of length _2^64_).
140 | #[derive(Clone, Debug)]
141 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
142 | #[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))]
143 | struct Chunk {
144 |     value: u64, // cumulative popcount from bit 0 through the last bit of this chunk
145 |     blocks: Blocks, // blocks that subdivide this chunk
146 | }
147 | 
148 | /// Collection of `Block`s in a `Chunk`.
149 | #[derive(Clone, Debug)]
150 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
151 | #[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))]
152 | struct Blocks {
153 |     blocks: Vec<Block>,
154 |     blocks_cnt: u16, // NOTE(review): presumably the number of entries in `blocks` — confirm in blocks.rs
155 | }
156 | 
157 | /// Total popcount of _[<u>first bit of the chunk which the block belongs to</u>, <u>last bit of the block</u>]_ of a bit vector.
158 | ///
159 | /// A block's value is (log 2^64)^2 = 64^2 = 2^12 at most (when every bit in a chunk is '1' for a Fid of length 2^64), so it fits in `u16`.
160 | #[derive(Clone, Debug)]
161 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
162 | #[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))]
163 | struct Block {
164 |     value: u16, // popcount
165 |     length: u8, // size of the block
166 | }
167 | 


--------------------------------------------------------------------------------
/src/fid/chunks.rs:
--------------------------------------------------------------------------------
  1 | #[cfg(feature = "rayon")]
  2 | use rayon::prelude::*;
  3 | 
  4 | use super::{Chunk, Chunks};
  5 | use crate::internal_data_structure::raw_bit_vector::RawBitVector;
  6 | 
  7 | impl super::Chunks {
  8 |     /// Constructor (parallel build; compiled when the `rayon` feature is enabled).
  9 |     #[cfg(feature = "rayon")]
 10 |     pub fn new(rbv: &RawBitVector) -> Chunks {
 11 |         let n = rbv.len();
 12 |         let chunk_size: u16 = Chunks::calc_chunk_size(n);
 13 |         let chunks_cnt: usize = Chunks::calc_chunks_cnt(n) as usize;
 14 | 
 15 |         // Destination for `collect_into_vec()` below.
 16 |         // Capacity for all `chunks_cnt` chunks is reserved up front; the vector itself starts empty.
 17 |         let mut chunks: Vec<Chunk> = Vec::with_capacity(chunks_cnt);
 18 | 
 19 |         // Parallel - Each chunk is built with the popcount of its own bits only.
 20 |         //     A chunk's final value is the total popcount from index 0; that is computed afterwards in the sequential loop below.
 21 |         (0..chunks_cnt)
 22 |             .into_par_iter()
 23 |             .map(|number_of_chunk| {
 24 |                 let this_chunk_size: u16 = if number_of_chunk == chunks_cnt - 1 {
 25 |                     // When `chunk_size == 6`:
 26 |                     //
 27 |                     //  000 111 000 11   : rbv
 28 |                     // |       |      |  : chunks
 29 |                     //
 30 |                     // Here, when `number_of_chunk == 1` (targeting the last '00011' chunk),
 31 |                     // `this_chunk_size == 5`
 32 |                     let chunk_size_or_0 = (n % chunk_size as u64) as u16;
 33 |                     if chunk_size_or_0 == 0 {
 34 |                         chunk_size
 35 |                     } else {
 36 |                         chunk_size_or_0
 37 |                     }
 38 |                 } else {
 39 |                     chunk_size
 40 |                 };
 41 | 
 42 |                 let chunk_rbv = rbv.clone_sub(
 43 |                     number_of_chunk as u64 * chunk_size as u64,
 44 |                     this_chunk_size as u64,
 45 |                 );
 46 | 
 47 |                 let popcnt_in_chunk = chunk_rbv.popcount();
 48 |                 Chunk::new(
 49 |                     popcnt_in_chunk,
 50 |                     this_chunk_size,
 51 |                     rbv,
 52 |                     number_of_chunk as u64,
 53 |                 )
 54 |             })
 55 |             .collect_into_vec(&mut chunks);
 56 | 
 57 |         // Sequential - Add the previous chunk's (already cumulative) value so each chunk holds the total popcount from index 0.
 58 |         for i_chunk in 0..chunks_cnt {
 59 |             chunks[i_chunk].value += if i_chunk == 0 {
 60 |                 0
 61 |             } else {
 62 |                 chunks[i_chunk - 1].value
 63 |             }
 64 |         }
 65 |         Chunks {
 66 |             chunks,
 67 |             chunks_cnt: chunks_cnt as u64,
 68 |         }
 69 |     }
 70 | 
 71 |     /// Constructor (sequential build; compiled when the `rayon` feature is disabled).
 72 |     #[cfg(not(feature = "rayon"))]
 73 |     pub fn new(rbv: &RawBitVector) -> Chunks {
 74 |         let n = rbv.len();
 75 |         let chunk_size: u16 = Chunks::calc_chunk_size(n);
 76 |         let chunks_cnt: u64 = Chunks::calc_chunks_cnt(n);
 77 | 
 78 |         let mut chunks: Vec<Chunk> = Vec::with_capacity(chunks_cnt as usize);
 79 |         let mut comulative_popcount = 0; // running total of popcounts of all chunks visited so far
 80 | 
 81 |         for i_chunk in 0..chunks_cnt {
 82 |             let this_chunk_size: u16 = if i_chunk == chunks_cnt - 1 {
 83 |                 // When `chunk_size == 6`:
 84 |                 //
 85 |                 //  000 111 000 11   : rbv
 86 |                 // |       |      |  : chunks
 87 |                 //
 88 |                 // Here, when `i_chunk == 1` (targeting the last '00011' chunk),
 89 |                 // `this_chunk_size == 5`
 90 |                 let chunk_size_or_0 = (n % chunk_size as u64) as u16;
 91 |                 if chunk_size_or_0 == 0 {
 92 |                     chunk_size
 93 |                 } else {
 94 |                     chunk_size_or_0
 95 |                 }
 96 |             } else {
 97 |                 chunk_size
 98 |             };
 99 | 
100 |             let chunk_rbv = rbv.clone_sub(i_chunk * chunk_size as u64, this_chunk_size as u64);
101 | 
102 |             let popcnt_in_chunk = chunk_rbv.popcount();
103 |             comulative_popcount += popcnt_in_chunk;
104 |             chunks.push(Chunk::new(
105 |                 comulative_popcount,
106 |                 this_chunk_size,
107 |                 rbv,
108 |                 i_chunk,
109 |             ));
110 |         }
111 |         
112 |         Chunks { chunks, chunks_cnt }
113 |     }
114 | 
115 |     /// Returns size of 1 chunk: _(log N)^2_ (clamped to 1 when that would be 0).
116 |     pub fn calc_chunk_size(n: u64) -> u16 {
117 |         let lg2 = (n as f64).log2() as u16; // log2(n) with the fractional part dropped by the cast
118 |         let sz = lg2 * lg2;
119 |         if sz == 0 {
120 |             1 // never return 0: calc_chunks_cnt() divides by this value
121 |         } else {
122 |             sz
123 |         }
124 |     }
125 | 
126 |     /// Returns count of chunks: _N / (log N)^2_, rounded up so that a trailing partial chunk is counted.
127 |     ///
128 |     /// At max: N / (log N)^2 = 2^64 / 64^2 = 2^(64-12)
129 |     pub fn calc_chunks_cnt(n: u64) -> u64 {
130 |         let chunk_size = Chunks::calc_chunk_size(n);
131 |         n / (chunk_size as u64) + if n % (chunk_size as u64) == 0 { 0 } else { 1 } // ceiling division
132 |     }
133 | 
134 |     /// Returns a reference to the i-th chunk (0-based).
135 |     ///
136 |     /// # Panics
137 |     /// When _`i` >= `self.chunks_cnt`_.
138 |     pub fn access(&self, i: u64) -> &Chunk {
139 |         assert!(
140 |             i < self.chunks_cnt, // strict bound: `i == chunks_cnt` is already past the last chunk
141 |             "i = {} must be smaller then {} (self.chunks_cnt())",
142 |             i,
143 |             self.chunks_cnt
144 |         );
145 |         &self.chunks[i as usize]
146 |     }
147 | }
148 | 
149 | #[cfg(test)]
150 | mod new_success_tests {
151 |     use super::Chunks;
152 |     use crate::internal_data_structure::raw_bit_vector::RawBitVector;
153 | 
154 |     struct Input<'a> {
155 |         byte_slice: &'a [u8], // bytes holding the bit vector under test
156 |         last_byte_len: u8, // number of bits used in the last byte of `byte_slice`
157 |         expected_chunk_size: u16, // expected result of `Chunks::calc_chunk_size(N)`
158 |         expected_chunks: &'a Vec<u64>, // expected `value()` of each chunk, in order
159 |     }
160 | 
161 |     macro_rules! parameterized_tests {
162 |         ($($name:ident: $value:expr,)*) => {
163 |         $(
164 |             #[test]
165 |             fn $name() {
166 |                 let input: Input = $value;
167 |                 let rbv = RawBitVector::new(input.byte_slice, 0, input.last_byte_len);
168 |                 let n = rbv.len();
169 |                 let chunks = Chunks::new(&rbv);
170 | 
171 |                 assert_eq!(Chunks::calc_chunk_size(n), input.expected_chunk_size);
172 |                 assert_eq!(Chunks::calc_chunks_cnt(n), input.expected_chunks.len() as u64);
173 |                 for (i, expected_chunk) in input.expected_chunks.iter().enumerate() {
174 |                     let chunk = chunks.access(i as u64);
175 |                     assert_eq!(chunk.value(), *expected_chunk);
176 |                 }
177 |             }
178 |         )*
179 |         }
180 |     }
181 | 
182 |     parameterized_tests! {
183 |         t1: Input {
184 |             // N = 1, (log_2(N))^2 = 0, which calc_chunk_size() clamps to 1
185 |             byte_slice: &[0b0000_0000],
186 |             last_byte_len: 1,
187 |             expected_chunk_size: 1,
188 |             expected_chunks: &vec!(0)
189 |         },
190 |         t2: Input {
191 |             // N = 1, (log_2(N))^2 = 0, which calc_chunk_size() clamps to 1
192 |             byte_slice: &[0b1000_0000],
193 |             last_byte_len: 1,
194 |             expected_chunk_size: 1,
195 |             expected_chunks: &vec!(1)
196 |         },
197 |         t3: Input {
198 |             // N = 2^2, (log_2(N))^2 = 4
199 |             byte_slice: &[0b0111_0000],
200 |             last_byte_len: 4,
201 |             expected_chunk_size: 4,
202 |             expected_chunks: &vec!(3)
203 |         },
204 |         t4: Input {
205 |             // N = 2^3, (log_2(N))^2 = 9
206 |             byte_slice: &[0b0111_1101],
207 |             last_byte_len: 8,
208 |             expected_chunk_size: 9,
209 |             expected_chunks: &vec!(6)
210 |         },
211 |         t5: Input {
212 |              // N = 2^3 + 1, (log_2(N))^2 = 9
213 |             byte_slice: &[0b0111_1101, 0b1000_0000],
214 |             last_byte_len: 1,
215 |             expected_chunk_size: 9,
216 |             expected_chunks: &vec!(7)
217 |         },
218 |         t6: Input {
219 |             // N = 2^3 + 2, (log_2(N))^2 = 9
220 |             byte_slice: &[0b0111_1101, 0b1100_0000],
221 |             last_byte_len: 2,
222 |             expected_chunk_size: 9,
223 |             expected_chunks: &vec!(7, 8)
224 |         },
225 | 
226 |         bugfix_11: Input {
227 |             // N = 2^1, (log_2(N))^2 = 1
228 |             byte_slice: &[0b1100_0000],
229 |             last_byte_len: 2,
230 |             expected_chunk_size: 1,
231 |             expected_chunks: &vec!(1, 2)
232 |         },
233 |         bugfix_11110110_11010101_01000101_11101111_10101011_10100101_01100011_00110100_01010101_10010000_01001100_10111111_00110011_00111110_01110101_11011100: Input {
234 |             // N = 8 * 16 = 2^7, (log_2(N))^2 = 49
235 |             byte_slice: &[0b11110110, 0b11010101, 0b01000101, 0b11101111, 0b10101011, 0b10100101, 0b0_1100011, 0b00110100, 0b01010101, 0b10010000, 0b01001100, 0b10111111, 0b00_110011, 0b00111110, 0b01110101, 0b11011100],
236 |             last_byte_len: 8,
237 |             expected_chunk_size: 49,
238 |             expected_chunks: &vec!(30, 53, 72)
239 |         },
240 |     }
241 | }
242 | 


--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
  1 |                               Apache License
  2 |                         Version 2.0, January 2004
  3 |                      http://www.apache.org/licenses/
  4 | 
  5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 | 1. Definitions.
  8 | 
  9 |    "License" shall mean the terms and conditions for use, reproduction,
 10 |    and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |    "Licensor" shall mean the copyright owner or entity authorized by
 13 |    the copyright owner that is granting the License.
 14 | 
 15 |    "Legal Entity" shall mean the union of the acting entity and all
 16 |    other entities that control, are controlled by, or are under common
 17 |    control with that entity. For the purposes of this definition,
 18 |    "control" means (i) the power, direct or indirect, to cause the
 19 |    direction or management of such entity, whether by contract or
 20 |    otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |    outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |    "You" (or "Your") shall mean an individual or Legal Entity
 24 |    exercising permissions granted by this License.
 25 | 
 26 |    "Source" form shall mean the preferred form for making modifications,
 27 |    including but not limited to software source code, documentation
 28 |    source, and configuration files.
 29 | 
 30 |    "Object" form shall mean any form resulting from mechanical
 31 |    transformation or translation of a Source form, including but
 32 |    not limited to compiled object code, generated documentation,
 33 |    and conversions to other media types.
 34 | 
 35 |    "Work" shall mean the work of authorship, whether in Source or
 36 |    Object form, made available under the License, as indicated by a
 37 |    copyright notice that is included in or attached to the work
 38 |    (an example is provided in the Appendix below).
 39 | 
 40 |    "Derivative Works" shall mean any work, whether in Source or Object
 41 |    form, that is based on (or derived from) the Work and for which the
 42 |    editorial revisions, annotations, elaborations, or other modifications
 43 |    represent, as a whole, an original work of authorship. For the purposes
 44 |    of this License, Derivative Works shall not include works that remain
 45 |    separable from, or merely link (or bind by name) to the interfaces of,
 46 |    the Work and Derivative Works thereof.
 47 | 
 48 |    "Contribution" shall mean any work of authorship, including
 49 |    the original version of the Work and any modifications or additions
 50 |    to that Work or Derivative Works thereof, that is intentionally
 51 |    submitted to Licensor for inclusion in the Work by the copyright owner
 52 |    or by an individual or Legal Entity authorized to submit on behalf of
 53 |    the copyright owner. For the purposes of this definition, "submitted"
 54 |    means any form of electronic, verbal, or written communication sent
 55 |    to the Licensor or its representatives, including but not limited to
 56 |    communication on electronic mailing lists, source code control systems,
 57 |    and issue tracking systems that are managed by, or on behalf of, the
 58 |    Licensor for the purpose of discussing and improving the Work, but
 59 |    excluding communication that is conspicuously marked or otherwise
 60 |    designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |    "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |    on behalf of whom a Contribution has been received by Licensor and
 64 |    subsequently incorporated within the Work.
 65 | 
 66 | 2. Grant of Copyright License. Subject to the terms and conditions of
 67 |    this License, each Contributor hereby grants to You a perpetual,
 68 |    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |    copyright license to reproduce, prepare Derivative Works of,
 70 |    publicly display, publicly perform, sublicense, and distribute the
 71 |    Work and such Derivative Works in Source or Object form.
 72 | 
 73 | 3. Grant of Patent License. Subject to the terms and conditions of
 74 |    this License, each Contributor hereby grants to You a perpetual,
 75 |    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |    (except as stated in this section) patent license to make, have made,
 77 |    use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |    where such license applies only to those patent claims licensable
 79 |    by such Contributor that are necessarily infringed by their
 80 |    Contribution(s) alone or by combination of their Contribution(s)
 81 |    with the Work to which such Contribution(s) was submitted. If You
 82 |    institute patent litigation against any entity (including a
 83 |    cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |    or a Contribution incorporated within the Work constitutes direct
 85 |    or contributory patent infringement, then any patent licenses
 86 |    granted to You under this License for that Work shall terminate
 87 |    as of the date such litigation is filed.
 88 | 
 89 | 4. Redistribution. You may reproduce and distribute copies of the
 90 |    Work or Derivative Works thereof in any medium, with or without
 91 |    modifications, and in Source or Object form, provided that You
 92 |    meet the following conditions:
 93 | 
 94 |    (a) You must give any other recipients of the Work or
 95 |        Derivative Works a copy of this License; and
 96 | 
 97 |    (b) You must cause any modified files to carry prominent notices
 98 |        stating that You changed the files; and
 99 | 
100 |    (c) You must retain, in the Source form of any Derivative Works
101 |        that You distribute, all copyright, patent, trademark, and
102 |        attribution notices from the Source form of the Work,
103 |        excluding those notices that do not pertain to any part of
104 |        the Derivative Works; and
105 | 
106 |    (d) If the Work includes a "NOTICE" text file as part of its
107 |        distribution, then any Derivative Works that You distribute must
108 |        include a readable copy of the attribution notices contained
109 |        within such NOTICE file, excluding those notices that do not
110 |        pertain to any part of the Derivative Works, in at least one
111 |        of the following places: within a NOTICE text file distributed
112 |        as part of the Derivative Works; within the Source form or
113 |        documentation, if provided along with the Derivative Works; or,
114 |        within a display generated by the Derivative Works, if and
115 |        wherever such third-party notices normally appear. The contents
116 |        of the NOTICE file are for informational purposes only and
117 |        do not modify the License. You may add Your own attribution
118 |        notices within Derivative Works that You distribute, alongside
119 |        or as an addendum to the NOTICE text from the Work, provided
120 |        that such additional attribution notices cannot be construed
121 |        as modifying the License.
122 | 
123 |    You may add Your own copyright statement to Your modifications and
124 |    may provide additional or different license terms and conditions
125 |    for use, reproduction, or distribution of Your modifications, or
126 |    for any such Derivative Works as a whole, provided Your use,
127 |    reproduction, and distribution of the Work otherwise complies with
128 |    the conditions stated in this License.
129 | 
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 |    any Contribution intentionally submitted for inclusion in the Work
132 |    by You to the Licensor shall be under the terms and conditions of
133 |    this License, without any additional terms or conditions.
134 |    Notwithstanding the above, nothing herein shall supersede or modify
135 |    the terms of any separate license agreement you may have executed
136 |    with Licensor regarding such Contributions.
137 | 
138 | 6. Trademarks. This License does not grant permission to use the trade
139 |    names, trademarks, service marks, or product names of the Licensor,
140 |    except as required for reasonable and customary use in describing the
141 |    origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 |    agreed to in writing, Licensor provides the Work (and each
145 |    Contributor provides its Contributions) on an "AS IS" BASIS,
146 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |    implied, including, without limitation, any warranties or conditions
148 |    of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |    PARTICULAR PURPOSE. You are solely responsible for determining the
150 |    appropriateness of using or redistributing the Work and assume any
151 |    risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 |    whether in tort (including negligence), contract, or otherwise,
155 |    unless required by applicable law (such as deliberate and grossly
156 |    negligent acts) or agreed to in writing, shall any Contributor be
157 |    liable to You for damages, including any direct, indirect, special,
158 |    incidental, or consequential damages of any character arising as a
159 |    result of this License or out of the use or inability to use the
160 |    Work (including but not limited to damages for loss of goodwill,
161 |    work stoppage, computer failure or malfunction, or any and all
162 |    other commercial damages or losses), even if such Contributor
163 |    has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 |    the Work or Derivative Works thereof, You may choose to offer,
167 |    and charge a fee for, acceptance of support, warranty, indemnity,
168 |    or other liability obligations and/or rights consistent with this
169 |    License. However, in accepting such obligations, You may act only
170 |    on Your own behalf and on Your sole responsibility, not on behalf
171 |    of any other Contributor, and only if You agree to indemnify,
172 |    defend, and hold each Contributor harmless for any liability
173 |    incurred by, or claims asserted against, such Contributor by reason
174 |    of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 |    To apply the Apache License to your work, attach the following
181 |    boilerplate notice, with the fields enclosed by brackets "[]"
182 |    replaced with your own identifying information. (Don't include
183 |    the brackets!)  The text should be enclosed in the appropriate
184 |    comment syntax for the file format. We also recommend that a
185 |    file or class name and description of purpose be included on the
186 |    same "printed page" as the copyright notice for easier
187 |    identification within third-party archives.
188 | 
189 | Copyright 2019 Sho Nakatani
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | 	http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 


--------------------------------------------------------------------------------
/src/fid/fid_impl.rs:
--------------------------------------------------------------------------------
  1 | use super::{Blocks, Chunks, Fid};
  2 | use crate::internal_data_structure::popcount_table::PopcountTable;
  3 | use crate::internal_data_structure::raw_bit_vector::RawBitVector;
  4 | use std::ops::Index;
  5 | 
  6 | impl From<&str> for Fid {
  7 |     /// Constructor from string representation of bit sequence.
  8 |     ///
  9 |     /// - '0' is interpreted as _0_.
 10 |     /// - '1' is interpreted as _1_.
 11 |     /// - '_' is just ignored.
 12 |     ///
 13 |     /// # Examples
 14 |     /// ```
 15 |     /// use fid_rs::Fid;
 16 |     ///
 17 |     /// let fid = Fid::from("01_11");
 18 |     /// assert_eq!(fid[0], false);
 19 |     /// assert_eq!(fid[1], true);
 20 |     /// assert_eq!(fid[2], true);
 21 |     /// assert_eq!(fid[3], true);
 22 |     /// ```
 23 |     ///
 24 |     /// # Panics
 25 |     /// When:
 26 |     /// - `s` contains any character other than '0', '1', and '_'.
 27 |     /// - `s` does not contain any '0' or '1'.
 28 |     fn from(s: &str) -> Self {
 29 |         // Match `char`s, not bytes, so the panic names the bad character rather than a number.
 30 |         let bits: Vec<bool> = s
 31 |             .chars()
 32 |             .filter_map(|c| match c {
 33 |                 '0' => Some(false),
 34 |                 '1' => Some(true),
 35 |                 '_' => None,
 36 |                 _ => panic!("`s` must consist of '0' or '1'. '{}' included.", c),
 37 |             })
 38 |             .collect();
 39 |         Self::from(&bits[..])
 40 |     }
 41 | }
 42 | 
 43 | impl From<&[bool]> for Fid {
 44 |     /// Builds a `Fid` whose `i`-th bit equals `bits[i]`.
 45 |     ///
 46 |     /// # Examples
 47 |     /// ```
 48 |     /// use fid_rs::Fid;
 49 |     ///
 50 |     /// let bits = [false, true, true, true];
 51 |     /// let fid = Fid::from(&bits[..]);
 52 |     /// assert_eq!(fid[0], false);
 53 |     /// assert_eq!(fid[1], true);
 54 |     /// assert_eq!(fid[2], true);
 55 |     /// assert_eq!(fid[3], true);
 56 |     /// ```
 57 |     ///
 58 |     /// # Panics
 59 |     /// When:
 60 |     /// - `bits` is empty.
 61 |     fn from(bits: &[bool]) -> Self {
 62 |         assert!(!bits.is_empty());
 63 | 
 64 |         let mut packed: Vec<u8> = Vec::with_capacity(bits.len() / 8 + 1);
 65 |         let mut tail_width = 0u8;
 66 |         for group in bits.chunks(8) {
 67 |             // Overwritten per group, so after the loop it is the width of the last one.
 68 |             tail_width = group.len() as u8;
 69 |             // Pack MSB-first: the `k`-th bool of the group lands on bit `7 - k`.
 70 |             let octet = group
 71 |                 .iter()
 72 |                 .enumerate()
 73 |                 .fold(0u8, |acc, (k, &bit)| acc | (u8::from(bit) << (7 - k)));
 74 |             packed.push(octet);
 75 |         }
 76 |         Fid::build(packed, tail_width)
 77 |     }
 78 | }
 79 | 
 80 | static TRUE: bool = true;
 81 | static FALSE: bool = false;
 82 | 
 83 | impl Index<u64> for Fid {
 84 |     type Output = bool;
 85 | 
 86 |     /// Looks up the bit stored at position `index` (0-origin).
 87 |     ///
 88 |     /// # Panics
 89 |     /// When _`index` >= length of the `Fid`_.
 90 |     fn index(&self, index: u64) -> &Self::Output {
 91 |         // `Index` has to hand out a reference, so borrow one of the two statics.
 92 |         match self.rbv().access(index) {
 93 |             true => &TRUE,
 94 |             false => &FALSE,
 95 |         }
 96 |     }
 97 | }
 98 | 
 99 | impl Fid {
100 |     /// Assembles a `Fid` from packed bytes and the bit width of the final byte.
101 |     fn build(byte_vec: Vec<u8>, last_byte_len: u8) -> Self {
102 |         let view = RawBitVector::new(byte_vec.as_slice(), 0, last_byte_len);
103 |         let table = PopcountTable::new(Blocks::calc_block_size(view.len()));
104 |         let chunks = Chunks::new(&view);
105 |         let bit_len = (byte_vec.len() - 1) as u64 * 8 + last_byte_len as u64;
106 |         Self {
107 |             byte_vec,
108 |             bit_len,
109 |             chunks,
110 |             table,
111 |         }
112 |     }
113 | 
114 |     /// Counts the _1_ bits among positions _[0, `i`]_ (both ends inclusive) of the `Fid`.
115 |     ///
116 |     /// # Panics
117 |     /// When _`i` >= length of the `Fid`_.
118 |     ///
119 |     /// # Implementation detail
120 |     ///
121 |     /// ```text
122 |     ///  00001000 01000001 00000100 11000000 00100000 00000101 00100000 00010000 001  Raw data (N=67)
123 |     ///                                                           ^
124 |     ///                                                           i = 51
125 |     /// |                  7                    |                13                |  Chunk (size = (log N)^2 = 36)
126 |     ///                                         ^
127 |     ///                chunk_left            i_chunk = 1      chunk_right
128 |     ///
129 |     /// |0 |1 |1  |2 |2 |3  |3 |4 |6  |6 |6  |7 |0 |0  |0 |2 |3 |3 |4  |4 |4 |5  |5|  Block (size = log N / 2 = 3)
130 |     ///                                                         ^
131 |     ///                                                      i_block = 17
132 |     ///                                              block_left | block_right
133 |     /// ```
134 |     ///
135 |     /// 1. Find `i_chunk`. _`i_chunk` = `i` / `chunk_size`_.
136 |     /// 2. Get _`chunk_left` = Chunks[`i_chunk` - 1]_ only if _`i_chunk` > 0_.
137 |     /// 3. Get _rank from chunk_left_ if `chunk_left` exists.
138 |     /// 4. Get _`chunk_right` = Chunks[`i_chunk`]_.
139 |     /// 5. Find `i_block`. _`i_block` = (`i` - `i_chunk` * `chunk_size`) / block size_.
140 |     /// 6. Get _`block_left` = `chunk_right.blocks`[`i_block` - 1]_ only if _`i_block` > 0_.
141 |     /// 7. Get _rank from block_left_ if `block_left` exists.
142 |     /// 8. Get inner-block data `block_bits`. `block_bits` must be of _block size_ length, fulfilled with _0_ in right bits.
143 |     /// 9. Calculate _rank of `block_bits`_ in _O(1)_ using a table memoizing _block size_ bit's popcount.
144 |     pub fn rank(&self, i: u64) -> u64 {
145 |         let n = self.len();
146 |         assert!(i < n);
147 |         // Widen both sizes to `u64` once, so that all of the position
148 |         // arithmetic below is done in a single integer type.
149 |         let chunk_size = Chunks::calc_chunk_size(n) as u64;
150 |         let block_size = Blocks::calc_block_size(n) as u64;
151 | 
152 |         // 1.
153 |         let i_chunk = i / chunk_size;
154 |         let chunk_start = i_chunk * chunk_size;
155 | 
156 |         // 2., 3. Chunk 0 has nothing to its left.
157 |         let ones_left_of_chunk = match i_chunk {
158 |             0 => 0,
159 |             _ => self.chunks.access(i_chunk - 1).value(),
160 |         };
161 | 
162 |         // 4.
163 |         let chunk_right = self.chunks.access(i_chunk);
164 | 
165 |         // 5.
166 |         let i_block = (i - chunk_start) / block_size;
167 |         let block_start = chunk_start + i_block * block_size;
168 | 
169 |         // 6., 7. Likewise, the first block of a chunk has nothing to its left.
170 |         let ones_left_of_block = match i_block {
171 |             0 => 0,
172 |             _ => chunk_right.blocks.access(i_block - 1).value(),
173 |         };
174 | 
175 |         // 8.
176 |         let block_right = chunk_right.blocks.access(i_block);
177 |         let block_len = block_right.length() as u64;
178 |         let i_in_block = i - block_start;
179 |         // `i` has to fall inside the block that was just looked up.
180 |         assert!(i_in_block < block_len);
181 |         let block_word = self.rbv().clone_sub(block_start, block_len).as_u32();
182 | 
183 |         // NOTE(review): presumably `as_u32()` puts the block's first bit at the most
184 |         // significant bit, so the right shift keeps only the bits up to `i` — confirm.
185 |         let bits_to_use = i_in_block + 1;
186 |         let block_bits = block_word >> (32 - bits_to_use);
187 |         let ones_in_block = self.table.popcount(block_bits as u64);
188 | 
189 |         // 9.
190 |         ones_left_of_chunk + ones_left_of_block as u64 + ones_in_block as u64
191 |     }
192 | 
193 |     /// Counts the _0_ bits among positions _[0, `i`]_ (both ends inclusive) of the `Fid`.
194 |     ///
195 |     /// # Panics
196 |     /// When _`i` >= length of the `Fid`_.
197 |     pub fn rank0(&self, i: u64) -> u64 {
198 |         i + 1 - self.rank(i) // each of the `i + 1` positions that is not a _1_ is a _0_
199 |     }
200 | 
201 |     /// Locates the `num`-th _1_: the smallest 0-origin position `i` with _`rank(i)` == `num`_.
202 |     /// Returns `None` if the `Fid` holds fewer than `num` _1_s; `num` == 0 always gives `Some(0)`.
203 |     ///
204 |     /// # Panics
205 |     /// When _`num` > length of the `Fid`_.
206 |     ///
207 |     /// Implemented as a binary search over `rank()`.
208 |     pub fn select(&self, num: u64) -> Option<u64> {
209 |         let n = self.len();
210 |         assert!(num <= n);
211 | 
212 |         if num == 0 || (num == 1 && self[0]) {
213 |             return Some(0);
214 |         }
215 |         let last = n - 1;
216 |         if self.rank(last) < num {
217 |             return None;
218 |         }
219 |         // Invariant: rank(below) < num <= rank(at_or_above).
220 |         let (mut below, mut at_or_above) = (0, last);
221 |         while at_or_above - below > 1 {
222 |             let mid = (at_or_above + below) / 2;
223 |             if self.rank(mid) < num {
224 |                 below = mid;
225 |             } else {
226 |                 at_or_above = mid;
227 |             }
228 |         }
229 |         Some(at_or_above)
230 |     }
231 | 
232 |     /// Returns the minimum position (0-origin) `i` where _`rank0(i)` == `num`_, i.e. the position of the `num`-th _0_, if it exists. Else returns None.
233 |     ///
234 |     /// # Panics
235 |     /// When _`num` > length of the `Fid`_.
236 |     pub fn select0(&self, num: u64) -> Option<u64> {
237 |         let n = self.len();
238 |         assert!(num <= n);
239 | 
240 |         if num == 0 || (num == 1 && !self[0]) {
241 |             return Some(0);
242 |         }
243 |         if self.rank0(n - 1) < num {
244 |             return None;
245 |         }
246 |         // Binary search using `rank0()`; invariant: rank0(ng) < num <= rank0(ok).
247 |         let mut ng = 0;
248 |         let mut ok = n - 1;
249 |         while ok - ng > 1 {
250 |             let mid = ng + (ok - ng) / 2;
251 |             if self.rank0(mid) >= num {
252 |                 ok = mid;
253 |             } else {
254 |                 ng = mid;
255 |             }
256 |         }
257 |         Some(ok)
258 |     }
259 | 
260 |     /// Number of bits held by this FID.
261 |     pub fn len(&self) -> u64 {
262 |         self.bit_len
263 |     }
264 | 
265 |     /// Tells whether this FID holds no bits at all.
266 |     pub fn is_empty(&self) -> bool {
267 |         0 == self.bit_len
268 |     }
269 | 
270 |     /// Borrows the stored bytes as a `RawBitVector` that starts at bit 0.
271 |     ///
272 |     /// The width of the last byte is recomputed from `bit_len` on every call.
273 |     fn rbv(&self) -> RawBitVector {
274 |         // `bit_len % 8 == 0` means the final byte is completely used, so it maps
275 |         // to a width of 8 rather than 0.
276 |         let last_byte_len = match (self.bit_len % 8) as u8 {
277 |             0 => 8,
278 |             partial => partial,
279 |         };
280 |         RawBitVector::new(self.byte_vec.as_slice(), 0, last_byte_len)
281 |     }
282 | }
283 | 
284 | #[cfg(test)]
285 | mod from_str_success_tests {
286 |     use crate::Fid;
287 | 
288 |     macro_rules! parameterized_tests {
289 |         ($($name:ident: $value:expr,)*) => {
290 |         $(
291 |             #[test]
292 |             fn $name() {
293 |                 let (s, expected_bits) = $value;
294 |                 let fid = Fid::from(s);
295 | 
296 |                 // The FID must hold exactly as many bits as the string encodes ('_' is skipped).
297 |                 assert_eq!(fid.len(), expected_bits.len() as u64);
298 |                 for (i, bit) in expected_bits.iter().enumerate() {
299 |                     assert_eq!(fid[i as u64], *bit);
300 |                 }
301 |             }
302 |         )*
303 |         }
304 |     }
305 | 
306 |     parameterized_tests! {
307 |         t1: ("0", vec![false]),
308 |         t2: ("1", vec![true]),
309 |         t3: ("00", vec![false, false]),
310 |         t4: ("01", vec![false, true]),
311 |         t5: ("10", vec![true, false]),
312 |         t6: ("11", vec![true, true]),
313 |         t7: ("0101_0101__0101_1100__1000_001", vec![
314 |             false, true, false, true,
315 |             false, true, false, true,
316 |             false, true, false, true,
317 |             true, true, false, false,
318 |             true, false, false, false,
319 |             false, false, true,
320 |         ]),
321 |     }
322 | }
323 | 
324 | #[cfg(test)]
325 | mod from_str_failure_tests {
326 |     // well-tested in BitString::new() -- NOTE(review): no `BitString` appears in this file; confirm.
327 | }
328 | 
329 | #[cfg(test)]
330 | mod from_slice_success_tests {
331 |     use crate::Fid;
332 | 
333 |     macro_rules! parameterized_tests {
334 |         ($($name:ident: $value:expr,)*) => {
335 |         $(
336 |             #[test]
337 |             fn $name() {
338 |                 let arr = $value;
339 |                 let fid = Fid::from(&arr[..]);
340 | 
341 |                 // The FID must hold exactly as many bits as the input slice.
342 |                 assert_eq!(fid.len(), arr.len() as u64);
343 |                 for (i, bit) in arr.iter().enumerate() {
344 |                     assert_eq!(fid[i as u64], *bit);
345 |                 }
346 |             }
347 |         )*
348 |         }
349 |     }
350 | 
351 |     parameterized_tests! {
352 |         t1: [false],
353 |         t2: [true],
354 |         t3: [false, false],
355 |         t4: [false, true],
356 |         t5: [true, false],
357 |         t6: [true, true],
358 |         t7: [false; 100],
359 |         t8: [true; 100],
360 |     }
361 | }
362 | 
363 | #[cfg(test)]
364 | mod from_slice_failure_tests {
365 |     use crate::Fid;
366 |     // An empty slice is rejected by the `assert!` at the top of `From<&[bool]>`.
367 |     #[test]
368 |     #[should_panic]
369 |     fn empty() {
370 |         let _ = Fid::from(&[][..]);
371 |     }
372 | }
373 | 
374 | #[cfg(test)]
375 | mod index_u64_success_tests {
376 |     // well-tested in fid_builder::{builder_from_length_success_tests, builder_from_bit_string_success_tests} -- NOTE(review): no `fid_builder` appears in this file; confirm.
377 | }
378 | 
379 | #[cfg(test)]
380 | mod index_u64_failure_tests {
381 |     use crate::Fid;
382 |     // Index 2 is one past the last bit of a 2-bit FID, so indexing must panic.
383 |     #[test]
384 |     #[should_panic]
385 |     fn over_upper_bound() {
386 |         let fid = Fid::from("00");
387 |         let _ = fid[2];
388 |     }
389 | }
390 | 
391 | #[cfg(test)]
392 | #[allow(non_snake_case)]
393 | mod rank_success_tests {
394 |     use crate::Fid;
395 |     // Each case is `(bit string, index i, expected rank(i))`.
396 |     macro_rules! parameterized_tests {
397 |         ($($name:ident: $value:expr,)*) => {
398 |         $(
399 |             #[test]
400 |             fn $name() {
401 |                 let (in_fid_str, in_i, expected_rank) = $value;
402 |                 assert_eq!(
403 |                     Fid::from(in_fid_str).rank(in_i),
404 |                     expected_rank
405 |                 );
406 |             }
407 |         )*
408 |         }
409 |     }
410 | 
411 |     parameterized_tests! {
412 |         rank1_1: ("0", 0, 0),
413 | 
414 |         rank2_1: ("00", 0, 0),
415 |         rank2_2: ("00", 1, 0),
416 | 
417 |         rank3_1: ("01", 0, 0),
418 |         rank3_2: ("01", 1, 1),
419 | 
420 |         rank4_1: ("10", 0, 1),
421 |         rank4_2: ("10", 1, 1),
422 | 
423 |         rank5_1: ("11", 0, 1),
424 |         rank5_2: ("11", 1, 2),
425 | 
426 |         rank6_1: ("10010", 0, 1),
427 |         rank6_2: ("10010", 1, 1),
428 |         rank6_3: ("10010", 2, 1),
429 |         rank6_4: ("10010", 3, 2),
430 |         rank6_5: ("10010", 4, 2),
431 | 
432 |         bugfix_11110110_11010101_01000101_11101111_10101011_10100101_01100011_00110100_01010101_10010000_01001100_10111111_00110011_00111110_01110101_11011100: (
433 |             "11110110_11010101_01000101_11101111_10101011_10100101_01100011_00110100_01010101_10010000_01001100_10111111_00110011_00111110_01110101_11011100",
434 |             49, 31,
435 |         ),
436 |         bugfix_10100001_01010011_10101100_11100001_10110010_10000110_00010100_01001111_01011100_11010011_11110000_00011010_01101111_10101010_11000111_0110011: (
437 |             "10100001_01010011_10101100_11100001_10110010_10000110_00010100_01001111_01011100_11010011_11110000_00011010_01101111_10101010_11000111_0110011",
438 |             111, 55,
439 |         ),
440 |         bugfix_100_111_101_011_011_100_101_001_111_001_001_101_100_011_000_111_1___01_000_101_100_101_101_001_011_110_010_001_101_010_010_010_111_111_111_001_111_001_100_010_001_010_101_11: (
441 |             "100_111_101_011_011_100_101_001_111_001_001_101_100_011_000_111_1___01_000_101_100_101_101_001_011_110_010_001_101_010_010_010_111_111_111_001_111_001_100_010_001_010_101_11",
442 |             48, 28,
443 |         ),
444 |         bugfix_11100100_10110100_10000000_10111111_01110101_01100110_00101111_11101001_01100100_00001000_11010100_10100000_00010001_10100101_01100100_0010010: (
445 |             "11100100_10110100_10000000_10111111_01110101_01100110_00101111_11101001_01100100_00001000_11010100_10100000_00010001_10100101_01100100_0010010",
446 |             126, 56,
447 |         ),
448 |     }
449 |     // Tested more in tests/ (integration test)
450 | }
451 | 
452 | #[cfg(test)]
453 | mod rank_failure_tests {
454 |     use crate::Fid;
455 |     // `rank(i)` asserts `i < len`, and 2 is out of range for a 2-bit FID.
456 |     #[test]
457 |     #[should_panic]
458 |     fn rank_over_upper_bound() {
459 |         let fid = Fid::from("00");
460 |         let _ = fid.rank(2);
461 |     }
462 | }
463 | 
464 | #[cfg(test)]
465 | #[allow(non_snake_case)]
466 | mod rank0_success_tests {
467 |     use crate::Fid;
468 |     // Each case is `(bit string, index i, expected rank0(i))`.
469 |     macro_rules! parameterized_tests {
470 |         ($($name:ident: $value:expr,)*) => {
471 |         $(
472 |             #[test]
473 |             fn $name() {
474 |                 let (in_fid_str, in_i, expected_rank0) = $value;
475 |                 assert_eq!(
476 |                     Fid::from(in_fid_str).rank0(in_i),
477 |                     expected_rank0
478 |                 );
479 |             }
480 |         )*
481 |         }
482 |     }
483 | 
484 |     parameterized_tests! {
485 |         rank0_1_1: ("0", 0, 1),
486 | 
487 |         rank0_2_1: ("00", 0, 1),
488 |         rank0_2_2: ("00", 1, 2),
489 | 
490 |         rank0_3_1: ("01", 0, 1),
491 |         rank0_3_2: ("01", 1, 1),
492 | 
493 |         rank0_4_1: ("10", 0, 0),
494 |         rank0_4_2: ("10", 1, 1),
495 | 
496 |         rank0_5_1: ("11", 0, 0),
497 |         rank0_5_2: ("11", 1, 0),
498 | 
499 |         rank0_6_1: ("10010", 0, 0),
500 |         rank0_6_2: ("10010", 1, 1),
501 |         rank0_6_3: ("10010", 2, 2),
502 |         rank0_6_4: ("10010", 3, 2),
503 |         rank0_6_5: ("10010", 4, 3),
504 |     }
505 |     // Tested more in tests/ (integration test)
506 | }
507 | 
508 | #[cfg(test)]
509 | mod rank0_0_failure_tests {
510 |     use crate::Fid;
511 |     // `rank0` calls `rank`, which asserts `i < len`; 2 is out of range for a 2-bit FID.
512 |     #[test]
513 |     #[should_panic]
514 |     fn rank0_over_upper_bound() {
515 |         let fid = Fid::from("00");
516 |         let _ = fid.rank0(2);
517 |     }
518 | }
519 | 
520 | #[cfg(test)]
521 | mod select_success_tests {
522 |     // Intentionally empty: `select` success cases are tested well in tests/ (integration test).
523 | }
524 | 
525 | #[cfg(test)]
526 | mod select_failure_tests {
527 |     use crate::Fid;
528 |     // `select` asserts `num <= len`, and 3 exceeds the length of a 2-bit FID.
529 |     #[test]
530 |     #[should_panic]
531 |     fn select_over_max_rank() {
532 |         let fid = Fid::from("00");
533 |         let _ = fid.select(3);
534 |     }
535 | }
536 | 
537 | #[cfg(test)]
538 | mod select0_success_tests {
539 |     // Intentionally empty: `select0` success cases are tested well in tests/ (integration test).
540 | }
541 | 
542 | #[cfg(test)]
543 | mod select0_failure_tests {
544 |     use crate::Fid;
545 |     // `select0` asserts `num <= len`, and 3 exceeds the length of a 2-bit FID.
546 |     #[test]
547 |     #[should_panic]
548 |     fn select_over_max_rank() {
549 |         let fid = Fid::from("00");
550 |         let _ = fid.select0(3);
551 |     }
552 | }
553 | 


--------------------------------------------------------------------------------
/src/internal_data_structure/raw_bit_vector.rs:
--------------------------------------------------------------------------------
  1 | use std::fmt;
  2 | 
  3 | #[derive(Debug)]
  4 | /// Bit vector of arbitrary length (actually the length is limited to _[1, 2^64)_).
  5 | ///
  6 | /// ```text
  7 | /// When first_byte_offset = 2, last_byte_len = 2:
  8 | ///
  9 | /// 10101010 00000000 11111111
 10 | ///   |  effective bits |
 11 | /// ```
 12 | pub struct RawBitVector<'s> {
 13 |     /// Borrowed bytes that contain the effective bits.
 14 |     byte_slice: &'s [u8],
 15 |     /// Number of bits to skip at the start of the first byte.
 16 |     first_byte_offset: u8,
 17 |     /// Length used in last byte. Although byte_slice has only 1 byte and
 18 |     /// first_byte_offset > 0, this var can take up to 8.
 19 |     last_byte_len: u8,
 20 | }
 21 | 
 22 | impl<'s> RawBitVector<'s> {
 23 |     /// Wraps `byte_slice` as a bit vector with the given first-byte offset and last-byte length.
 24 |     ///
 25 |     /// # Panics
 26 |     /// When:
 27 |     /// - `byte_slice` is empty.
 28 |     /// - _`first_byte_offset` >= 8_.
 29 |     /// - _`last_byte_len` == 0 || `last_byte_len` > 8_.
 30 |     /// - _`byte_slice.len() == 1 && first_byte_offset >= last_byte_len`_.
 31 |     pub fn new(byte_slice: &'s [u8], first_byte_offset: u8, last_byte_len: u8) -> Self {
 32 |         assert!(!byte_slice.is_empty());
 33 |         assert!(first_byte_offset < 8);
 34 |         assert!((1..=8).contains(&last_byte_len));
 35 |         assert!(byte_slice.len() != 1 || first_byte_offset < last_byte_len); // keep >= 1 bit
 36 |         Self {
 37 |             byte_slice,
 38 |             first_byte_offset,
 39 |             last_byte_len,
 40 |         }
 41 |     }
 42 | 
 43 |     /// Returns the `i`-th effective bit (0-origin); `true` stands for _1_.
 44 |     ///
 45 |     /// Positions are counted from `first_byte_offset` of the first byte,
 46 |     /// and within each byte from the most significant bit towards the
 47 |     /// least significant one.
 48 |     ///
 49 |     /// ```text
 50 |     /// When i=7:
 51 |     ///
 52 |     ///          |target |
 53 |     /// 00000000 01000000
 54 |     ///   ^       ^
 55 |     /// offset=2  |
 56 |     ///  i=0     i=7
 57 |     ///        abs_i=9
 58 |     ///
 59 |     /// abs_i = offset + i
 60 |     /// target_byte = at [abs_i / 8]
 61 |     /// access(i) = target_byte[abs_i % 8]
 62 |     /// ```
 63 |     ///
 64 |     /// # Panics
 65 |     /// When _`i` >= `self.len()`_.
 66 |     pub fn access(&self, i: u64) -> bool {
 67 |         assert!(i < self.len());
 68 | 
 69 |         let abs_i = self.first_byte_offset as u64 + i;
 70 |         let byte_index = (abs_i / 8) as usize;
 71 |         let bit_in_byte = abs_i % 8;
 72 |         let target_byte = self.byte_slice[byte_index];
 73 | 
 74 |         // `bit_in_byte` 0 selects the most significant bit of the byte,
 75 |         // 7 the least significant one.
 76 |         let mask = 0b1000_0000u8 >> bit_in_byte;
 77 |         target_byte & mask != 0
 78 |     }
 79 | 
 80 |     /// Returns the number of effective bits.
 81 |     pub fn len(&self) -> u64 {
 82 |         let head_skip = self.first_byte_offset as u64;
 83 |         let tail_used = self.last_byte_len as u64;
 84 |         match self.byte_slice.len() as u64 {
 85 |             // Both bounds lie within the same byte.
 86 |             1 => tail_used - head_skip,
 87 |             n_bytes => n_bytes * 8 - head_skip - (8 - tail_used),
 88 |         }
 89 |     }
 90 | 
 91 |     /// Counts the _1_ bits among the effective bits of this vector.
 92 |     pub fn popcount(&self) -> u64 {
 93 |         // Start from the popcount of every whole byte, then subtract the 1s
 94 |         // that lie outside the effective range at either end.
 95 |         let whole_bytes: u64 = self
 96 |             .byte_slice
 97 |             .iter()
 98 |             .map(|byte| byte.count_ones() as u64)
 99 |             .sum();
100 |         // 1s located left of `first_byte_offset` in the first byte
101 |         let head = self.byte_slice[0];
102 |         let ignored_head = match self.first_byte_offset {
103 |             0 => 0,
104 |             1 => head & 0b10000000,
105 |             2 => head & 0b11000000,
106 |             3 => head & 0b11100000,
107 |             4 => head & 0b11110000,
108 |             5 => head & 0b11111000,
109 |             6 => head & 0b11111100,
110 |             7 => head & 0b11111110,
111 |             _ => panic!("never happen"),
112 |         };
113 | 
114 |         // 1s located right of the last effective bit in the last byte
115 |         let tail = self.byte_slice.last().unwrap();
116 |         let ignored_tail = match self.last_byte_len - 1 {
117 |             0 => tail & 0b01111111,
118 |             1 => tail & 0b00111111,
119 |             2 => tail & 0b00011111,
120 |             3 => tail & 0b00001111,
121 |             4 => tail & 0b00000111,
122 |             5 => tail & 0b00000011,
123 |             6 => tail & 0b00000001,
124 |             7 => 0,
125 |             _ => panic!("never happen"),
126 |         };
127 | 
128 |         whole_bytes - ignored_head.count_ones() as u64 - ignored_tail.count_ones() as u64
129 |     }
130 | 
131 |     /// Returns a view over the bits _[`i`, `i` + `size`)_ of `self`.
132 |     /// The bytes are not copied: the result borrows the same underlying slice.
133 |     ///
134 |     /// ```text
135 |     /// offset=2
136 |     ///   |
137 |     ///   v  |     size=14         |
138 |     /// 00000000    00000000    00000000
139 |     ///      ^                    ^
140 |     ///  i_start=3             i_end=16
141 |     ///  abs_i_start=5         abs_i_end=18
142 |     /// | first|                |  last |
143 |     ///
144 |     ///
145 |     /// When i=3 & size=14:
146 |     ///
147 |     /// i_start = 3
148 |     /// abs_i_start = i_start + offset = 5
149 |     /// i_end = i_start + size - 1 = 16
150 |     /// abs_i_end = i_end + offset = 18
151 |     ///
152 |     /// first_byte = at [abs_i_start / 8]
153 |     /// last_byte = at [abs_i_end / 8]
154 |     ///
155 |     /// new_offset = abs_i_start % 8
156 |     ///
157 |     /// new_last_byte_len = abs_i_end % 8 + 1
158 |     /// ```
159 |     ///
160 |     /// # Panics
161 |     /// When:
162 |     /// - _`size` == 0_
163 |     /// - _`size` > `self.len`_
164 |     /// - _`abs_i_end` / 8 + 1 == `self.byte_slice.len()` && `abs_i_end` % 8 >= `last_byte_len`_
165 |     pub fn clone_sub(&self, i: u64, size: u64) -> Self {
166 |         assert!(size > 0, "length must be > 0");
167 |         assert!(size <= self.len());
168 | 
169 |         let offset = self.first_byte_offset as u64;
170 |         let (abs_i_start, abs_i_end) = (i + offset, i + size - 1 + offset);
171 |         // The end bit must either sit before the last byte of `self`, or stay
172 |         // within the used part of that last byte.
173 |         let end_in_bounds = abs_i_end / 8 + 1 < self.byte_slice.len() as u64
174 |             || abs_i_end % 8 < self.last_byte_len as u64;
175 |         assert!(end_in_bounds);
176 | 
177 |         let bytes = &self.byte_slice[(abs_i_start as usize / 8)..=(abs_i_end as usize / 8)];
178 |         Self {
179 |             byte_slice: bytes,
180 |             first_byte_offset: (abs_i_start % 8) as u8,
181 |             last_byte_len: (abs_i_end % 8 + 1) as u8,
182 |         }
183 |     }
184 | 
185 |     /// Returns a concatenated number of first 32bits.
186 |     ///
187 |     /// # Panics
188 |     /// If _`self.len()` > 32_
189 |     pub fn as_u32(&self) -> u32 {
190 |         assert!(self.len() <= 32);
191 | 
192 |         let bs = self.byte_slice;
193 |         let off = self.first_byte_offset;
194 | 
195 |         assert!(bs.len() <= 5);
196 |         let mut a = [0u32; 5];
197 |         for i in 0..bs.len() {
198 |             a[i] = bs[i] as u32;
199 |         }
200 |         // discard 1s in the last byte
201 |         a[bs.len() - 1] = a[bs.len() - 1] >> (8 - self.last_byte_len) << (8 - self.last_byte_len);
202 | 
203 |         let mut byte = [0u32; 4];
204 |         for i in 0..4 {
205 |             byte[i] = (a[i] << off) + (a[i + 1] >> (8 - off));
206 |         }
207 | 
208 |         (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3]
209 |     }
210 | }
211 | 
212 | impl<'s> fmt::Display for RawBitVector<'s> {
213 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
214 |         let bits_str = self
215 |             .byte_slice
216 |             .iter()
217 |             .enumerate()
218 |             .map(|(i, byte)| {
219 |                 let byte_s = format!("{: >8}", format!("{:b}", byte)).replace(' ', "0");
220 |                 if i < self.byte_slice.len() - 1 {
221 |                     byte_s
222 |                 } else {
223 |                     byte_s
224 |                         .chars()
225 |                         .take(self.last_byte_len as usize)
226 |                         .collect::<String>()
227 |                 }
228 |             })
229 |             .collect::<Vec<String>>()
230 |             .concat();
231 | 
232 |         write!(f, "{}", bits_str)
233 |     }
234 | }
235 | 
#[cfg(test)]
mod new_success_tests {
    use super::RawBitVector;

    // Each entry is `name: (byte_slice, first_byte_offset, last_byte_len)`.
    // A generated test passes as long as the constructor does not panic.
    macro_rules! parameterized_tests {
        ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            fn $name() {
                let (bytes, offset, last_len) = $value;
                let _ = RawBitVector::new(bytes, offset, last_len);
            }
        )*
        }
    }

    parameterized_tests! {
        t_1byte_1: (&[0b00000000], 0, 8),
        t_1byte_2: (&[0b00000000], 1, 8),
        t_1byte_3: (&[0b00000000], 2, 8),
        t_1byte_4: (&[0b00000000], 3, 8),
        t_1byte_5: (&[0b00000000], 4, 8),
        t_1byte_6: (&[0b00000000], 5, 8),
        t_1byte_7: (&[0b00000000], 6, 8),
        t_1byte_8: (&[0b00000000], 7, 8),
    }
}
263 | 
#[cfg(test)]
mod new_failure_tests {
    use super::RawBitVector;

    // Each entry is `name: (byte_slice, first_byte_offset, last_byte_len)`.
    // A generated test passes only when the constructor panics.
    macro_rules! parameterized_tests {
        ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            #[should_panic]
            fn $name() {
                let (bytes, offset, last_len) = $value;
                let _ = RawBitVector::new(bytes, offset, last_len);
            }
        )*
        }
    }

    parameterized_tests! {
        t_empty: (&[], 0, 1),
        t_offset: (&[0b00000000], 8, 1),

        t_last_len_0: (&[0b00000000], 0, 0),
        t_last_len_9: (&[0b00000000, 0b00000000], 0, 9),

        t_1byte_1: (&[0b00000000], 0, 9),

        t_1byte_off7: (&[0b00000001], 7, 7),
    }
}
293 | 
#[cfg(test)]
mod len_success_tests {
    use super::RawBitVector;

    // Each entry is `name: (byte_slice, first_byte_offset, last_byte_len, expected_len)`.
    macro_rules! parameterized_tests {
        ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            fn $name() {
                let (bytes, offset, last_len, expected) = $value;
                let actual = RawBitVector::new(bytes, offset, last_len).len();
                assert_eq!(actual, expected);
            }
        )*
        }
    }

    parameterized_tests! {
        t_1byte_off0_1: (&[0b00000000], 0, 8, 8),
        t_1byte_off0_2: (&[0b00000000], 0, 7, 7),
        t_1byte_off0_3: (&[0b00000000], 0, 6, 6),
        t_1byte_off0_4: (&[0b00000000], 0, 5, 5),
        t_1byte_off0_5: (&[0b00000000], 0, 4, 4),
        t_1byte_off0_6: (&[0b00000000], 0, 3, 3),
        t_1byte_off0_7: (&[0b00000000], 0, 2, 2),
        t_1byte_off0_8: (&[0b00000000], 0, 1, 1),

        t_1byte_off1_1: (&[0b00000000], 1, 8, 7),
        t_1byte_off1_2: (&[0b00000000], 1, 7, 6),
        t_1byte_off1_3: (&[0b00000000], 1, 6, 5),
        t_1byte_off1_4: (&[0b00000000], 1, 5, 4),
        t_1byte_off1_5: (&[0b00000000], 1, 4, 3),
        t_1byte_off1_6: (&[0b00000000], 1, 3, 2),
        t_1byte_off1_7: (&[0b00000000], 1, 2, 1),

        t_1byte_off7_1: (&[0b00000000], 7, 8, 1),

        t_2byte_1: (&[0b00000000, 0b00000000], 0, 8, 16),
        t_2byte_2: (&[0b00000000, 0b00000000], 1, 8, 15),
        t_2byte_3: (&[0b00000000, 0b00000000], 7, 8, 9),
        t_2byte_4: (&[0b00000000, 0b00000000], 0, 1, 9),
        t_2byte_5: (&[0b00000000, 0b00000000], 0, 7, 15),
        t_2byte_6: (&[0b00000000, 0b00000000], 7, 1, 2),
    }
}
339 | 
#[cfg(test)]
mod len_failure_tests {
    // Intentionally empty: no failure cases are tested for `len()`.
    // Nothing to do
}
344 | 
#[cfg(test)]
mod access_success_tests {
    use super::RawBitVector;

    // Each entry is
    // `name: (byte_slice, first_byte_offset, last_byte_len, i, expected_bit)`,
    // where `i` is the index passed to `access()`.
    macro_rules! parameterized_tests {
        ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            fn $name() {
                let (bytes, offset, last_len, index, expected) = $value;
                let actual = RawBitVector::new(bytes, offset, last_len).access(index);
                assert_eq!(actual, expected);
            }
        )*
        }
    }

    parameterized_tests! {
        t_1byte_off0_1: (&[0b10000000], 0, 8, 0, true),

        t_1byte_off1_1: (&[0b01000000], 1, 7, 0, true),
        t_1byte_off1_2: (&[0b01000000], 1, 7, 1, false),

        t_1byte_off7: (&[0b00000001], 7, 8, 0, true),

        t_2byte_1: (&[0b00000000, 0b00000001], 0, 8, 15, true),
        t_2byte_2: (&[0b00000000, 0b00000001], 1, 8, 14, true),
        t_2byte_3: (&[0b00000000, 0b00000001], 7, 8, 8, true),
        t_2byte_4: (&[0b00000000, 0b10000000], 0, 1, 8, true),
        t_2byte_5: (&[0b00000000, 0b00000010], 0, 7, 14, true),
        t_2byte_6: (&[0b00000000, 0b10000000], 7, 1, 1, true),
    }
}
378 | 
#[cfg(test)]
mod access_failure_tests {
    use super::RawBitVector;

    // basically, well-tested in len_success_tests
    #[test]
    #[should_panic]
    fn over_upper_bound() {
        // Offset 1 with last_byte_len 2 leaves a single bit
        // (cf. `len_success_tests::t_1byte_off1_7`), so index 1 is out of range.
        let single_bit = RawBitVector::new(&[0b00000000], 1, 2);
        let _ = single_bit.access(1);
    }
}
392 | 
#[cfg(test)]
mod popcount_success_tests {
    use super::RawBitVector;

    // Each entry is
    // `name: (byte_slice, first_byte_offset, last_byte_len, expected_popcount)`.
    macro_rules! parameterized_tests {
        ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            fn $name() {
                let (bytes, offset, last_len, expected) = $value;
                let actual = RawBitVector::new(bytes, offset, last_len).popcount();
                assert_eq!(actual, expected);
            }
        )*
        }
    }

    parameterized_tests! {
        t1: (&[0b11111111], 0, 1, 1),
        t2: (&[0b11111111], 1, 8, 7),
        t3: (&[0b11111111], 1, 7, 6),
        t4: (&[0b11111111], 1, 6, 5),
        t5: (&[0b11101111], 0, 8, 7),

        t6: (&[0b01010101, 0b01111111], 0, 1, 4),
        t7: (&[0b10101010, 0b11111111], 0, 1, 5),
        t8: (&[0b11111111, 0b11111111], 0, 1, 9),
        t9: (&[0b11111111, 0b11111111], 1, 1, 8),

        t10: (&[0b11111111, 0b00010000, 0b11111111], 7, 1, 3),
    }
}
425 | 
#[cfg(test)]
mod popcount_failure_tests {
    // Intentionally empty: no failure cases are tested for `popcount()`.
    // Nothing to do
}
430 | 
#[cfg(test)]
mod clone_sub_success_tests {
    use super::RawBitVector;

    // Each entry is
    // `name: (byte_slice, first_byte_offset, last_byte_len, i, size, expected_bit_vec)`.
    // The sub-vector returned by `clone_sub(i, size)` must have the length of
    // `expected_bit_vec` and the same bit at every index.
    macro_rules! parameterized_tests {
        ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            fn $name() {
                let (bytes, offset, last_len, start, size, expected_bits) = $value;
                let source = RawBitVector::new(bytes, offset, last_len);
                let sub = source.clone_sub(start, size);

                assert_eq!(sub.len(), expected_bits.len() as u64);
                for (pos, &want) in expected_bits.iter().enumerate() {
                    assert_eq!(sub.access(pos as u64), want);
                }
            }
        )*
        }
    }

    parameterized_tests! {
        t1_1: (&[0b01000000], 0, 1, 0, 1, vec![false]),
        t1_2: (&[0b01000000], 1, 2, 0, 1, vec![true]),

        t8_1_1: (&[0b01000101], 0, 8, 0, 1, vec![false]),
        t8_1_2: (&[0b01000101], 0, 8, 0, 2, vec![false, true]),
        t8_1_3: (&[0b01000101], 0, 8, 0, 3, vec![false, true, false]),
        t8_1_4: (&[0b01000101], 0, 8, 0, 4, vec![false, true, false, false]),
        t8_1_5: (&[0b01000101], 0, 8, 0, 5, vec![false, true, false, false, false]),
        t8_1_6: (&[0b01000101], 0, 8, 0, 6, vec![false, true, false, false, false, true]),
        t8_1_7: (&[0b01000101], 0, 8, 0, 7, vec![false, true, false, false, false, true, false]),
        t8_1_8: (&[0b01000101], 0, 8, 0, 8, vec![false, true, false, false, false, true, false, true]),
        t8_1_9: (&[0b01000101, 0b10000000], 1, 1, 0, 8, vec![true, false, false, false, true, false, true, true]),

        t8_2_1: (&[0b01000101], 0, 8, 7, 1, vec![true]),
        t8_2_2: (&[0b01000101, 0b10000000], 1, 1, 6, 2, vec![true, true]),
        t8_2_3: (&[0b01000101, 0b10000000], 1, 1, 7, 1, vec![true]),

        t9_1_1: (&[0b01000101, 0b10000000], 0, 1, 0, 1, vec![false]),
        t9_1_2: (&[0b01000101, 0b10000000], 0, 1, 0, 2, vec![false, true]),
        t9_1_3: (&[0b01000101, 0b10000000], 0, 1, 0, 3, vec![false, true, false]),
        t9_1_4: (&[0b01000101, 0b10000000], 0, 1, 0, 4, vec![false, true, false, false]),
        t9_1_5: (&[0b01000101, 0b10000000], 0, 1, 0, 5, vec![false, true, false, false, false]),
        t9_1_6: (&[0b01000101, 0b10000000], 0, 1, 0, 6, vec![false, true, false, false, false, true]),
        t9_1_7: (&[0b01000101, 0b10000000], 0, 1, 0, 7, vec![false, true, false, false, false, true, false]),
        t9_1_8: (&[0b01000101, 0b10000000], 0, 1, 0, 8, vec![false, true, false, false, false, true, false, true]),
        t9_1_9: (&[0b01000101, 0b10000000], 0, 1, 0, 9, vec![false, true, false, false, false, true, false, true, true]),
        t9_1_10: (&[0b01000101, 0b10000000], 1, 2, 0, 9, vec![true, false, false, false, true, false, true, true, false]),

        t9_2_1: (&[0b01000101, 0b10000000], 0, 1, 7, 1, vec![true]),
        t9_2_2: (&[0b01000101, 0b10000000], 0, 1, 7, 2, vec![true, true]),
        t9_2_3: (&[0b01000101, 0b10000000], 1, 2, 7, 2, vec![true, false]),

        t9_3_1: (&[0b01000101, 0b10000000], 0, 1, 8, 1, vec![true]),
        t9_3_2: (&[0b01000101, 0b10000000], 1, 2, 8, 1, vec![false]),

        t13_1_1: (&[0b10110010, 0b01010000], 0, 4, 9, 3, vec![true, false, true]),
        t13_1_2: (&[0b10110010, 0b01010000], 1, 4, 9, 2, vec![false, true]),

        t_bugfix1: (&[0b11111111, 0b00101001], 0, 1, 0, 1, vec![true]),
    }
}
495 | 
#[cfg(test)]
mod clone_sub_failure_tests {
    use super::RawBitVector;

    // Each entry is `name: (byte_slice, first_byte_offset, last_byte_len, i, size)`.
    // A generated test passes only when something panics, so every entry must
    // describe a bit vector that `RawBitVector::new()` accepts; otherwise the
    // panic comes from the constructor and `clone_sub()` is never exercised.
    macro_rules! parameterized_tests {
        ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            #[should_panic]
            fn $name() {
                let (byte_slice, first_byte_offset, last_byte_len, i, size) = $value;
                let rbv = RawBitVector::new(byte_slice, first_byte_offset, last_byte_len);
                let _ = rbv.clone_sub(i, size);
            }
        )*
        }
    }

    parameterized_tests! {
        t1_1: (&[0b00000000], 0, 1, 0, 0),
        t1_2: (&[0b00000000], 0, 1, 0, 2),
        t1_3: (&[0b00000000], 0, 1, 1, 1),
        // 1-bit vector with first_byte_offset = 1. The former input used
        // last_byte_len = 1, a single-byte shape that `new()` itself appears to
        // reject (cf. `new_failure_tests::t_1byte_off7`).
        t1_4: (&[0b00000000], 1, 2, 0, 2),

        t8_1_1: (&[0b01000101], 0, 8, 0, 0),
        t8_1_2: (&[0b01000101], 0, 8, 0, 9),
        t8_1_3: (&[0b01000101, 0b00000000], 1, 1, 0, 9),

        t8_2_1: (&[0b01000101], 0, 8, 7, 0),
        t8_2_2: (&[0b01000101], 0, 8, 7, 2),
        t8_2_3: (&[0b01000101, 0b00000000], 1, 1, 7, 2),

        t9_1_1: (&[0b01000101, 0b00000000], 0, 1, 0, 0),
        t9_1_2: (&[0b01000101, 0b00000000], 0, 1, 0, 10),
        t9_1_3: (&[0b01000101, 0b00000000], 1, 2, 0, 10),

        t9_2_1: (&[0b01000101, 0b00000000], 0, 1, 7, 0),
        t9_2_2: (&[0b01000101, 0b00000000], 0, 1, 7, 3),
        t9_2_3: (&[0b01000101, 0b00000000], 1, 2, 7, 3),

        t9_3_1: (&[0b01000101, 0b00000000], 0, 1, 8, 0),
        t9_3_2: (&[0b01000101, 0b00000000], 0, 1, 8, 2),
        t9_3_3: (&[0b01000101, 0b00000000], 1, 2, 8, 2),
    }
}
541 | 
#[cfg(test)]
mod clone_sub_fuzzing_tests {
    use super::RawBitVector;

    /// Checks `clone_sub()` against plain substring extraction on random bit strings.
    #[test]
    fn test() {
        let samples = 10000;

        /// Returns the `size` characters of `s` that start at character `i`.
        fn sub_str(s: &str, i: u64, size: u64) -> String {
            s.chars().skip(i as usize).take(size as usize).collect()
        }

        /// Packs a string of '0' / '1' characters into bytes (first character =
        /// most significant bit of the first byte) and returns them together
        /// with the number of bits used in the last byte.
        fn str_into_byte_vec(s: &str) -> (Vec<u8>, u8) {
            let bits: Vec<bool> = s.as_bytes().iter().map(|c| *c == b'1').collect();

            let mut byte_vec: Vec<u8> = Vec::with_capacity(bits.len() / 8 + 1);
            let mut last_byte_len = 0u8;

            for bits8 in bits.chunks(8) {
                last_byte_len = bits8.len() as u8; // although this bits8 might not be a last byte.

                let byte = (0..last_byte_len).fold(0, |byte, i| {
                    byte + if bits8[i as usize] { 1 << (7 - i) } else { 0 }
                });
                byte_vec.push(byte);
            }

            (byte_vec, last_byte_len)
        }

        for _ in 0..samples {
            let s = &format!("{:b}", rand::random::<u16>());
            let (byte_vec, last_byte_len) = str_into_byte_vec(s);
            let rbv = RawBitVector::new(&byte_vec[..], 0, last_byte_len);
            // TODO more tests (first_byte_offset > 0)

            for i in 0..s.len() {
                // Inclusive upper bound: sub-vectors that reach the very last bit
                // of the source must be covered as well.
                for size in 1..=(s.len() - i) {
                    let copied_rbv = rbv.clone_sub(i as u64, size as u64);

                    let substr = sub_str(s, i as u64, size as u64);
                    let (substr_byte_vec, substr_last_byte_len) = str_into_byte_vec(&substr);
                    let substr_rbv =
                        RawBitVector::new(&substr_byte_vec[..], 0, substr_last_byte_len);

                    assert_eq!(copied_rbv.len(), substr_rbv.len());
                    // A distinct name keeps the outer `i` visible, so the message
                    // below reports the actual `clone_sub()` arguments.
                    for bit_i in 0..copied_rbv.len() {
                        assert_eq!(
                            copied_rbv.access(bit_i), substr_rbv.access(bit_i),
                            "\nbit vector = {}, RawBitVector::clone_sub(i={}, size={});\nActual:   {}\nExpected: {}",
                            s, i, size, copied_rbv, substr
                        )
                    }
                }
            }
        }
    }
}
601 | 
#[cfg(test)]
mod as_u32_success_tests {
    use super::RawBitVector;

    // Each entry is
    // `name: (byte_slice, first_byte_offset, last_byte_len, expected_u32)`.
    macro_rules! parameterized_tests {
        ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            fn $name() {
                let (bytes, offset, last_len, expected) = $value;
                let actual = RawBitVector::new(bytes, offset, last_len).as_u32();
                assert_eq!(actual, expected);
            }
        )*
        }
    }

    parameterized_tests! {
        t1_1: (&[0b11111111], 0, 1, 0b10000000_00000000_00000000_00000000),
        t1_2: (&[0b11111111], 0, 7, 0b11111110_00000000_00000000_00000000),
        t1_3: (&[0b11111111], 1, 2, 0b10000000_00000000_00000000_00000000),
        t1_4: (&[0b11111111], 1, 7, 0b11111100_00000000_00000000_00000000),

        t8_1: (&[0b10010000], 0, 8, 0b10010000_00000000_00000000_00000000),

        t32_1: (&[0b10010000, 0b01000001, 0b00001000, 0b00011010], 0, 7, 0b10010000_01000001_00001000_00011010),
        t32_2: (&[0b10010000, 0b01000001, 0b00001000, 0b00011010], 0, 8, 0b10010000_01000001_00001000_00011010),
    }
}
631 | 
#[cfg(test)]
mod as_u32_failure_tests {
    use super::RawBitVector;

    /// `as_u32()` must panic for a bit vector longer than 32 bits.
    #[test]
    #[should_panic]
    fn test() {
        // 4 full bytes + 1 bit of the 5th byte = 33 bits.
        // `last_byte_len` counts bits inside the last byte only; a value above 8
        // makes the constructor panic (see `new_failure_tests::t_last_len_9`),
        // in which case `as_u32()` would never be reached.
        let byte_slice = &[0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000];
        let rbv = RawBitVector::new(byte_slice, 0, 1);
        // TODO more tests (first_byte_offset > 0)
        let _ = rbv.as_u32();
    }
}
645 | 


--------------------------------------------------------------------------------