├── src ├── internal_data_structure.rs ├── lib.rs ├── fid │ ├── block.rs │ ├── chunk.rs │ ├── fid_iter.rs │ ├── blocks.rs │ ├── chunks.rs │ └── fid_impl.rs ├── internal_data_structure │ ├── popcount_table.rs │ └── raw_bit_vector.rs └── fid.rs ├── .gitignore ├── CHANGELOG.md ├── ci └── pr-check-fix.sh ├── .github └── workflows │ └── clippy.yml ├── README.tpl ├── LICENSE-MIT ├── Cargo.toml ├── .travis.yml ├── tests └── test.rs ├── benches └── bench.rs ├── README.md └── LICENSE-APACHE /src/internal_data_structure.rs: -------------------------------------------------------------------------------- 1 | pub mod popcount_table; 2 | pub mod raw_bit_vector; 3 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![doc = include_str!("../README.md")] 2 | 3 | pub use fid::Fid; 4 | 5 | pub mod fid; 6 | mod internal_data_structure; 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | 5 | # local cargo settings 6 | /.cargo 7 | 8 | # used for debugging with VSCode 9 | src/main.rs 10 | -------------------------------------------------------------------------------- /src/fid/block.rs: -------------------------------------------------------------------------------- 1 | use super::Block; 2 | 3 | impl super::Block { 4 | /// Constructor. 5 | pub fn new(value: u16, length: u8) -> Block { 6 | Block { value, length } 7 | } 8 | 9 | /// Returns a content (total rank to go) of the block. 10 | pub fn value(&self) -> u16 { 11 | self.value 12 | } 13 | 14 | /// Returns size of the block. 
15 | pub fn length(&self) -> u8 { 16 | self.length 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/fid/chunk.rs: -------------------------------------------------------------------------------- 1 | use super::{Blocks, Chunk}; 2 | use crate::internal_data_structure::raw_bit_vector::RawBitVector; 3 | 4 | impl super::Chunk { 5 | /// Constructor. 6 | pub fn new(value: u64, length: u16, rbv: &RawBitVector, i_chunk: u64) -> Chunk { 7 | let blocks = Blocks::new(rbv, i_chunk, length); 8 | Chunk { 9 | value, 10 | blocks, 11 | } 12 | } 13 | 14 | /// Returns the content of the chunk. 15 | pub fn value(&self) -> u64 { 16 | self.value 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 5 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 6 | 7 | ## [Unreleased] 8 | 9 | ## [v0.2.0] - 2023-04-15 10 | 11 | ### Added 12 | 13 | - Added serde, made rayon optional, introduced GitHub actions CI, fixed many code smells. ([#25](https://github.com/laysakura/fid-rs/pull/25)) 14 | - feature: Make Fid cloneable. ([#26](https://github.com/laysakura/fid-rs/pull/26)) 15 | 16 | ## [v0.1.1] - 2019-04-26 17 | 18 | Just fixed docs.rs URL. 19 | 20 | ## [v0.1.0] - 2019-04-25 21 | Initial release. 
22 | 23 | [Unreleased]: https://github.com/laysakura/fid-rs/compare/v0.2.0...HEAD 24 | [v0.2.0]: 25 | [v0.1.1]: 26 | [v0.1.0]: https://github.com/laysakura/fid-rs/compare/48fe478...v0.1.0 27 | -------------------------------------------------------------------------------- /ci/pr-check-fix.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | travis_terminate() { 5 | set +e 6 | pkill -9 -P $$ &> /dev/null || true 7 | exit $1 8 | } 9 | 10 | rustup component add rustfmt 11 | cargo readme > /dev/null || cargo install cargo-readme # skip if already available 12 | 13 | ## Auto commit & push by CI 14 | ( 15 | cd `mktemp -d` 16 | git clone https://${GITHUB_TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git 17 | cd fid-rs 18 | git checkout ${TRAVIS_PULL_REQUEST_BRANCH} 19 | 20 | committed=0 21 | 22 | ### README.md from src/lib.rs 23 | cargo readme > README.md 24 | git add README.md 25 | git commit -m 'cargo readme > README.md' && committed=1 26 | 27 | ### cargo fmt 28 | cargo fmt --all 29 | git add -A 30 | git commit -m 'cargo fmt --all' && committed=1 31 | 32 | ### git push 33 | git push origin ${TRAVIS_PULL_REQUEST_BRANCH} 34 | 35 | ### Stop build if anything updated in remote 36 | [ $committed -eq 1 ] && travis_terminate 1 || : 37 | ) 38 | -------------------------------------------------------------------------------- /.github/workflows/clippy.yml: -------------------------------------------------------------------------------- 1 | name: Clippy 2 | 3 | 4 | on: 5 | push: 6 | branches: ["master"] 7 | pull_request: 8 | branches: ["master"] 9 | 10 | env: 11 | CARGO_TERM_COLOR: always 12 | 13 | jobs: 14 | clippy: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Install Clippy 20 | run: 21 | rustup toolchain install nightly --component clippy 22 | - name: Set up Rust 23 | uses: actions-rs/toolchain@v1 24 | with: 25 | toolchain: nightly 26 | override: true 27 | - name: 
Run clippy 28 | run: cargo clippy --all-features 29 | - name: Run clippy without rayon 30 | run: cargo clippy --no-default-features --features="serde" 31 | - name: Run tests 32 | run: cargo test --all-features 33 | - name: Run tests without rayon 34 | run: cargo test --no-default-features --features="serde" 35 | - name: Run tests release 36 | run: cargo test --release --all-features 37 | -------------------------------------------------------------------------------- /README.tpl: -------------------------------------------------------------------------------- 1 | # {{crate}} 2 | 3 | {{readme}} 4 | 5 | ## Versions 6 | fid-rs uses [semantic versioning](http://semver.org/spec/v2.0.0.html). 7 | 8 | Since current major version is _0_, minor version update might involve breaking public API change (although it is carefully avoided). 9 | 10 | ## Rust Version Supports 11 | 12 | fid-rs is continuously tested with these Rust versions in Travis CI: 13 | 14 | - 1.33.0 15 | - Latest stable version 16 | - Beta version 17 | - Nightly build 18 | 19 | So it expectedly works with Rust 1.33.0 and any newer versions. 20 | 21 | Older versions may also work, but are not tested or guaranteed. 22 | 23 | ## Contributing 24 | 25 | Any kind of pull requests are appreciated. 26 | 27 | ### Guidelines 28 | 29 | - `README.md` is generated from `$ cargo readme` command. Do not manually update `README.md` but edit `src/lib.rs` and then `$ cargo readme > README.md`. 
30 | - Travis CI automatically does the following commit & push to your pull-requests: 31 | - `$ cargo readme > README.md` 32 | - `$ cargo fmt --all` 33 | 34 | ## License 35 | 36 | {{license}} 37 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Sho Nakatani 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 
26 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fid-rs" 3 | version = "0.2.0" 4 | authors = ["Sho Nakatani "] 5 | description = "High performance FID (Fully Indexable Dictionary) library" 6 | readme = "README.md" 7 | license = "MIT OR Apache-2.0" 8 | repository = "https://github.com/laysakura/fid-rs" 9 | homepage = "https://github.com/laysakura/fid-rs" 10 | keywords = ["fid", "succinct-bit-vector"] # up to 5 keywords, each keyword should have <= 20 chars 11 | categories = ["compression", "data-structures"] 12 | edition = "2018" 13 | 14 | [dependencies] 15 | # Rayon is an optional feature, which is enabled by default. 16 | # It is used to create the Chunks collection in parallel. 17 | rayon = { version = "1.5", optional = true } 18 | # Serde is another optional feature, which can be enabled via the `serde` feature. 19 | # It is used to serialize and deserialize the FID structure. 20 | serde = { version = "1.0", optional = true, features = ["derive"] } 21 | mem_dbg = {version = "0.1.4", optional = true} 22 | 23 | [dev-dependencies] 24 | criterion = "0.5" 25 | rand = "0.8" 26 | 27 | [features] 28 | default = ["rayon"] 29 | 30 | [[bench]] 31 | name = "bench" 32 | harness = false 33 | -------------------------------------------------------------------------------- /src/fid/fid_iter.rs: -------------------------------------------------------------------------------- 1 | use super::{Fid, FidIter}; 2 | 3 | impl<'iter> Fid { 4 | /// Creates an iterator over FID's bit vector. 
5 | /// 6 | /// # Examples 7 | /// ``` 8 | /// use fid_rs::Fid; 9 | /// 10 | /// let fid = Fid::from("1010_1010"); 11 | /// for (i, bit) in fid.iter().enumerate() { 12 | /// assert_eq!(bit, fid[i as u64]); 13 | /// } 14 | /// ``` 15 | pub fn iter(&'iter self) -> FidIter<'iter> { 16 | FidIter { fid: self, i: 0 } 17 | } 18 | } 19 | 20 | impl<'iter> Iterator for FidIter<'iter> { 21 | type Item = bool; 22 | fn next(&mut self) -> Option<Self::Item> { 23 | if self.i >= self.fid.len() { 24 | None 25 | } else { 26 | self.i += 1; 27 | Some(self.fid[self.i - 1]) 28 | } 29 | } 30 | } 31 | 32 | #[cfg(test)] 33 | mod iter_success_tests { 34 | use crate::Fid; 35 | 36 | #[test] 37 | fn iter() { 38 | let fid = Fid::from("1010_1010"); 39 | for (i, bit) in fid.iter().enumerate() { 40 | assert_eq!(bit, fid[i as u64]); 41 | } 42 | } 43 | } 44 | 45 | #[cfg(test)] 46 | mod iter_failure_tests { 47 | // Nothing to test 48 | } 49 | -------------------------------------------------------------------------------- /src/fid/blocks.rs: -------------------------------------------------------------------------------- 1 | use super::{Block, Blocks, Chunks}; 2 | use crate::internal_data_structure::raw_bit_vector::RawBitVector; 3 | 4 | impl super::Blocks { 5 | /// Constructor. 
6 | pub fn new(rbv: &RawBitVector, i_chunk: u64, this_chunk_size: u16) -> Blocks { 7 | let n = rbv.len(); 8 | let chunk_size = Chunks::calc_chunk_size(n); 9 | let block_size = Blocks::calc_block_size(n); 10 | let blocks_cnt = this_chunk_size / block_size as u16 11 | + if this_chunk_size % block_size as u16 == 0 { 12 | 0 13 | } else { 14 | 1 15 | }; 16 | 17 | let mut blocks: Vec<Block> = Vec::with_capacity(blocks_cnt as usize); 18 | for i_block in 0..(blocks_cnt as usize) { 19 | let i_rbv = i_chunk * chunk_size as u64 + i_block as u64 * block_size as u64; 20 | assert!(i_rbv < n); 21 | 22 | let this_block_size: u8 = if n - i_rbv >= block_size as u64 { 23 | block_size 24 | } else { 25 | (n - i_rbv) as u8 26 | }; 27 | 28 | let block_rbv = rbv.clone_sub(i_rbv, this_block_size as u64); 29 | let popcount_in_block = block_rbv.popcount() as u16; 30 | let block = Block::new( 31 | popcount_in_block 32 | + if i_block == 0 { 33 | 0 34 | } else { 35 | let block_left = &blocks[i_block - 1]; 36 | block_left.value() 37 | }, 38 | this_block_size, 39 | ); 40 | blocks.push(block); 41 | } 42 | 43 | Blocks { blocks, blocks_cnt } 44 | } 45 | 46 | /// Returns i-th block. 47 | /// 48 | /// # Panics 49 | /// When _`i` >= `self.blocks_cnt()`_. 
50 | pub fn access(&self, i: u64) -> &Block { 51 | assert!( 52 | i < self.blocks_cnt as u64, 53 | "i = {} must be smaller than {} (self.blocks_cnt())", 54 | i, 55 | self.blocks_cnt, 56 | ); 57 | &self.blocks[i as usize] 58 | } 59 | 60 | /// Returns size of 1 block: _(log N) / 2_ 61 | pub fn calc_block_size(n: u64) -> u8 { 62 | let lg2 = (n as f64).log2() as u8; 63 | let sz = lg2 / 2; 64 | if sz == 0 { 65 | 1 66 | } else { 67 | sz 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | branches: 3 | only: 4 | - master 5 | - /^v[0-9]/ # tag 6 | cache: cargo 7 | 8 | stages: 9 | - pr-check-fix 10 | - test 11 | - doc-bench-deploy 12 | 13 | # test stage 14 | rust: 15 | - 1.33.0 # pinned stable Rust release 16 | - stable 17 | #- beta 18 | #- nightly 19 | os: 20 | - linux 21 | - osx 22 | script: 23 | - cargo build --release --verbose --all 24 | - cargo test --release --verbose --all 25 | 26 | jobs: 27 | include: 28 | - stage: pr-check-fix 29 | rust: stable 30 | os: linux 31 | script: ./ci/pr-check-fix.sh 32 | 33 | - stage: doc-bench-deploy 34 | rust: stable 35 | os: linux 36 | 37 | # gnuplot for Criterion HTML Report 38 | addons: 39 | apt: 40 | packages: 41 | - gnuplot-nox 42 | 43 | script: 44 | - cargo doc 45 | - cargo bench --all 46 | ## Move criterion's HTML report into doc/ dir in order to be uploaded in github.io 47 | - rm -rf target/doc/criterion && mv target/criterion target/doc/ 48 | 49 | deploy: 50 | # GitHub Pages 51 | - provider: pages 52 | skip_cleanup: true 53 | github_token: "$GITHUB_TOKEN" 54 | local_dir: target/doc 55 | on: 56 | branch: master 57 | # crates.io 58 | - provider: cargo 59 | token: 60 | secure: 
T1PLtSay+QeZphz3UjOn1Pn7q0ojNbPMpzxsKimj0ZFtHe09w4mQYASB1hMr55hb7NvTme/cpPxU5KRj2DFs/UpdqoWqgAyuL6NVPTPnJglE9ZXTXPjGXbr3tiH9b24/xQw3Z3j01bzyW1VRHLRQJXzyDk9ykaN/GIm8hxH15wyVT9x5AqqQ6eT83d1LgkgQFkIAcvMaSJ/+MOYKrOeRL3olIH/zLqvkPJVs7zZm1U0Z1aqMJFwDuOlLpV4Tc9485+Gk0gOz04AuBDWOaQcAy4WICKhIK/d6jI2oYptPrkKZmTwGydtOMDdWpOR4TocrdldPdEe8CothZH6k1i52Q4rl0aU2TgVGJRL+qMy0sAlBkRvvtasC9viJg2UFjL6m5Kdbor9xadj8VpSZtq89TTIMHB8WHQJvJwrOlJZk4w8UYhGV5c5EOYrRii00o4VFy2k7bzMWV55SKJXPdDvcVYLBrbTFbXAJtLDQPZWr+w0YfTMP7oxTRR9FzscVDjvR7sszYI8epfpukjmOX32XDXmz0Y2h43Gux+rRqg7eIdjxQR4pgP5wDdxQsf7UZeUwQPPbTPuCwEgf0WJ1uGVgQ0aa0ka8YI2QClor+20io+ZnPdfIX25X9Ggl/Qj4w2/wAan2T7mhMLu3KAkomP0LUDZ+vhRcZbcpgDzU182etMQ= 61 | on: 62 | tags: true 63 | 64 | env: 65 | global: 66 | secure: R96BQcuzhTALH8I0HeP6ZhIsCpAc6X4vr9jmM7euIq7s0ztueZqx2Gorycpi9zfFsLPWvDOU3HNEiL7TRROsPWouOEfS+Sn52KISmYRSamv+wwoIe3c/SBxxLXFudWjxI9qGnXbsIZCWObQvvMGQ7a5yi+uRyrqn45Jgj0oXCvhLowo09CDz56PBud967J2BkU3JF+FV82QYFn/82fu5WfDRSxX3Br2owGyU0DyZ4KZTJ+JFmiVSehpOkzVY0sxCtFvGVNFLJEH0xvj5ARWiRofLz1riJYM4yG9q4XBYxUgeJRLJ8OKnusxySncRi1+GFppusX7EScjMRiHVLM3X0s5roalVC7vToYhaWHv2WH//R60nH5txsQtFZirkbAbZuPqTgsT/KPn1xNMNj7dhaD/T3xCOUXn0nAy61ufVlOUFfkPnAhbU4vm3Pjs5xGCpaOQAgr/qnaAR1BcHRarMjwzI1VCqOVjP3M9yYNR/fo6Zfm/ExhOTLSYxiN4XzrEQO3vvcjc1x/ex1V1n2Th4qwV6tvU7GMcyoDHqFxgVI5NW0hUmuDEpgS1dIvaTgrhUq2Mt9vz+BX5P7VbxyJ9Cmervf2BjKbzUVNNvYnswzU/N5GMX8FWe33jf9RyOMRkHlvEiw1ssC+BM+E+CJoX4vZ/AE3ek3C/sA2RaGiA2TOE= 67 | -------------------------------------------------------------------------------- /src/internal_data_structure/popcount_table.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "serde")] 2 | use serde::{Deserialize, Serialize}; 3 | 4 | #[cfg(feature = "mem_dbg")] 5 | use mem_dbg::{MemDbg, MemSize}; 6 | 7 | /// Cache table of `popcount` results. 
8 | #[derive(Clone, Debug)] 9 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 10 | #[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))] 11 | pub struct PopcountTable { 12 | bit_length: u8, 13 | 14 | /// `table[target_num] == target_num.popcount()` 15 | table: Vec<u8>, 16 | } 17 | 18 | impl PopcountTable { 19 | /// Constructor. 20 | /// 21 | /// Time-complexity: `O(2^bit_length)` (Assuming `u64::count_ones()` takes `O(1)`) 22 | /// Space-complexity: `O(2^bit_length)` 23 | /// 24 | /// `bit_length` must be in [1, 64]. The table holds one entry per value in [0, 2^`bit_length`), so large values are impractical. 25 | /// 26 | /// # Panics 27 | /// When `bit_length` is out of [1, 64]. 28 | pub fn new(bit_length: u8) -> PopcountTable { 29 | assert!( 30 | (1..=64).contains(&bit_length), 31 | "bit_length (= {}) must be in [1, 64]", 32 | bit_length 33 | ); 34 | // `u64::MAX >> (64 - bit_length)` equals `2^bit_length - 1` and, unlike `(1 << bit_length) - 1`, does not overflow when `bit_length == 64`. 35 | let table = (0..=(u64::MAX >> (64 - bit_length))) 36 | .map(|target: u64| target.count_ones() as u8) 37 | .collect(); 38 | PopcountTable { bit_length, table } 39 | } 40 | 41 | /// Returns the same value as `target.count_ones()` in `O(1)`. 42 | /// 43 | /// # Panics 44 | /// When `target` is out of [0, 2^ `self.bit_length` ). 45 | pub fn popcount(&self, target: u64) -> u8 { 46 | assert!( 47 | target <= (u64::MAX >> (64 - self.bit_length)), // upper bound is 2^bit_length - 1, computed without overflow 48 | "target = {} must be < 2^{}, while PopcountTable::bit_length = {}", 49 | target, 50 | self.bit_length, 51 | self.bit_length 52 | ); 53 | 54 | self.table[target as usize] 55 | } 56 | } 57 | 58 | #[cfg(test)] 59 | mod new_success_tests { 60 | // well-tested in popcount_success_tests 61 | } 62 | 63 | #[cfg(test)] 64 | mod new_failure_tests { 65 | use super::PopcountTable; 66 | 67 | #[test] 68 | #[should_panic] 69 | fn new_0() { 70 | let _ = PopcountTable::new(0); 71 | } 72 | 73 | #[test] 74 | #[should_panic] 75 | fn new_65() { 76 | let _ = PopcountTable::new(65); 77 | } 78 | } 79 | 80 | #[cfg(test)] 81 | mod popcount_success_tests { 82 | use super::PopcountTable; 83 | use std::ops::RangeInclusive; 84 | 85 | macro_rules! 
parameterized_tests { 86 | ($($name:ident: $value:expr,)*) => { 87 | $( 88 | #[test] 89 | fn $name() { 90 | let bit_length = $value; 91 | let tbl = PopcountTable::new(bit_length); 92 | 93 | let range: RangeInclusive<u64> = 0..= ((1 << bit_length) - 1); 94 | for target in range { 95 | assert_eq!(tbl.popcount(target), target.count_ones() as u8); 96 | } 97 | } 98 | )* 99 | } 100 | } 101 | 102 | parameterized_tests! { 103 | bit_length1: 1, 104 | bit_length2: 2, 105 | bit_length4: 4, 106 | bit_length8: 8, 107 | bit_length16: 16, 108 | // 32 and 64 would also be worth testing, but building those tables takes too long 109 | 110 | bit_length15: 15, 111 | bit_length17: 17, 112 | } 113 | } 114 | 115 | #[cfg(test)] 116 | mod popcount_failure_tests { 117 | use super::PopcountTable; 118 | 119 | macro_rules! parameterized_tests { 120 | ($($name:ident: $value:expr,)*) => { 121 | $( 122 | #[test] 123 | #[should_panic] 124 | fn $name() { 125 | let bit_length = $value; 126 | let tbl = PopcountTable::new(bit_length); 127 | let _ = tbl.popcount(1 << bit_length); 128 | } 129 | )* 130 | } 131 | } 132 | 133 | parameterized_tests! 
{ 134 | bit_length1: 1, 135 | bit_length2: 2, 136 | bit_length4: 4, 137 | bit_length8: 8, 138 | bit_length16: 16, 139 | 140 | bit_length15: 15, 141 | bit_length17: 17, 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /tests/test.rs: -------------------------------------------------------------------------------- 1 | use fid_rs::Fid; 2 | 3 | #[test] 4 | fn from_str() { 5 | let fid = Fid::from("01"); 6 | assert_eq!(fid[0], false); 7 | assert_eq!(fid[1], true); 8 | } 9 | 10 | #[test] 11 | fn fuzzing_test() { 12 | let samples = 10000; 13 | 14 | fn access_from_bit_string(s: &str, i: u64) -> bool { 15 | s.chars().collect::>()[i as usize] == '1' 16 | } 17 | 18 | fn rank_from_bit_string(s: &str, i: u64) -> u64 { 19 | let chs = s.chars().collect::>(); 20 | let mut rank: u64 = 0; 21 | for j in 0..=i as usize { 22 | if chs[j] == '1' { 23 | rank += 1 24 | }; 25 | } 26 | rank 27 | } 28 | 29 | fn rank0_from_bit_string(s: &str, i: u64) -> u64 { 30 | let chs = s.chars().collect::>(); 31 | let mut rank0: u64 = 0; 32 | for j in 0..=i as usize { 33 | if chs[j] == '0' { 34 | rank0 += 1 35 | }; 36 | } 37 | rank0 38 | } 39 | 40 | fn select_from_bit_string(s: &str, num: u64) -> Option { 41 | if num == 0 { 42 | return Some(0); 43 | } 44 | 45 | let mut cnt: u64 = 0; 46 | for (i, ch) in s.chars().enumerate() { 47 | if ch == '1' { 48 | cnt += 1; 49 | } 50 | if cnt == num { 51 | return Some(i as u64); 52 | } 53 | } 54 | None 55 | } 56 | 57 | fn select0_from_bit_string(s: &str, num: u64) -> Option { 58 | if num == 0 { 59 | return Some(0); 60 | } 61 | 62 | let mut cnt: u64 = 0; 63 | for (i, ch) in s.chars().enumerate() { 64 | if ch == '0' { 65 | cnt += 1; 66 | } 67 | if cnt == num { 68 | return Some(i as u64); 69 | } 70 | } 71 | None 72 | } 73 | 74 | for _ in 0..samples { 75 | let s = &format!("{:b}", rand::random::()); 76 | eprintln!("build(): bit vec = \"{}\"", s); 77 | 78 | let fid = Fid::from(s.as_str()); 79 | 80 | for i in 0..s.len() { 
81 | eprintln!("[] op: bit vec = \"{}\", i = {}, ", s, i); 82 | assert_eq!( 83 | fid[i as u64], 84 | access_from_bit_string(s, i as u64), 85 | "bit vec = \"{}\", i={}, Index()={}, access_from_bit_string={}", 86 | s, 87 | i, 88 | fid[i as u64], 89 | access_from_bit_string(s, i as u64) 90 | ); 91 | 92 | eprintln!("rank(): bit vec = \"{}\", i = {}, ", s, i); 93 | assert_eq!( 94 | fid.rank(i as u64), 95 | rank_from_bit_string(s, i as u64), 96 | "bit vec = \"{}\", i={}, Fid::rank()={}, rank_from_bit_string={}", 97 | s, 98 | i, 99 | fid.rank(i as u64), 100 | rank_from_bit_string(s, i as u64) 101 | ); 102 | 103 | let num = i as u64; 104 | eprintln!("select(): bit vec = \"{}\", num = {}, ", s, num); 105 | assert_eq!( 106 | fid.select(num), 107 | select_from_bit_string(s, num), 108 | "bit vec = \"{}\", num={}, Fid::select()={:?}, select_from_bit_string={:?}", 109 | s, 110 | num, 111 | fid.select(num), 112 | select_from_bit_string(s, num) 113 | ); 114 | 115 | eprintln!("rank0(): bit vec = \"{}\", i = {}, ", s, i); 116 | assert_eq!( 117 | fid.rank0(i as u64), 118 | rank0_from_bit_string(s, i as u64), 119 | "bit vec = \"{}\", i={}, Fid::rank0()={}, rank0_from_bit_string={}", 120 | s, 121 | i, 122 | fid.rank0(i as u64), 123 | rank0_from_bit_string(s, i as u64) 124 | ); 125 | 126 | let num = i as u64; 127 | eprintln!("select0(): bit vec = \"{}\", num = {}, ", s, num); 128 | assert_eq!( 129 | fid.select0(num), 130 | select0_from_bit_string(s, num), 131 | "bit vec = \"{}\", num={}, Fid::select0()={:?}, select0_from_bit_string={:?}", 132 | s, 133 | num, 134 | fid.select0(num), 135 | select0_from_bit_string(s, num) 136 | ); 137 | } 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /benches/bench.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | use criterion::Criterion; 5 | use std::time::Duration; 6 | 7 | fn c() -> Criterion { 8 | 
Criterion::default() 9 | .sample_size(10) // must be >= 10 for Criterion v0.3 10 | .warm_up_time(Duration::from_secs(1)) 11 | .with_plots() 12 | } 13 | 14 | fn git_hash() -> String { 15 | use std::process::Command; 16 | let output = Command::new("git") 17 | .args(&["rev-parse", "--short", "HEAD"]) 18 | .output() 19 | .unwrap(); 20 | String::from(String::from_utf8(output.stdout).unwrap().trim()) 21 | } 22 | 23 | mod fid { 24 | use criterion::{BatchSize, Criterion}; 25 | use fid_rs::Fid; 26 | 27 | const NS: [u64; 5] = [1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20]; 28 | 29 | pub fn from_str_benchmark(_: &mut Criterion) { 30 | super::c().bench_function_over_inputs( 31 | &format!( 32 | "[{}] Fid::from(\"00...(repeated N-times)\")", 33 | super::git_hash() 34 | ), 35 | |b, &&n| { 36 | b.iter_batched( 37 | || String::from_utf8(vec!['0' as u8; n as usize]).unwrap(), 38 | |s| Fid::from(s.as_str()), 39 | BatchSize::SmallInput, 40 | ) 41 | }, 42 | &NS, 43 | ); 44 | } 45 | 46 | pub fn from_slice_benchmark(_: &mut Criterion) { 47 | super::c().bench_function_over_inputs( 48 | &format!("[{}] Fid::from(&[false; N])", super::git_hash()), 49 | |b, &&n| { 50 | b.iter_batched( 51 | || vec![false; n as usize], 52 | |v| Fid::from(&v[..]), 53 | BatchSize::SmallInput, 54 | ) 55 | }, 56 | &NS, 57 | ); 58 | } 59 | 60 | pub fn rank_benchmark(_: &mut Criterion) { 61 | let times = 1_000_000; 62 | 63 | super::c().bench_function_over_inputs( 64 | &format!("[{}] Fid::rank(N) {} times", super::git_hash(), times), 65 | move |b, &&n| { 66 | b.iter_batched( 67 | || { 68 | let v = vec![false; n as usize]; 69 | Fid::from(&v[..]) 70 | }, 71 | |fid| { 72 | // iter_batched() does not properly time `routine` time when `setup` time is far longer than `routine` time. 73 | // Tested function takes too short compared to build(). So loop many times. 
74 | for _ in 0..times { 75 | assert_eq!(fid.rank(n - 1), 0); 76 | } 77 | }, 78 | BatchSize::SmallInput, 79 | ) 80 | }, 81 | &NS, 82 | ); 83 | } 84 | 85 | pub fn select_benchmark(_: &mut Criterion) { 86 | let times = 1_000; 87 | 88 | super::c().bench_function_over_inputs( 89 | &format!("[{}] Fid::select(N) {} times", super::git_hash(), times), 90 | move |b, &&n| { 91 | b.iter_batched( 92 | || { 93 | let v = vec![true; n as usize]; 94 | Fid::from(&v[..]) 95 | }, 96 | |fid| { 97 | // iter_batched() does not properly time `routine` time when `setup` time is far longer than `routine` time. 98 | // Tested function takes too short compared to build(). So loop many times. 99 | for _ in 0..times { 100 | assert_eq!(fid.select(n - 1), Some(n - 2)); 101 | } 102 | }, 103 | BatchSize::SmallInput, 104 | ) 105 | }, 106 | &NS, 107 | ); 108 | } 109 | 110 | pub fn rank0_benchmark(_: &mut Criterion) { 111 | let times = 1_000_000; 112 | 113 | super::c().bench_function_over_inputs( 114 | &format!("[{}] Fid::rank0(N) {} times", super::git_hash(), times), 115 | move |b, &&n| { 116 | b.iter_batched( 117 | || { 118 | let v = vec![false; n as usize]; 119 | Fid::from(&v[..]) 120 | }, 121 | |fid| { 122 | // iter_batched() does not properly time `routine` time when `setup` time is far longer than `routine` time. 123 | // Tested function takes too short compared to build(). So loop many times. 
124 | for _ in 0..times { 125 | assert_eq!(fid.rank0(n - 1), n); 126 | } 127 | }, 128 | BatchSize::SmallInput, 129 | ) 130 | }, 131 | &NS, 132 | ); 133 | } 134 | 135 | pub fn select0_benchmark(_: &mut Criterion) { 136 | let times = 1_000; 137 | 138 | super::c().bench_function_over_inputs( 139 | &format!("[{}] Fid::select0(N) {} times", super::git_hash(), times), 140 | move |b, &&n| { 141 | b.iter_batched( 142 | || { 143 | let v = vec![false; n as usize]; 144 | Fid::from(&v[..]) 145 | }, 146 | |fid| { 147 | // iter_batched() does not properly time `routine` time when `setup` time is far longer than `routine` time. 148 | // Tested function takes too short compared to build(). So loop many times. 149 | for _ in 0..times { 150 | assert_eq!(fid.select0(n - 1), Some(n - 2)); 151 | } 152 | }, 153 | BatchSize::SmallInput, 154 | ) 155 | }, 156 | &NS, 157 | ); 158 | } 159 | } 160 | 161 | criterion_group!( 162 | benches, 163 | fid::from_str_benchmark, 164 | fid::from_slice_benchmark, 165 | fid::rank_benchmark, 166 | fid::select_benchmark, 167 | fid::rank0_benchmark, 168 | fid::select0_benchmark, 169 | ); 170 | criterion_main!(benches); 171 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fid-rs 2 | 3 | High performance FID (Fully Indexable Dictionary) library. 
4 | 5 | [Master API Docs](https://laysakura.github.io/fid-rs/fid_rs/) 6 | | 7 | [Released API Docs](https://docs.rs/crate/fid-rs) 8 | | 9 | [Benchmark Results](https://laysakura.github.io/fid-rs/criterion/report/) 10 | | 11 | [Changelog](https://github.com/laysakura/fid-rs/blob/master/CHANGELOG.md) 12 | 13 | [![GitHub Actions Status](https://github.com/laysakura/fid-rs/actions/workflows/clippy.yml/badge.svg)](https://github.com/laysakura/fid-rs/actions) 14 | [![Travis Status](https://travis-ci.com/laysakura/fid-rs.svg?branch=master)](https://travis-ci.com/laysakura/fid-rs) 15 | [![Crates.io Version](https://img.shields.io/crates/v/fid-rs.svg)](https://crates.io/crates/fid-rs) 16 | [![Crates.io Downloads](https://img.shields.io/crates/d/fid-rs.svg)](https://crates.io/crates/fid-rs) 17 | [![Minimum rustc version](https://img.shields.io/badge/rustc-1.33+-lightgray.svg)](https://github.com/laysakura/fid-rs#rust-version-supports) 18 | [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/laysakura/fid-rs/blob/master/LICENSE-MIT) 19 | [![License: Apache 2.0](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](https://github.com/laysakura/fid-rs/blob/master/LICENSE-APACHE) 20 | 21 | ## Quickstart 22 | 23 | To use fid-rs, add the following to your `Cargo.toml` file: 24 | 25 | ```toml 26 | [dependencies] 27 | fid-rs = "0.1" # NOTE: Replace to latest minor version. 28 | ``` 29 | 30 | ### Usage Overview 31 | 32 | ```rust 33 | use fid_rs::Fid; 34 | 35 | let fid = Fid::from("0100_1"); // Tips: Fid::from::<&str>() ignores '_'. 
36 | 37 | // Basic operations --------------------- 38 | assert_eq!(fid[0], false); // [0]1001; 0th bit is '0' (false) 39 | assert_eq!(fid[1], true); // 0[1]001; 1st bit is '1' (true) 40 | assert_eq!(fid[4], true); // 0100[1]; 4th bit is '1' (true) 41 | 42 | assert_eq!(fid.rank(0), 0); // [0]1001; Range [0, 0] has no '1' 43 | assert_eq!(fid.rank(3), 1); // [0100]1; Range [0, 3] has 1 '1' 44 | assert_eq!(fid.rank(4), 2); // [01001]; Range [0, 4] has 2 '1's 45 | 46 | assert_eq!(fid.select(0), Some(0)); // []01001; Minimum i where range [0, i] has 0 '1's is i=0 47 | assert_eq!(fid.select(1), Some(1)); // 0[1]001; Minimum i where range [0, i] has 1 '1's is i=1 48 | assert_eq!(fid.select(2), Some(4)); // 0100[1]; Minimum i where range [0, i] has 2 '1's is i=4 49 | assert_eq!(fid.select(3), None); // There is no i where range [0, i] has 3 '1's 50 | 51 | // rank0, select0 ----------------------- 52 | assert_eq!(fid.rank0(0), 1); // [0]1001; Range [0, 0] has 1 '0' 53 | assert_eq!(fid.rank0(3), 3); // [0100]1; Range [0, 3] has 3 '0's 54 | assert_eq!(fid.rank0(4), 3); // [01001]; Range [0, 4] has 3 '0's 55 | 56 | assert_eq!(fid.select0(0), Some(0)); // []01001; Minimum i where range [0, i] has 0 '0's is i=0 57 | assert_eq!(fid.select0(1), Some(0)); // [0]1001; Minimum i where range [0, i] has 1 '0's is i=0 58 | assert_eq!(fid.select0(2), Some(2)); // 01[0]01; Minimum i where range [0, i] has 2 '0's is i=2 59 | assert_eq!(fid.select0(4), None); // There is no i where range [0, i] has 4 '0's 60 | ``` 61 | 62 | ### Constructors 63 | 64 | ```rust 65 | use fid_rs::Fid; 66 | 67 | // Most human-friendly way: Fid::from::<&str>() 68 | let fid = Fid::from("0100_1"); 69 | 70 | // Complex construction in simple way: Fid::from::<&[bool]>() 71 | let mut arr = [false; 5]; 72 | arr[1] = true; 73 | arr[4] = true; 74 | let fid = Fid::from(&arr[..]); 75 | ``` 76 | 77 | ### Iterator 78 | 79 | ```rust 80 | use fid_rs::Fid; 81 | 82 | let fid = Fid::from("0100_1"); 83 | 84 | for bit in fid.iter() 
{ 85 | println!("{}", bit); 86 | } 87 | // => 88 | // false 89 | // true 90 | // false 91 | // false 92 | // true 93 | ``` 94 | 95 | ### Utility Methods 96 | 97 | ```rust 98 | use fid_rs::Fid; 99 | 100 | let fid = Fid::from("0100_1"); 101 | 102 | assert_eq!(fid.len(), 5); 103 | ``` 104 | 105 | ## Features 106 | 107 | - **Arbitrary length support with minimum working memory**: fid-rs supports FIDs of virtually _arbitrary size_. It is carefully designed to use as little memory as possible. 108 | - **Parallel build of FID**: Build operations (`Fid::from()`) take _O(N)_ time. The build is parallelized and achieves nearly optimal scale-out. 109 | - **No memory copy while/after build operations**: Once the internal bit vector representation has been created, no operation copies memory. 110 | - **Latest benchmark results are always accessible**: fid-rs is continuously benchmarked in Travis CI using [Criterion.rs](https://crates.io/crates/criterion). Graphical benchmark results are published [here](https://laysakura.github.io/fid-rs/criterion/report/). 
111 | 112 | ### Complexity 113 | 114 | When the length of a `Fid` is _N_: 115 | 116 | | Operation | Time-complexity | Space-complexity | 117 | |-----------|-----------------|------------------| 118 | | [Fid::from::<&str>()](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#implementations) | _O(N)_ | _N + o(N)_ | 119 | | [Fid::from::<&[bool]>()](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#implementations) | _O(N)_ | _N + o(N)_ | 120 | | [Index<u64>](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#impl-Index) | _O(1)_ | _0_ | 121 | | [Fid::rank()](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#method.rank) | _O(1)_ | _O(1)_ | 122 | | [Fid::rank0()](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#method.rank0) | _O(1)_ | _O(1)_ | 123 | | [Fid::select()](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#method.select) | _O(log N)_ | _O(1)_ | 124 | | [Fid::select0()](https://laysakura.github.io/fid-rs/fid_rs/fid/struct.Fid.html#method.select0) | _O(log N)_ | _O(1)_ | 125 | 126 | (Actually, `select()`'s time-complexity can be _O(1)_ with complex implementation but fid-rs, like many other libraries, uses binary search of `rank()`'s result). 127 | 128 | ## Versions 129 | fid-rs uses [semantic versioning](http://semver.org/spec/v2.0.0.html). 130 | 131 | Since current major version is _0_, minor version update might involve breaking public API change (although it is carefully avoided). 132 | 133 | ## Rust Version Supports 134 | 135 | fid-rs is continuously tested with these Rust versions in Travis CI: 136 | 137 | - 1.33.0 138 | - Latest stable version 139 | - Beta version 140 | - Nightly build 141 | 142 | So it expectedly works with Rust 1.33.0 and any newer versions. 143 | 144 | Older versions may also work, but are not tested or guaranteed. 145 | 146 | ## Contributing 147 | 148 | Any kind of pull requests are appreciated. 
149 | 150 | ### Guidelines 151 | 152 | - `README.md` is generated from `$ cargo readme` command. Do not manually update `README.md` but edit `src/lib.rs` and then `$ cargo readme > README.md`. 153 | - Travis CI automatically does the following commit & push to your pull-requests: 154 | - `$ cargo readme > README.md` 155 | - `$ cargo fmt --all` 156 | 157 | ## License 158 | 159 | MIT OR Apache-2.0 160 | -------------------------------------------------------------------------------- /src/fid.rs: -------------------------------------------------------------------------------- 1 | mod block; 2 | mod blocks; 3 | mod chunk; 4 | mod chunks; 5 | mod fid_impl; 6 | mod fid_iter; 7 | 8 | use super::internal_data_structure::popcount_table::PopcountTable; 9 | 10 | #[cfg(feature = "serde")] 11 | use serde::{Deserialize, Serialize}; 12 | 13 | #[cfg(feature = "mem_dbg")] 14 | use mem_dbg::{MemDbg, MemSize}; 15 | 16 | /// FID (Fully Indexable Dictionary). 17 | /// 18 | /// This class can handle bit sequence of virtually **arbitrary length.** 19 | /// 20 | /// In fact, _N_ (FID's length) is designed to be limited to: _N <= 2^64_.
21 | /// It should be enough for almost all use cases since binary data of length _2^64_ consumes _2^21 = 2,097,152_ TB (terabyte), which is hard to handle by state-of-the-art computer architecture. 22 | /// 23 | /// # Implementation detail 24 | /// [Index<u64>](#impl-Index)'s implementation is trivial. 25 | /// 26 | /// [select()](#method.select) just uses binary search of `rank()` results. 27 | /// 28 | /// [rank()](#method.rank)'s implementation is standard but non-trivial. 29 | /// So the implementation of _rank()_ is explained here. 30 | /// 31 | /// ## [rank()](#method.rank)'s implementation 32 | /// Say you have the following bit vector. 33 | /// 34 | /// ```text 35 | /// 00001000 01000001 00000100 11000000 00100000 00000101 10100000 00010000 001 ; (N=67) 36 | /// ``` 37 | /// 38 | /// Answer _rank(48)_ in _O(1)_ time-complexity and _o(N)_ space-complexity. 39 | /// 40 | /// Naively, you can count the number of '1' from left to right. 41 | /// You will find _rank(48) == 10_ but it took _O(N)_ time-complexity. 42 | /// 43 | /// To reduce time-complexity to _O(1)_, you can use _memoization_ technique.
44 | /// Of course, you can memoize results of _rank(i)_ for every _i ([0, N-1])_. 45 | /// 46 | /// ```text 47 | /// Bit vector; 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 [1] 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 ; (N=67) 48 | /// Memo rank(i); 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 3 3 3 3 3 3 4 4 4 5 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 8 8 9 10 10 11 11 11 11 11 11 11 11 11 12 12 12 12 12 12 12 13 49 | /// ``` 50 | /// 51 | /// From this memo, you can answer _rank(48) == 10_ in constant time, although space-complexity for this memo is _O(N) > o(N)_. 52 | /// 53 | /// To reduce space-complexity using memoization, we divide the bit vector into **Chunk** and **Block**. 54 | /// 55 | /// ```text 56 | /// Bit vector; 00001000 01000001 00000100 11000000 00100000 00000101 [1]0100000 00010000 001 ; (N=67) 57 | /// Chunk; | 7 | 13 | ; (size = (log N)^2 = 36) 58 | /// Block; |0 |1 |1 |2 |2 |3 |3 |4 |6 |6 |6 |7 |0 |0 |0 |2 |4 |4 |4 |5 |5 |5 |6| ; (size = (log N) / 2 = 3) 59 | /// ``` 60 | /// 61 | /// - A **Chunk** has size of _(log N)^2_. Its value is _rank(index of the last bit of the chunk)_. 62 | /// - A **Block** has size of _(log N) / 2_. A chunk has many blocks. Block's value is the number of '1's in _[index of the first bit of the chunk the block belongs to, index of the last bit of the block]_ (note that the value is reset to 0 at the first bit of a chunk). 63 | /// 64 | /// Now you want to answer _rank(48)_. 48-th bit is in the 2nd chunk, and in the 5th block in the chunk.
65 | /// So the _rank(48)_ is at least: 66 | /// 67 | /// _7 (value of 1st chunk) + 2 (value of 4th block in the 2nd chunk)_ 68 | /// 69 | /// Then, focus on 3 bits in 5th block in the 2nd chunk; `[1]01`.
70 | /// As you can see, only 1 '1' is included up to 48-th bit (`101` has 2 '1's but 2nd '1' is 50-th bit, irrelevant to _rank(48)_). 71 | /// 72 | /// Therefore, the _rank(48)_ is calculated as: 73 | /// 74 | /// _7 (value of 1st chunk) + 2 (value of 4th block in the 2nd chunk) + 1 ('1's in 5th block up to 48-th bit)_ 75 | /// 76 | /// OK. That's all... Wait!
77 | /// _rank()_ must be in _O(1)_ time-complexity. 78 | /// 79 | /// - _7 (value of 1st chunk)_: _O(1)_ if you store chunk value in array structure. 80 | /// - _2 (value of 4th block in the 2nd chunk)_: Same as above. 81 | /// - _1 ('1's in 5th block up to 48-th bit)_: **_O(length of block) = O(log N)_** ! 82 | /// 83 | /// Counting '1's in a block must also be _O(1)_, while using _o(N)_ space.
84 | /// We use **Table** for this purpose. 85 | /// 86 | /// | Block content | Number of '1's in block | 87 | /// |---------------|-------------------------| 88 | /// | `000` | 0 | 89 | /// | `001` | 1 | 90 | /// | `010` | 1 | 91 | /// | `011` | 2 | 92 | /// | `100` | 1 | 93 | /// | `101` | 2 | 94 | /// | `110` | 2 | 95 | /// | `111` | 3 | 96 | /// 97 | /// This table is constructed in `build()`. So we can find the number of '1's in block in _O(1)_ time.
98 | /// Note that this table has _O(log N) = o(N)_ length. 99 | /// 100 | /// In summary: 101 | /// 102 | /// _rank() = (value of left chunk) + (value of left block) + (value of table keyed by inner block bits)_. 103 | #[derive(Clone, Debug)] 104 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 105 | #[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))] 106 | pub struct Fid { 107 | /// Raw data. 108 | byte_vec: Vec, 109 | 110 | /// Bit length 111 | bit_len: u64, 112 | 113 | /// Total popcount of _[0, last bit of the chunk]_. 114 | /// 115 | /// Each chunk takes _2^64_ at max (when every bit is '1' for bit vector of length of _2^64_). 116 | /// A chunk has blocks. 117 | chunks: Chunks, 118 | 119 | /// Table to calculate inner-block `rank()` in _O(1)_. 120 | table: PopcountTable, 121 | } 122 | 123 | pub struct FidIter<'iter> { 124 | fid: &'iter Fid, 125 | i: u64, 126 | } 127 | 128 | /// Collection of Chunk. 129 | #[derive(Clone, Debug)] 130 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 131 | #[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))] 132 | struct Chunks { 133 | chunks: Vec, 134 | chunks_cnt: u64, 135 | } 136 | 137 | /// Total popcount of _[0, last bit of the chunk]_ of a bit vector. 138 | /// 139 | /// Each chunk takes _2^64_ at max (when every bit is '1' for Fid of length of _2^64_). 140 | #[derive(Clone, Debug)] 141 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 142 | #[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))] 143 | struct Chunk { 144 | value: u64, // popcount 145 | blocks: Blocks, 146 | } 147 | 148 | /// Collection of Block in a Chunk. 
149 | #[derive(Clone, Debug)] 150 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 151 | #[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))] 152 | struct Blocks { 153 | blocks: Vec, 154 | blocks_cnt: u16, 155 | } 156 | 157 | /// Total popcount of _[_first bit of the chunk which the block belongs to_, _last bit of the block_]_ of a bit vector. 158 | /// 159 | /// Each block takes (log 2^64)^2 = 64^2 = 2^16 at max (when every bit in a chunk is 1 for Fid of length of 2^64) 160 | #[derive(Clone, Debug)] 161 | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] 162 | #[cfg_attr(feature = "mem_dbg", derive(MemDbg, MemSize))] 163 | struct Block { 164 | value: u16, // popcount 165 | length: u8, 166 | } 167 | -------------------------------------------------------------------------------- /src/fid/chunks.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "rayon")] 2 | use rayon::prelude::*; 3 | 4 | use super::{Chunk, Chunks}; 5 | use crate::internal_data_structure::raw_bit_vector::RawBitVector; 6 | 7 | impl super::Chunks { 8 | /// Constructor. 9 | #[cfg(feature = "rayon")] 10 | pub fn new(rbv: &RawBitVector) -> Chunks { 11 | let n = rbv.len(); 12 | let chunk_size: u16 = Chunks::calc_chunk_size(n); 13 | let chunks_cnt: usize = Chunks::calc_chunks_cnt(n) as usize; 14 | 15 | // In order to use chunks.par_iter_mut(), chunks should have len first. 16 | // So fill meaning less None value. 17 | let mut chunks: Vec = Vec::with_capacity(chunks_cnt); 18 | 19 | // Parallel - Each chunk has its popcount. 20 | // Actually, chunk should have total popcount from index 0 but it is calculated later in sequential manner. 
21 | (0..chunks_cnt) 22 | .into_par_iter() 23 | .map(|number_of_chunk| { 24 | let this_chunk_size: u16 = if number_of_chunk == chunks_cnt - 1 { 25 | // When `chunk_size == 6`: 26 | // 27 | // 000 111 000 11 : rbv 28 | // | | | : chunks 29 | // 30 | // Here, when `i_chunk == 1` (targeting on last '00011' chunk), 31 | // `this_chunk_size == 5` 32 | let chunk_size_or_0 = (n % chunk_size as u64) as u16; 33 | if chunk_size_or_0 == 0 { 34 | chunk_size 35 | } else { 36 | chunk_size_or_0 37 | } 38 | } else { 39 | chunk_size 40 | }; 41 | 42 | let chunk_rbv = rbv.clone_sub( 43 | number_of_chunk as u64 * chunk_size as u64, 44 | this_chunk_size as u64, 45 | ); 46 | 47 | let popcnt_in_chunk = chunk_rbv.popcount(); 48 | Chunk::new( 49 | popcnt_in_chunk, 50 | this_chunk_size, 51 | rbv, 52 | number_of_chunk as u64, 53 | ) 54 | }) 55 | .collect_into_vec(&mut chunks); 56 | 57 | // Sequential - Each chunk has total popcount from index 0. 58 | for i_chunk in 0..chunks_cnt { 59 | chunks[i_chunk].value += if i_chunk == 0 { 60 | 0 61 | } else { 62 | chunks[i_chunk - 1].value 63 | } 64 | } 65 | Chunks { 66 | chunks, 67 | chunks_cnt: chunks_cnt as u64, 68 | } 69 | } 70 | 71 | /// Constructor. 
72 | #[cfg(not(feature = "rayon"))] 73 | pub fn new(rbv: &RawBitVector) -> Chunks { 74 | let n = rbv.len(); 75 | let chunk_size: u16 = Chunks::calc_chunk_size(n); 76 | let chunks_cnt: u64 = Chunks::calc_chunks_cnt(n); 77 | 78 | let mut chunks: Vec = Vec::with_capacity(chunks_cnt as usize); 79 | let mut comulative_popcount = 0; 80 | 81 | for i_chunk in 0..chunks_cnt { 82 | let this_chunk_size: u16 = if i_chunk == chunks_cnt - 1 { 83 | // When `chunk_size == 6`: 84 | // 85 | // 000 111 000 11 : rbv 86 | // | | | : chunks 87 | // 88 | // Here, when `i_chunk == 1` (targeting on last '00011' chunk), 89 | // `this_chunk_size == 5` 90 | let chunk_size_or_0 = (n % chunk_size as u64) as u16; 91 | if chunk_size_or_0 == 0 { 92 | chunk_size 93 | } else { 94 | chunk_size_or_0 95 | } 96 | } else { 97 | chunk_size 98 | }; 99 | 100 | let chunk_rbv = rbv.clone_sub(i_chunk * chunk_size as u64, this_chunk_size as u64); 101 | 102 | let popcnt_in_chunk = chunk_rbv.popcount(); 103 | comulative_popcount += popcnt_in_chunk; 104 | chunks.push(Chunk::new( 105 | comulative_popcount, 106 | this_chunk_size, 107 | rbv, 108 | i_chunk, 109 | )); 110 | } 111 | 112 | Chunks { chunks, chunks_cnt } 113 | } 114 | 115 | /// Returns size of 1 chunk: _(log N)^2_. 116 | pub fn calc_chunk_size(n: u64) -> u16 { 117 | let lg2 = (n as f64).log2() as u16; 118 | let sz = lg2 * lg2; 119 | if sz == 0 { 120 | 1 121 | } else { 122 | sz 123 | } 124 | } 125 | 126 | /// Returns count of chunks: _N / (log N)^2_. 127 | /// 128 | /// At max: N / (log N)^2 = 2^64 / 64^2 = 2^(64-12) 129 | pub fn calc_chunks_cnt(n: u64) -> u64 { 130 | let chunk_size = Chunks::calc_chunk_size(n); 131 | n / (chunk_size as u64) + if n % (chunk_size as u64) == 0 { 0 } else { 1 } 132 | } 133 | 134 | /// Returns i-th chunk. 135 | /// 136 | /// # Panics 137 | /// When _`i` >= `self.chunks_cnt()`_. 
138 | pub fn access(&self, i: u64) -> &Chunk { 139 | assert!( 140 | i < self.chunks_cnt, 141 | "i = {} must be smaller then {} (self.chunks_cnt())", 142 | i, 143 | self.chunks_cnt 144 | ); 145 | &self.chunks[i as usize] 146 | } 147 | } 148 | 149 | #[cfg(test)] 150 | mod new_success_tests { 151 | use super::Chunks; 152 | use crate::internal_data_structure::raw_bit_vector::RawBitVector; 153 | 154 | struct Input<'a> { 155 | byte_slice: &'a [u8], 156 | last_byte_len: u8, 157 | expected_chunk_size: u16, 158 | expected_chunks: &'a Vec, 159 | } 160 | 161 | macro_rules! parameterized_tests { 162 | ($($name:ident: $value:expr,)*) => { 163 | $( 164 | #[test] 165 | fn $name() { 166 | let input: Input = $value; 167 | let rbv = RawBitVector::new(input.byte_slice, 0, input.last_byte_len); 168 | let n = rbv.len(); 169 | let chunks = Chunks::new(&rbv); 170 | 171 | assert_eq!(Chunks::calc_chunk_size(n), input.expected_chunk_size); 172 | assert_eq!(Chunks::calc_chunks_cnt(n), input.expected_chunks.len() as u64); 173 | for (i, expected_chunk) in input.expected_chunks.iter().enumerate() { 174 | let chunk = chunks.access(i as u64); 175 | assert_eq!(chunk.value(), *expected_chunk); 176 | } 177 | } 178 | )* 179 | } 180 | } 181 | 182 | parameterized_tests! 
{ 183 | t1: Input { 184 | // N = 1, (log_2(N))^2 = 1 185 | byte_slice: &[0b0000_0000], 186 | last_byte_len: 1, 187 | expected_chunk_size: 1, 188 | expected_chunks: &vec!(0) 189 | }, 190 | t2: Input { 191 | // N = 1, (log_2(N))^2 = 1 192 | byte_slice: &[0b1000_0000], 193 | last_byte_len: 1, 194 | expected_chunk_size: 1, 195 | expected_chunks: &vec!(1) 196 | }, 197 | t3: Input { 198 | // N = 2^2, (log_2(N))^2 = 4 199 | byte_slice: &[0b0111_0000], 200 | last_byte_len: 4, 201 | expected_chunk_size: 4, 202 | expected_chunks: &vec!(3) 203 | }, 204 | t4: Input { 205 | // N = 2^3, (log_2(N))^2 = 9 206 | byte_slice: &[0b0111_1101], 207 | last_byte_len: 8, 208 | expected_chunk_size: 9, 209 | expected_chunks: &vec!(6) 210 | }, 211 | t5: Input { 212 | // N = 2^3 + 1, (log_2(N))^2 = 9 213 | byte_slice: &[0b0111_1101, 0b1000_0000], 214 | last_byte_len: 1, 215 | expected_chunk_size: 9, 216 | expected_chunks: &vec!(7) 217 | }, 218 | t6: Input { 219 | // N = 2^3 + 2, (log_2(N))^2 = 9 220 | byte_slice: &[0b0111_1101, 0b1100_0000], 221 | last_byte_len: 2, 222 | expected_chunk_size: 9, 223 | expected_chunks: &vec!(7, 8) 224 | }, 225 | 226 | bugfix_11: Input { 227 | // N = 2^1, (log_2(N))^2 = 4 228 | byte_slice: &[0b1100_0000], 229 | last_byte_len: 2, 230 | expected_chunk_size: 1, 231 | expected_chunks: &vec!(1, 2) 232 | }, 233 | bugfix_11110110_11010101_01000101_11101111_10101011_10100101_01100011_00110100_01010101_10010000_01001100_10111111_00110011_00111110_01110101_11011100: Input { 234 | // N = 8 * 16 = 2^7, (log_2(N))^2 = 49 235 | byte_slice: &[0b11110110, 0b11010101, 0b01000101, 0b11101111, 0b10101011, 0b10100101, 0b0_1100011, 0b00110100, 0b01010101, 0b10010000, 0b01001100, 0b10111111, 0b00_110011, 0b00111110, 0b01110101, 0b11011100], 236 | last_byte_len: 8, 237 | expected_chunk_size: 49, 238 | expected_chunks: &vec!(30, 53, 72) 239 | }, 240 | } 241 | } 242 | -------------------------------------------------------------------------------- /LICENSE-APACHE: 
-------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2019 Sho Nakatani 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/fid/fid_impl.rs: -------------------------------------------------------------------------------- 1 | use super::{Blocks, Chunks, Fid}; 2 | use crate::internal_data_structure::popcount_table::PopcountTable; 3 | use crate::internal_data_structure::raw_bit_vector::RawBitVector; 4 | use std::ops::Index; 5 | 6 | impl From<&str> for Fid { 7 | /// Constructor from string representation of bit sequence. 8 | /// 9 | /// - '0' is interpreted as _0_. 10 | /// - '1' is interpreted as _1_. 11 | /// - '_' is just ignored. 12 | /// 13 | /// # Examples 14 | /// ``` 15 | /// use fid_rs::Fid; 16 | /// 17 | /// let fid = Fid::from("01_11"); 18 | /// assert_eq!(fid[0], false); 19 | /// assert_eq!(fid[1], true); 20 | /// assert_eq!(fid[2], true); 21 | /// assert_eq!(fid[3], true); 22 | /// ``` 23 | /// 24 | /// # Panics 25 | /// When: 26 | /// - `s` contains any character other than '0', '1', and '_'. 
27 | /// - `s` does not contain any '0' or '1' 28 | fn from(s: &str) -> Self { 29 | let bits: Vec = s 30 | .as_bytes() 31 | .iter() 32 | .filter_map(|c| match c { 33 | 48 /* '0' */ => Some(false), 34 | 49 /* '1' */ => Some(true), 35 | 95 /* '_' */ => None, 36 | _ => panic!("`s` must consist of '0' or '1'. '{}' included.", c), 37 | }) 38 | .collect(); 39 | Self::from(&bits[..]) 40 | } 41 | } 42 | 43 | impl From<&[bool]> for Fid { 44 | /// Constructor from slice of boolean. 45 | /// 46 | /// # Examples 47 | /// ``` 48 | /// use fid_rs::Fid; 49 | /// 50 | /// let bits = [false, true, true, true]; 51 | /// let fid = Fid::from(&bits[..]); 52 | /// assert_eq!(fid[0], false); 53 | /// assert_eq!(fid[1], true); 54 | /// assert_eq!(fid[2], true); 55 | /// assert_eq!(fid[3], true); 56 | /// ``` 57 | /// 58 | /// # Panics 59 | /// When: 60 | /// - `bits` is empty. 61 | fn from(bits: &[bool]) -> Self { 62 | assert!(!bits.is_empty()); 63 | 64 | let mut byte_vec: Vec = Vec::with_capacity(bits.len() / 8 + 1); 65 | let mut last_byte_len = 0u8; 66 | 67 | for bits8 in bits.chunks(8) { 68 | last_byte_len = bits8.len() as u8; // although this bits8 might not be a last byte. 69 | 70 | let byte = (0..last_byte_len).fold(0, |byte, i| { 71 | byte + if bits8[i as usize] { 1 << (7 - i) } else { 0 } 72 | }); 73 | byte_vec.push(byte); 74 | } 75 | 76 | Fid::build(byte_vec, last_byte_len) 77 | } 78 | } 79 | 80 | static TRUE: bool = true; 81 | static FALSE: bool = false; 82 | 83 | impl Index for Fid { 84 | type Output = bool; 85 | 86 | /// Returns `i`-th element of the `Fid`. 87 | /// 88 | /// # Panics 89 | /// When _`i` >= length of the `Fid`_. 90 | fn index(&self, index: u64) -> &Self::Output { 91 | if self.rbv().access(index) { 92 | &TRUE 93 | } else { 94 | &FALSE 95 | } 96 | } 97 | } 98 | 99 | impl Fid { 100 | /// Build FID from byte vector. 
101 | fn build(byte_vec: Vec, last_byte_len: u8) -> Self { 102 | let bit_len = (byte_vec.len() - 1) as u64 * 8 + last_byte_len as u64; 103 | let rbv = RawBitVector::new(&byte_vec[..], 0, last_byte_len); 104 | let chunks = Chunks::new(&rbv); 105 | let table = PopcountTable::new(Blocks::calc_block_size(rbv.len())); 106 | Self { 107 | byte_vec, 108 | bit_len, 109 | chunks, 110 | table, 111 | } 112 | } 113 | 114 | /// Returns the number of _1_ in _[0, `i`]_ elements of the `Fid`. 115 | /// 116 | /// # Panics 117 | /// When _`i` >= length of the `Fid`_. 118 | /// 119 | /// # Implementation detail 120 | /// 121 | /// ```text 122 | /// 00001000 01000001 00000100 11000000 00100000 00000101 00100000 00010000 001 Raw data (N=67) 123 | /// ^ 124 | /// i = 51 125 | /// | 7 | 13 | Chunk (size = (log N)^2 = 36) 126 | /// ^ 127 | /// chunk_left i_chunk = 1 chunk_right 128 | /// 129 | /// |0 |1 |1 |2 |2 |3 |3 |4 |6 |6 |6 |7 |0 |0 |0 |2 |3 |3 |4 |4 |4 |5 |5| Block (size = log N / 2 = 3) 130 | /// ^ 131 | /// i_block = 17 132 | /// block_left | block_right 133 | /// ``` 134 | /// 135 | /// 1. Find `i_chunk`. _`i_chunk` = `i` / `chunk_size`_. 136 | /// 2. Get _`chunk_left` = Chunks[`i_chunk` - 1]_ only if _`i_chunk` > 0_. 137 | /// 3. Get _rank from chunk_left_ if `chunk_left` exists. 138 | /// 4. Get _`chunk_right` = Chunks[`i_chunk`]_. 139 | /// 5. Find `i_block`. _`i_block` = (`i` - `i_chunk` * `chunk_size`) / block size_. 140 | /// 6. Get _`block_left` = `chunk_right.blocks`[`i_block` - 1]_ only if _`i_block` > 0_. 141 | /// 7. Get _rank from block_left_ if `block_left` exists. 142 | /// 8. Get inner-block data `block_bits`. `block_bits` must be of _block size_ length, padded with _0_ in the right bits. 143 | /// 9. Calculate _rank of `block_bits`_ in _O(1)_ using a table memoizing _block size_ bit's popcount. 
144 | pub fn rank(&self, i: u64) -> u64 { 145 | let n = self.len(); 146 | assert!(i < n); 147 | let chunk_size = Chunks::calc_chunk_size(n); 148 | let block_size = Blocks::calc_block_size(n); 149 | 150 | // 1. 151 | let i_chunk = i / chunk_size as u64; 152 | 153 | // 3. 154 | let rank_from_chunk = if i_chunk == 0 { 155 | 0 156 | } else { 157 | // 2., 3. 158 | let chunk_left = self.chunks.access(i_chunk - 1); 159 | chunk_left.value() 160 | }; 161 | 162 | // 4. 163 | let chunk_right = self.chunks.access(i_chunk); 164 | 165 | // 5. 166 | let i_block = (i - i_chunk * chunk_size as u64) / block_size as u64; 167 | 168 | // 7. 169 | let rank_from_block = if i_block == 0 { 170 | 0 171 | } else { 172 | // 6., 7. 173 | let block_left = chunk_right.blocks.access(i_block - 1); 174 | block_left.value() 175 | }; 176 | 177 | // 8. 178 | let block_right = chunk_right.blocks.access(i_block); 179 | let pos_block_start = i_chunk * chunk_size as u64 + i_block * block_size as u64; 180 | assert!(i - pos_block_start < block_right.length() as u64); 181 | let block_right_rbv = self 182 | .rbv() 183 | .clone_sub(pos_block_start, block_right.length() as u64); 184 | let block_right_as_u32 = block_right_rbv.as_u32(); 185 | let bits_to_use = i - pos_block_start + 1; 186 | let block_bits = block_right_as_u32 >> (32 - bits_to_use); 187 | let rank_from_table = self.table.popcount(block_bits as u64); 188 | 189 | // 9. 190 | rank_from_chunk + rank_from_block as u64 + rank_from_table as u64 191 | } 192 | 193 | /// Returns the number of _0_ in _[0, `i`]_ elements of the `Fid`. 194 | /// 195 | /// # Panics 196 | /// When _`i` >= length of the `Fid`_. 197 | pub fn rank0(&self, i: u64) -> u64 { 198 | (i + 1) - self.rank(i) 199 | } 200 | 201 | /// Returns the minimum position (0-origin) `i` where _`rank(i)` == num_ of `num`-th _1_ if exists. Else returns None. 202 | /// 203 | /// # Panics 204 | /// When _`num` > length of the `Fid`_. 
205 | /// 206 | /// # Implementation detail 207 | /// Binary search using `rank()`. 208 | pub fn select(&self, num: u64) -> Option { 209 | let n = self.len(); 210 | assert!(num <= n); 211 | 212 | if num == 0 || num == 1 && self[0] { 213 | return Some(0); 214 | } 215 | if self.rank(n - 1) < num { 216 | return None; 217 | }; 218 | 219 | let mut ng = 0; 220 | let mut ok = n - 1; 221 | while ok - ng > 1 { 222 | let mid = (ok + ng) / 2; 223 | if self.rank(mid) >= num { 224 | ok = mid; 225 | } else { 226 | ng = mid; 227 | } 228 | } 229 | Some(ok) 230 | } 231 | 232 | /// Returns the minimum position (0-origin) `i` where _`rank(i)` == num_ of `num`-th _0_ if exists. Else returns None. 233 | /// 234 | /// # Panics 235 | /// When _`num` > length of the `Fid`_. 236 | pub fn select0(&self, num: u64) -> Option { 237 | let n = self.bit_len; 238 | assert!(num <= n); 239 | 240 | if num == 0 || num == 1 && !self[0] { 241 | return Some(0); 242 | } 243 | if self.rank0(n - 1) < num { 244 | return None; 245 | }; 246 | 247 | let mut ng = 0; 248 | let mut ok = n - 1; 249 | while ok - ng > 1 { 250 | let mid = (ok + ng) / 2; 251 | if self.rank0(mid) >= num { 252 | ok = mid; 253 | } else { 254 | ng = mid; 255 | } 256 | } 257 | Some(ok) 258 | } 259 | 260 | /// Returns bit length of this FID. 261 | pub fn len(&self) -> u64 { 262 | self.bit_len 263 | } 264 | 265 | /// Returns whether the FID is empty. 266 | pub fn is_empty(&self) -> bool { 267 | self.bit_len == 0 268 | } 269 | 270 | fn rbv(&self) -> RawBitVector { 271 | let last_byte_len_or_0 = (self.bit_len % 8) as u8; 272 | RawBitVector::new( 273 | &self.byte_vec[..], 274 | 0, 275 | if last_byte_len_or_0 == 0 { 276 | 8 277 | } else { 278 | last_byte_len_or_0 279 | }, 280 | ) 281 | } 282 | } 283 | 284 | #[cfg(test)] 285 | mod from_str_success_tests { 286 | use crate::Fid; 287 | 288 | macro_rules! 
parameterized_tests { 289 | ($($name:ident: $value:expr,)*) => { 290 | $( 291 | #[test] 292 | fn $name() { 293 | let (s, expected_bits) = $value; 294 | let fid = Fid::from(s); 295 | 296 | // TODO length check 297 | // assert_eq!(fid.length(), expected_bits); 298 | for (i, bit) in expected_bits.iter().enumerate() { 299 | assert_eq!(fid[i as u64], *bit); 300 | } 301 | } 302 | )* 303 | } 304 | } 305 | 306 | parameterized_tests! { 307 | t1: ("0", vec![false]), 308 | t2: ("1", vec![true]), 309 | t3: ("00", vec![false, false]), 310 | t4: ("01", vec![false, true]), 311 | t5: ("10", vec![true, false]), 312 | t6: ("11", vec![true, true]), 313 | t7: ("0101_0101__0101_1100__1000_001", vec![ 314 | false, true, false, true, 315 | false, true, false, true, 316 | false, true, false, true, 317 | true, true, false, false, 318 | true, false, false, false, 319 | false, false, true, 320 | ]), 321 | } 322 | } 323 | 324 | #[cfg(test)] 325 | mod from_str_failure_tests { 326 | // well-tested in BitString::new() 327 | } 328 | 329 | #[cfg(test)] 330 | mod from_slice_success_tests { 331 | use crate::Fid; 332 | 333 | macro_rules! parameterized_tests { 334 | ($($name:ident: $value:expr,)*) => { 335 | $( 336 | #[test] 337 | fn $name() { 338 | let arr = $value; 339 | let fid = Fid::from(&arr[..]); 340 | 341 | // TODO length check 342 | // assert_eq!(fid.length(), expected_bits); 343 | for (i, bit) in arr.iter().enumerate() { 344 | assert_eq!(fid[i as u64], *bit); 345 | } 346 | } 347 | )* 348 | } 349 | } 350 | 351 | parameterized_tests! 
{ 352 | t1: [false], 353 | t2: [true], 354 | t3: [false, false], 355 | t4: [false, true], 356 | t5: [true, false], 357 | t6: [true, true], 358 | t7: [false; 100], 359 | t8: [true; 100], 360 | } 361 | } 362 | 363 | #[cfg(test)] 364 | mod from_slice_failure_tests { 365 | use crate::Fid; 366 | 367 | #[test] 368 | #[should_panic] 369 | fn empty() { 370 | let _ = Fid::from(&[][..]); 371 | } 372 | } 373 | 374 | #[cfg(test)] 375 | mod index_u64_success_tests { 376 | // well-tested in fid_builder::{builder_from_length_success_tests, builder_from_bit_string_success_tests} 377 | } 378 | 379 | #[cfg(test)] 380 | mod index_u64_failure_tests { 381 | use crate::Fid; 382 | 383 | #[test] 384 | #[should_panic] 385 | fn over_upper_bound() { 386 | let fid = Fid::from("00"); 387 | let _ = fid[2]; 388 | } 389 | } 390 | 391 | #[cfg(test)] 392 | #[allow(non_snake_case)] 393 | mod rank_success_tests { 394 | use crate::Fid; 395 | 396 | macro_rules! parameterized_tests { 397 | ($($name:ident: $value:expr,)*) => { 398 | $( 399 | #[test] 400 | fn $name() { 401 | let (in_fid_str, in_i, expected_rank) = $value; 402 | assert_eq!( 403 | Fid::from(in_fid_str).rank(in_i), 404 | expected_rank 405 | ); 406 | } 407 | )* 408 | } 409 | } 410 | 411 | parameterized_tests! 
{ 412 | rank1_1: ("0", 0, 0), 413 | 414 | rank2_1: ("00", 0, 0), 415 | rank2_2: ("00", 1, 0), 416 | 417 | rank3_1: ("01", 0, 0), 418 | rank3_2: ("01", 1, 1), 419 | 420 | rank4_1: ("10", 0, 1), 421 | rank4_2: ("10", 1, 1), 422 | 423 | rank5_1: ("11", 0, 1), 424 | rank5_2: ("11", 1, 2), 425 | 426 | rank6_1: ("10010", 0, 1), 427 | rank6_2: ("10010", 1, 1), 428 | rank6_3: ("10010", 2, 1), 429 | rank6_4: ("10010", 3, 2), 430 | rank6_5: ("10010", 4, 2), 431 | 432 | bugfix_11110110_11010101_01000101_11101111_10101011_10100101_01100011_00110100_01010101_10010000_01001100_10111111_00110011_00111110_01110101_11011100: ( 433 | "11110110_11010101_01000101_11101111_10101011_10100101_01100011_00110100_01010101_10010000_01001100_10111111_00110011_00111110_01110101_11011100", 434 | 49, 31, 435 | ), 436 | bugfix_10100001_01010011_10101100_11100001_10110010_10000110_00010100_01001111_01011100_11010011_11110000_00011010_01101111_10101010_11000111_0110011: ( 437 | "10100001_01010011_10101100_11100001_10110010_10000110_00010100_01001111_01011100_11010011_11110000_00011010_01101111_10101010_11000111_0110011", 438 | 111, 55, 439 | ), 440 | bugfix_100_111_101_011_011_100_101_001_111_001_001_101_100_011_000_111_1___01_000_101_100_101_101_001_011_110_010_001_101_010_010_010_111_111_111_001_111_001_100_010_001_010_101_11: ( 441 | "100_111_101_011_011_100_101_001_111_001_001_101_100_011_000_111_1___01_000_101_100_101_101_001_011_110_010_001_101_010_010_010_111_111_111_001_111_001_100_010_001_010_101_11", 442 | 48, 28, 443 | ), 444 | bugfix_11100100_10110100_10000000_10111111_01110101_01100110_00101111_11101001_01100100_00001000_11010100_10100000_00010001_10100101_01100100_0010010: ( 445 | "11100100_10110100_10000000_10111111_01110101_01100110_00101111_11101001_01100100_00001000_11010100_10100000_00010001_10100101_01100100_0010010", 446 | 126, 56, 447 | ), 448 | } 449 | // Tested more in tests/ (integration test) 450 | } 451 | 452 | #[cfg(test)] 453 | mod rank_failure_tests { 454 | use 
crate::Fid; 455 | 456 | #[test] 457 | #[should_panic] 458 | fn rank_over_upper_bound() { 459 | let fid = Fid::from("00"); 460 | let _ = fid.rank(2); 461 | } 462 | } 463 | 464 | #[cfg(test)] 465 | #[allow(non_snake_case)] 466 | mod rank0_success_tests { 467 | use crate::Fid; 468 | 469 | macro_rules! parameterized_tests { 470 | ($($name:ident: $value:expr,)*) => { 471 | $( 472 | #[test] 473 | fn $name() { 474 | let (in_fid_str, in_i, expected_rank0) = $value; 475 | assert_eq!( 476 | Fid::from(in_fid_str).rank0(in_i), 477 | expected_rank0 478 | ); 479 | } 480 | )* 481 | } 482 | } 483 | 484 | parameterized_tests! { 485 | rank0_1_1: ("0", 0, 1), 486 | 487 | rank0_2_1: ("00", 0, 1), 488 | rank0_2_2: ("00", 1, 2), 489 | 490 | rank0_3_1: ("01", 0, 1), 491 | rank0_3_2: ("01", 1, 1), 492 | 493 | rank0_4_1: ("10", 0, 0), 494 | rank0_4_2: ("10", 1, 1), 495 | 496 | rank0_5_1: ("11", 0, 0), 497 | rank0_5_2: ("11", 1, 0), 498 | 499 | rank0_6_1: ("10010", 0, 0), 500 | rank0_6_2: ("10010", 1, 1), 501 | rank0_6_3: ("10010", 2, 2), 502 | rank0_6_4: ("10010", 3, 2), 503 | rank0_6_5: ("10010", 4, 3), 504 | } 505 | // Tested more in tests/ (integration test) 506 | } 507 | 508 | #[cfg(test)] 509 | mod rank0_0_failure_tests { 510 | use crate::Fid; 511 | 512 | #[test] 513 | #[should_panic] 514 | fn rank0_over_upper_bound() { 515 | let fid = Fid::from("00"); 516 | let _ = fid.rank0(2); 517 | } 518 | } 519 | 520 | #[cfg(test)] 521 | mod select_success_tests { 522 | // Tested well in tests/ (integration test) 523 | } 524 | 525 | #[cfg(test)] 526 | mod select_failure_tests { 527 | use crate::Fid; 528 | 529 | #[test] 530 | #[should_panic] 531 | fn select_over_max_rank() { 532 | let fid = Fid::from("00"); 533 | let _ = fid.select(3); 534 | } 535 | } 536 | 537 | #[cfg(test)] 538 | mod select0_success_tests { 539 | // Tested well in tests/ (integration test) 540 | } 541 | 542 | #[cfg(test)] 543 | mod select0_failure_tests { 544 | use crate::Fid; 545 | 546 | #[test] 547 | #[should_panic] 548 | fn 
select_over_max_rank() { 549 | let fid = Fid::from("00"); 550 | let _ = fid.select0(3); 551 | } 552 | } 553 | -------------------------------------------------------------------------------- /src/internal_data_structure/raw_bit_vector.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | #[derive(Debug)] 4 | /// Bit vector of arbitrary length (actually the length is limited to _[1, 2^64)_). 5 | /// 6 | /// ```text 7 | /// When fist_byte_offset = 2, last_byte_len = 2: 8 | /// 9 | /// 10101010 00000000 11111111 10 | /// | effective bits | 11 | /// ``` 12 | pub struct RawBitVector<'s> { 13 | byte_slice: &'s [u8], 14 | first_byte_offset: u8, 15 | 16 | /// Length used in last byte. 17 | /// Although byte_slice has only 1 byte and first_byte_offset > 0, 18 | /// this var can take up to 8. 19 | last_byte_len: u8, 20 | } 21 | 22 | impl<'s> RawBitVector<'s> { 23 | /// Constructor 24 | /// 25 | /// # Panics 26 | /// When: 27 | /// - `byte_slice` is empty. 28 | /// - _`first_byte_offset` >= 8_. 29 | /// - _`last_byte_len` == 0 || `last_byte_len` > 8_. 30 | /// - _`byte_slice.len() == 1 && first_byte_offset >= last_byte_len`_ 31 | pub fn new(byte_slice: &'s [u8], first_byte_offset: u8, last_byte_len: u8) -> Self { 32 | assert!(!byte_slice.is_empty()); 33 | assert!(first_byte_offset < 8); 34 | assert!(0 < last_byte_len && last_byte_len <= 8); 35 | assert!(!(byte_slice.len() == 1 && first_byte_offset >= last_byte_len)); 36 | Self { 37 | byte_slice, 38 | first_byte_offset, 39 | last_byte_len, 40 | } 41 | } 42 | 43 | /// Returns i-th bit. 44 | /// 45 | /// ```text 46 | /// When i=7: 47 | /// 48 | /// |target | 49 | /// 00000000 01000000 50 | /// ^ ^ 51 | /// offset=2 | 52 | /// i=0 i=7 53 | /// abs_i=9 54 | /// 55 | /// abs_i = offset + i 56 | /// target_byte = at [abs_i / 8] 57 | /// access(i) = target_byte[abs_i % 8] 58 | /// ``` 59 | /// 60 | /// # Panics 61 | /// When _`i` >= `self.len()`_. 
62 | pub fn access(&self, i: u64) -> bool { 63 | assert!(i < self.len()); 64 | 65 | let abs_i = self.first_byte_offset as u64 + i; 66 | let byte = self.byte_slice[(abs_i / 8) as usize]; 67 | match abs_i % 8 { 68 | 0 => byte & 0b1000_0000 != 0, 69 | 1 => byte & 0b0100_0000 != 0, 70 | 2 => byte & 0b0010_0000 != 0, 71 | 3 => byte & 0b0001_0000 != 0, 72 | 4 => byte & 0b0000_1000 != 0, 73 | 5 => byte & 0b0000_0100 != 0, 74 | 6 => byte & 0b0000_0010 != 0, 75 | 7 => byte & 0b0000_0001 != 0, 76 | _ => panic!("never happen"), 77 | } 78 | } 79 | 80 | /// Returns length. 81 | pub fn len(&self) -> u64 { 82 | if self.byte_slice.len() == 1 { 83 | self.last_byte_len as u64 - self.first_byte_offset as u64 84 | } else { 85 | (self.byte_slice.len() as u64) * 8 86 | - (self.first_byte_offset as u64) 87 | - (8 - self.last_byte_len as u64) 88 | } 89 | } 90 | 91 | /// Returns popcount of whole this bit vector. 92 | pub fn popcount(&self) -> u64 { 93 | let mut popcnt = self 94 | .byte_slice 95 | .iter() 96 | .fold(0, |popcnt: u64, byte| byte.count_ones() as u64 + popcnt); 97 | 98 | // remove 1s in the left of first_byte_offset 99 | let left_1s_byte = match self.first_byte_offset { 100 | 0 => 0, 101 | 1 => 0b10000000 & self.byte_slice[0], 102 | 2 => 0b11000000 & self.byte_slice[0], 103 | 3 => 0b11100000 & self.byte_slice[0], 104 | 4 => 0b11110000 & self.byte_slice[0], 105 | 5 => 0b11111000 & self.byte_slice[0], 106 | 6 => 0b11111100 & self.byte_slice[0], 107 | 7 => 0b11111110 & self.byte_slice[0], 108 | _ => panic!("never happen"), 109 | }; 110 | popcnt -= left_1s_byte.count_ones() as u64; 111 | 112 | // remove 1s in the left of last_byte_len 113 | let last_byte = self.byte_slice.last().unwrap(); 114 | let last_offset = self.last_byte_len - 1; 115 | let right_1s_byte = match last_offset { 116 | 0 => 0b01111111 & last_byte, 117 | 1 => 0b00111111 & last_byte, 118 | 2 => 0b00011111 & last_byte, 119 | 3 => 0b00001111 & last_byte, 120 | 4 => 0b00000111 & last_byte, 121 | 5 => 0b00000011 & 
last_byte, 122 | 6 => 0b00000001 & last_byte, 123 | 7 => 0, 124 | _ => panic!("never happen"), 125 | }; 126 | popcnt -= right_1s_byte.count_ones() as u64; 127 | 128 | popcnt 129 | } 130 | 131 | /// Makes another RawBitVector from _[`i`, `i` + `size`)_ of self. 132 | /// This method is inexpensive in that it does not copy internal bit vector. 133 | /// 134 | /// ```text 135 | /// offset=2 136 | /// | 137 | /// v | size=14 | 138 | /// 00000000 00000000 00000000 139 | /// ^ ^ 140 | /// i_start=3 i_end=16 141 | /// abs_i_start=5 abs_i_end=18 142 | /// | first| | last | 143 | /// 144 | /// 145 | /// When i=3 & size=14: 146 | /// 147 | /// i_start = 3 148 | /// abs_i_start = i_start + offset = 5 149 | /// i_end = i_start + size - 1 = 16 150 | /// abs_i_end = i_end + offset = 18 151 | /// 152 | /// first_byte = at [abs_i_start / 8] 153 | /// last_byte = at [abs_i_end / 8] 154 | /// 155 | /// new_offset = abs_i_start % 8 156 | /// 157 | /// new_last_byte_len = abs_i_end % 8 + 1 158 | /// ``` 159 | /// 160 | /// # Panics 161 | /// When: 162 | /// - _`size` == 0_ 163 | /// - _`size` > `self.len`_ 164 | /// - _`abs_i_end` / 8 + 1 == `self.byte_slice.len()` && abs_i_end` % 8 >= `last_byte_len`_ 165 | pub fn clone_sub(&self, i: u64, size: u64) -> Self { 166 | assert!(size > 0, "length must be > 0"); 167 | assert!(size <= self.len()); 168 | 169 | let i_start = i; 170 | let abs_i_start = i_start + self.first_byte_offset as u64; 171 | let i_end = i_start + size - 1; 172 | let abs_i_end = i_end + self.first_byte_offset as u64; 173 | assert!( 174 | abs_i_end / 8 + 1 < self.byte_slice.len() as u64 175 | || abs_i_end % 8 < self.last_byte_len as u64 176 | ); 177 | 178 | Self { 179 | byte_slice: &self.byte_slice[(abs_i_start as usize / 8)..=(abs_i_end as usize / 8)], 180 | first_byte_offset: (abs_i_start % 8) as u8, 181 | last_byte_len: (abs_i_end % 8 + 1) as u8, 182 | } 183 | } 184 | 185 | /// Returns a concatenated number of first 32bits. 
186 | /// 187 | /// # Panics 188 | /// If _`self.len()` > 32_ 189 | pub fn as_u32(&self) -> u32 { 190 | assert!(self.len() <= 32); 191 | 192 | let bs = self.byte_slice; 193 | let off = self.first_byte_offset; 194 | 195 | assert!(bs.len() <= 5); 196 | let mut a = [0u32; 5]; 197 | for i in 0..bs.len() { 198 | a[i] = bs[i] as u32; 199 | } 200 | // discard 1s in the last byte 201 | a[bs.len() - 1] = a[bs.len() - 1] >> (8 - self.last_byte_len) << (8 - self.last_byte_len); 202 | 203 | let mut byte = [0u32; 4]; 204 | for i in 0..4 { 205 | byte[i] = (a[i] << off) + (a[i + 1] >> (8 - off)); 206 | } 207 | 208 | (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3] 209 | } 210 | } 211 | 212 | impl<'s> fmt::Display for RawBitVector<'s> { 213 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 214 | let bits_str = self 215 | .byte_slice 216 | .iter() 217 | .enumerate() 218 | .map(|(i, byte)| { 219 | let byte_s = format!("{: >8}", format!("{:b}", byte)).replace(' ', "0"); 220 | if i < self.byte_slice.len() - 1 { 221 | byte_s 222 | } else { 223 | byte_s 224 | .chars() 225 | .take(self.last_byte_len as usize) 226 | .collect::() 227 | } 228 | }) 229 | .collect::>() 230 | .concat(); 231 | 232 | write!(f, "{}", bits_str) 233 | } 234 | } 235 | 236 | #[cfg(test)] 237 | mod new_success_tests { 238 | use super::RawBitVector; 239 | 240 | macro_rules! parameterized_tests { 241 | ($($name:ident: $value:expr,)*) => { 242 | $( 243 | #[test] 244 | fn $name() { 245 | let (byte_slice, first_byte_offset, last_byte_len) = $value; 246 | let _ = RawBitVector::new(byte_slice, first_byte_offset, last_byte_len); 247 | } 248 | )* 249 | } 250 | } 251 | 252 | parameterized_tests! 
{ 253 | t_1byte_1: (&[0b00000000], 0, 8), 254 | t_1byte_2: (&[0b00000000], 1, 8), 255 | t_1byte_3: (&[0b00000000], 2, 8), 256 | t_1byte_4: (&[0b00000000], 3, 8), 257 | t_1byte_5: (&[0b00000000], 4, 8), 258 | t_1byte_6: (&[0b00000000], 5, 8), 259 | t_1byte_7: (&[0b00000000], 6, 8), 260 | t_1byte_8: (&[0b00000000], 7, 8), 261 | } 262 | } 263 | 264 | #[cfg(test)] 265 | mod new_failure_tests { 266 | use super::RawBitVector; 267 | 268 | macro_rules! parameterized_tests { 269 | ($($name:ident: $value:expr,)*) => { 270 | $( 271 | #[test] 272 | #[should_panic] 273 | fn $name() { 274 | let (byte_slice, first_byte_offset, last_byte_len) = $value; 275 | let _ = RawBitVector::new(byte_slice, first_byte_offset, last_byte_len); 276 | } 277 | )* 278 | } 279 | } 280 | 281 | parameterized_tests! { 282 | t_empty: (&[], 0, 1), 283 | t_offset: (&[0b00000000], 8, 1), 284 | 285 | t_last_len_0: (&[0b00000000], 0, 0), 286 | t_last_len_9: (&[0b00000000, 0b00000000], 0, 9), 287 | 288 | t_1byte_1: (&[0b00000000], 0, 9), 289 | 290 | t_1byte_off7: (&[0b00000001], 7, 7), 291 | } 292 | } 293 | 294 | #[cfg(test)] 295 | mod len_success_tests { 296 | use super::RawBitVector; 297 | 298 | macro_rules! parameterized_tests { 299 | ($($name:ident: $value:expr,)*) => { 300 | $( 301 | #[test] 302 | fn $name() { 303 | let (byte_slice, first_byte_offset, last_byte_len, expected_len) = $value; 304 | let rbv = RawBitVector::new(byte_slice, first_byte_offset, last_byte_len); 305 | assert_eq!(rbv.len(), expected_len); 306 | } 307 | )* 308 | } 309 | } 310 | 311 | parameterized_tests! 
{ 312 | t_1byte_off0_1: (&[0b00000000], 0, 8, 8), 313 | t_1byte_off0_2: (&[0b00000000], 0, 7, 7), 314 | t_1byte_off0_3: (&[0b00000000], 0, 6, 6), 315 | t_1byte_off0_4: (&[0b00000000], 0, 5, 5), 316 | t_1byte_off0_5: (&[0b00000000], 0, 4, 4), 317 | t_1byte_off0_6: (&[0b00000000], 0, 3, 3), 318 | t_1byte_off0_7: (&[0b00000000], 0, 2, 2), 319 | t_1byte_off0_8: (&[0b00000000], 0, 1, 1), 320 | 321 | t_1byte_off1_1: (&[0b00000000], 1, 8, 7), 322 | t_1byte_off1_2: (&[0b00000000], 1, 7, 6), 323 | t_1byte_off1_3: (&[0b00000000], 1, 6, 5), 324 | t_1byte_off1_4: (&[0b00000000], 1, 5, 4), 325 | t_1byte_off1_5: (&[0b00000000], 1, 4, 3), 326 | t_1byte_off1_6: (&[0b00000000], 1, 3, 2), 327 | t_1byte_off1_7: (&[0b00000000], 1, 2, 1), 328 | 329 | t_1byte_off7_1: (&[0b00000000], 7, 8, 1), 330 | 331 | t_2byte_1: (&[0b00000000, 0b00000000], 0, 8, 16), 332 | t_2byte_2: (&[0b00000000, 0b00000000], 1, 8, 15), 333 | t_2byte_3: (&[0b00000000, 0b00000000], 7, 8, 9), 334 | t_2byte_4: (&[0b00000000, 0b00000000], 0, 1, 9), 335 | t_2byte_5: (&[0b00000000, 0b00000000], 0, 7, 15), 336 | t_2byte_6: (&[0b00000000, 0b00000000], 7, 1, 2), 337 | } 338 | } 339 | 340 | #[cfg(test)] 341 | mod len_failure_tests { 342 | // Nothing to do 343 | } 344 | 345 | #[cfg(test)] 346 | mod access_success_tests { 347 | use super::RawBitVector; 348 | 349 | macro_rules! parameterized_tests { 350 | ($($name:ident: $value:expr,)*) => { 351 | $( 352 | #[test] 353 | fn $name() { 354 | let (byte_slice, first_byte_offset, last_byte_len, i, expected_bit) = $value; 355 | let rbv = RawBitVector::new(byte_slice, first_byte_offset, last_byte_len); 356 | assert_eq!(rbv.access(i), expected_bit); 357 | } 358 | )* 359 | } 360 | } 361 | 362 | parameterized_tests! 
{ 363 | t_1byte_off0_1: (&[0b10000000], 0, 8, 0, true), 364 | 365 | t_1byte_off1_1: (&[0b01000000], 1, 7, 0, true), 366 | t_1byte_off1_2: (&[0b01000000], 1, 7, 1, false), 367 | 368 | t_1byte_off7: (&[0b00000001], 7, 8, 0, true), 369 | 370 | t_2byte_1: (&[0b00000000, 0b00000001], 0, 8, 15, true), 371 | t_2byte_2: (&[0b00000000, 0b00000001], 1, 8, 14, true), 372 | t_2byte_3: (&[0b00000000, 0b00000001], 7, 8, 8, true), 373 | t_2byte_4: (&[0b00000000, 0b10000000], 0, 1, 8, true), 374 | t_2byte_5: (&[0b00000000, 0b00000010], 0, 7, 14, true), 375 | t_2byte_6: (&[0b00000000, 0b10000000], 7, 1, 1, true), 376 | } 377 | } 378 | 379 | #[cfg(test)] 380 | mod access_failure_tests { 381 | use super::RawBitVector; 382 | 383 | #[test] 384 | #[should_panic] 385 | fn over_upper_bound() { 386 | let rbv = RawBitVector::new(&[0b00000000], 1, 2); 387 | let _ = rbv.access(1); 388 | 389 | // basically, well-tested in len_success_tests 390 | } 391 | } 392 | 393 | #[cfg(test)] 394 | mod popcount_success_tests { 395 | use super::RawBitVector; 396 | 397 | macro_rules! parameterized_tests { 398 | ($($name:ident: $value:expr,)*) => { 399 | $( 400 | #[test] 401 | fn $name() { 402 | let (byte_slice, first_byte_offset, last_byte_len, expected_popcount) = $value; 403 | let rbv = RawBitVector::new(byte_slice, first_byte_offset, last_byte_len); 404 | assert_eq!(rbv.popcount(), expected_popcount); 405 | } 406 | )* 407 | } 408 | } 409 | 410 | parameterized_tests! 
{ 411 | t1: (&[0b11111111], 0, 1, 1), 412 | t2: (&[0b11111111], 1, 8, 7), 413 | t3: (&[0b11111111], 1, 7, 6), 414 | t4: (&[0b11111111], 1, 6, 5), 415 | t5: (&[0b11101111], 0, 8, 7), 416 | 417 | t6: (&[0b01010101, 0b01111111], 0, 1, 4), 418 | t7: (&[0b10101010, 0b11111111], 0, 1, 5), 419 | t8: (&[0b11111111, 0b11111111], 0, 1, 9), 420 | t9: (&[0b11111111, 0b11111111], 1, 1, 8), 421 | 422 | t10: (&[0b11111111, 0b00010000, 0b11111111], 7, 1, 3), 423 | } 424 | } 425 | 426 | #[cfg(test)] 427 | mod popcount_failure_tests { 428 | // Nothing to do 429 | } 430 | 431 | #[cfg(test)] 432 | mod clone_sub_success_tests { 433 | use super::RawBitVector; 434 | 435 | macro_rules! parameterized_tests { 436 | ($($name:ident: $value:expr,)*) => { 437 | $( 438 | #[test] 439 | fn $name() { 440 | let (byte_slice, first_byte_offset, last_byte_len, i, size, expected_bit_vec) = $value; 441 | let rbv = RawBitVector::new(byte_slice, first_byte_offset, last_byte_len); 442 | let cloned_rbv = rbv.clone_sub(i, size); 443 | 444 | assert_eq!(cloned_rbv.len(), expected_bit_vec.len() as u64); 445 | for (i, expected_bit) in expected_bit_vec.iter().enumerate() { 446 | assert_eq!(cloned_rbv.access(i as u64), *expected_bit); 447 | } 448 | } 449 | )* 450 | } 451 | } 452 | 453 | parameterized_tests! 
{ 454 | t1_1: (&[0b01000000], 0, 1, 0, 1, vec![false]), 455 | t1_2: (&[0b01000000], 1, 2, 0, 1, vec![true]), 456 | 457 | t8_1_1: (&[0b01000101], 0, 8, 0, 1, vec![false]), 458 | t8_1_2: (&[0b01000101], 0, 8, 0, 2, vec![false, true]), 459 | t8_1_3: (&[0b01000101], 0, 8, 0, 3, vec![false, true, false]), 460 | t8_1_4: (&[0b01000101], 0, 8, 0, 4, vec![false, true, false, false]), 461 | t8_1_5: (&[0b01000101], 0, 8, 0, 5, vec![false, true, false, false, false]), 462 | t8_1_6: (&[0b01000101], 0, 8, 0, 6, vec![false, true, false, false, false, true]), 463 | t8_1_7: (&[0b01000101], 0, 8, 0, 7, vec![false, true, false, false, false, true, false]), 464 | t8_1_8: (&[0b01000101], 0, 8, 0, 8, vec![false, true, false, false, false, true, false, true]), 465 | t8_1_9: (&[0b01000101, 0b10000000], 1, 1, 0, 8, vec![true, false, false, false, true, false, true, true]), 466 | 467 | t8_2_1: (&[0b01000101], 0, 8, 7, 1, vec![true]), 468 | t8_2_2: (&[0b01000101, 0b10000000], 1, 1, 6, 2, vec![true, true]), 469 | t8_2_3: (&[0b01000101, 0b10000000], 1, 1, 7, 1, vec![true]), 470 | 471 | t9_1_1: (&[0b01000101, 0b10000000], 0, 1, 0, 1, vec![false]), 472 | t9_1_2: (&[0b01000101, 0b10000000], 0, 1, 0, 2, vec![false, true]), 473 | t9_1_3: (&[0b01000101, 0b10000000], 0, 1, 0, 3, vec![false, true, false]), 474 | t9_1_4: (&[0b01000101, 0b10000000], 0, 1, 0, 4, vec![false, true, false, false]), 475 | t9_1_5: (&[0b01000101, 0b10000000], 0, 1, 0, 5, vec![false, true, false, false, false]), 476 | t9_1_6: (&[0b01000101, 0b10000000], 0, 1, 0, 6, vec![false, true, false, false, false, true]), 477 | t9_1_7: (&[0b01000101, 0b10000000], 0, 1, 0, 7, vec![false, true, false, false, false, true, false]), 478 | t9_1_8: (&[0b01000101, 0b10000000], 0, 1, 0, 8, vec![false, true, false, false, false, true, false, true]), 479 | t9_1_9: (&[0b01000101, 0b10000000], 0, 1, 0, 9, vec![false, true, false, false, false, true, false, true, true]), 480 | t9_1_10: (&[0b01000101, 0b10000000], 1, 2, 0, 9, vec![true, false, false, 
false, true, false, true, true, false]), 481 | 482 | t9_2_1: (&[0b01000101, 0b10000000], 0, 1, 7, 1, vec![true]), 483 | t9_2_2: (&[0b01000101, 0b10000000], 0, 1, 7, 2, vec![true, true]), 484 | t9_2_3: (&[0b01000101, 0b10000000], 1, 2, 7, 2, vec![true, false]), 485 | 486 | t9_3_1: (&[0b01000101, 0b10000000], 0, 1, 8, 1, vec![true]), 487 | t9_3_2: (&[0b01000101, 0b10000000], 1, 2, 8, 1, vec![false]), 488 | 489 | t13_1_1: (&[0b10110010, 0b01010000], 0, 4, 9, 3, vec![true, false, true]), 490 | t13_1_2: (&[0b10110010, 0b01010000], 1, 4, 9, 2, vec![false, true]), 491 | 492 | t_bugfix1: (&[0b11111111, 0b00101001], 0, 1, 0, 1, vec![true]), 493 | } 494 | } 495 | 496 | #[cfg(test)] 497 | mod clone_sub_failure_tests { 498 | use super::RawBitVector; 499 | 500 | macro_rules! parameterized_tests { 501 | ($($name:ident: $value:expr,)*) => { 502 | $( 503 | #[test] 504 | #[should_panic] 505 | fn $name() { 506 | let (byte_slice, first_byte_offset, last_byte_len, i, size) = $value; 507 | let rbv = RawBitVector::new(byte_slice, first_byte_offset, last_byte_len); 508 | let _ = rbv.clone_sub(i, size); 509 | } 510 | )* 511 | } 512 | } 513 | 514 | parameterized_tests! 
{ 515 | t1_1: (&[0b00000000], 0, 1, 0, 0), 516 | t1_2: (&[0b00000000], 0, 1, 0, 2), 517 | t1_3: (&[0b00000000], 0, 1, 1, 1), 518 | t1_4: (&[0b00000000], 1, 1, 0, 2), 519 | 520 | t8_1_1: (&[0b01000101], 0, 8, 0, 0), 521 | t8_1_2: (&[0b01000101], 0, 8, 0, 9), 522 | t8_1_3: (&[0b01000101, 0b00000000], 1, 1, 0, 9), 523 | 524 | t8_2_1: (&[0b01000101], 0, 8, 7, 0), 525 | t8_2_2: (&[0b01000101], 0, 8, 7, 2), 526 | t8_2_3: (&[0b01000101, 0b00000000], 1, 1, 7, 2), 527 | 528 | t9_1_1: (&[0b01000101, 0b00000000], 0, 1, 0, 0), 529 | t9_1_2: (&[0b01000101, 0b00000000], 0, 1, 0, 10), 530 | t9_1_3: (&[0b01000101, 0b00000000], 1, 2, 0, 10), 531 | 532 | t9_2_1: (&[0b01000101, 0b00000000], 0, 1, 7, 0), 533 | t9_2_2: (&[0b01000101, 0b00000000], 0, 1, 7, 3), 534 | t9_2_3: (&[0b01000101, 0b00000000], 1, 2, 7, 3), 535 | 536 | t9_3_1: (&[0b01000101, 0b00000000], 0, 1, 8, 0), 537 | t9_3_2: (&[0b01000101, 0b00000000], 0, 1, 8, 2), 538 | t9_3_3: (&[0b01000101, 0b00000000], 1, 2, 8, 2), 539 | } 540 | } 541 | 542 | #[cfg(test)] 543 | mod clone_sub_fuzzing_tests { 544 | use super::RawBitVector; 545 | 546 | #[test] 547 | fn test() { 548 | let samples = 10000; 549 | 550 | fn sub_str(s: &str, i: u64, size: u64) -> String { 551 | let ss: String = s.chars().skip(i as usize).take(size as usize).collect(); 552 | ss 553 | } 554 | 555 | fn str_into_byte_vec(s: &str) -> (Vec, u8) { 556 | let bits: Vec = s.as_bytes().iter().map(|c| *c == '1' as u8).collect(); 557 | 558 | let mut byte_vec: Vec = Vec::with_capacity(bits.len() / 8 + 1); 559 | let mut last_byte_len = 0u8; 560 | 561 | for bits8 in bits.chunks(8) { 562 | last_byte_len = bits8.len() as u8; // although this bits8 might not be a last byte. 
563 | 564 | let byte = (0..last_byte_len).fold(0, |byte, i| { 565 | byte + if bits8[i as usize] { 1 << (7 - i) } else { 0 } 566 | }); 567 | byte_vec.push(byte); 568 | } 569 | 570 | (byte_vec, last_byte_len) 571 | } 572 | 573 | for _ in 0..samples { 574 | let s = &format!("{:b}", rand::random::()); 575 | let (byte_vec, last_byte_len) = str_into_byte_vec(s); 576 | let rbv = RawBitVector::new(&byte_vec[..], 0, last_byte_len); 577 | // TODO more tests (first_byte_offset > 0) 578 | 579 | for i in 0..s.len() { 580 | for size in 1..(s.len() - i) { 581 | let copied_rbv = rbv.clone_sub(i as u64, size as u64); 582 | 583 | let substr = sub_str(s, i as u64, size as u64); 584 | let (substr_byte_vec, substr_last_byte_len) = str_into_byte_vec(&substr); 585 | let substr_rbv = 586 | RawBitVector::new(&substr_byte_vec[..], 0, substr_last_byte_len); 587 | 588 | assert_eq!(copied_rbv.len(), substr_rbv.len()); 589 | for i in 0..copied_rbv.len() { 590 | assert_eq!( 591 | copied_rbv.access(i), substr_rbv.access(i), 592 | "\nbit vector = {}, RawBitVector::clone_sub(i={}, size={});\nActual: {}\nExpected: {}", 593 | s, i, size, copied_rbv, substr 594 | ) 595 | } 596 | } 597 | } 598 | } 599 | } 600 | } 601 | 602 | #[cfg(test)] 603 | mod as_u32_success_tests { 604 | use super::RawBitVector; 605 | 606 | macro_rules! parameterized_tests { 607 | ($($name:ident: $value:expr,)*) => { 608 | $( 609 | #[test] 610 | fn $name() { 611 | let (byte_slice, first_byte_offset, last_byte_len, expected_u32) = $value; 612 | let rbv = RawBitVector::new(byte_slice, first_byte_offset, last_byte_len); 613 | assert_eq!(rbv.as_u32(), expected_u32); 614 | } 615 | )* 616 | } 617 | } 618 | 619 | parameterized_tests! 
{
        t1_1: (&[0b11111111], 0, 1, 0b10000000_00000000_00000000_00000000),
        t1_2: (&[0b11111111], 0, 7, 0b11111110_00000000_00000000_00000000),
        t1_3: (&[0b11111111], 1, 2, 0b10000000_00000000_00000000_00000000),
        t1_4: (&[0b11111111], 1, 7, 0b11111100_00000000_00000000_00000000),

        t8_1: (&[0b10010000], 0, 8, 0b10010000_00000000_00000000_00000000),

        t32_1: (&[0b10010000, 0b01000001, 0b00001000, 0b00011010], 0, 7, 0b10010000_01000001_00001000_00011010),
        t32_2: (&[0b10010000, 0b01000001, 0b00001000, 0b00011010], 0, 8, 0b10010000_01000001_00001000_00011010),
    }
}

#[cfg(test)]
mod as_u32_failure_tests {
    use super::RawBitVector;

    // `expected` pins the panic to the length assertion inside `as_u32()`, so a
    // panic raised earlier (e.g. by `RawBitVector::new`) no longer passes the test.

    #[test]
    #[should_panic(expected = "self.len() <= 32")]
    fn test() {
        // Four full bytes plus one bit of the fifth: 4 * 8 + 1 = 33 bits.
        let byte_slice = &[0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000];
        let rbv = RawBitVector::new(byte_slice, 0, 1);
        let _ = rbv.as_u32();
    }

    #[test]
    #[should_panic(expected = "self.len() <= 32")]
    fn test_with_first_byte_offset() {
        // Five full bytes minus a 7-bit offset: 5 * 8 - 7 = 33 bits.
        let byte_slice = &[0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000];
        let rbv = RawBitVector::new(byte_slice, 7, 8);
        let _ = rbv.as_u32();
    }
}

--------------------------------------------------------------------------------