├── .VERSION ├── .github └── workflows │ └── wasm.yaml ├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── TODO.md ├── release.toml └── src ├── bit_vec ├── bit_slice.rs ├── bit_vector.rs ├── mod.rs ├── prim.rs └── traits.rs ├── broadword.rs ├── coding ├── comma.rs ├── elias.rs ├── fib.rs ├── mod.rs ├── traits.rs ├── trans.rs └── unary.rs ├── int_vec ├── int_vector.rs ├── mod.rs └── traits.rs ├── internal ├── errors.rs ├── mod.rs ├── search.rs └── vector_base.rs ├── lib.rs ├── macros.rs ├── rank ├── jacobson.rs ├── mod.rs ├── prim.rs ├── rank9.rs └── traits.rs ├── select ├── bin_search.rs ├── mod.rs └── traits.rs ├── space_usage.rs ├── storage.rs └── stream ├── bit_buffer.rs ├── mod.rs └── traits.rs /.VERSION: -------------------------------------------------------------------------------- 1 | 0.4.4 2 | -------------------------------------------------------------------------------- /.github/workflows/wasm.yaml: -------------------------------------------------------------------------------- 1 | on: [push] 2 | 3 | name: wasm32 builds 4 | 5 | jobs: 6 | wasm32-unknown-unknown: 7 | name: wasm32-unknown-unknown 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@master 11 | - uses: actions-rs/toolchain@v1 12 | with: 13 | toolchain: stable 14 | target: wasm32-unknown-unknown 15 | - uses: actions-rs/cargo@v1 16 | with: 17 | command: build 18 | args: --target wasm32-unknown-unknown 19 | wasm32-wasi: 20 | name: wasm32-wasi 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@master 24 | - name: Install wasm32-wasi target 25 | uses: actions-rs/toolchain@v1 26 | with: 27 | toolchain: stable 28 | target: wasm32-wasi 29 | - name: Install wasmtime 30 | run: "curl https://wasmtime.dev/install.sh -sSf | bash" 31 | - name: Add wasmtime to PATH 32 | run: echo "::add-path::$HOME/.wasmtime/bin" 33 | - name: Install cargo-wasi command 34 | uses: actions-rs/cargo@v1 35 | with: 36 | command: install 37 | args: --force cargo-wasi 38 | - name: Build code with cargo-wasi 39 | uses: actions-rs/cargo@v1 40 | with: 41 | command: wasi 42 | args: build 43 | - name: Run tests under wasm32-wasi 44 | uses: actions-rs/cargo@v1 45 | with: 46 | command: wasi 47 | args: test 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | target.* 3 | Cargo.lock 4 | .idea 5 | *.iml 6 | cmake-build-*/ 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - stable 4 | - beta 5 | - nightly 6 | 7 | matrix: 8 | allow_failures: 9 | - rust: nightly 10 | 11 | notifications: 12 | email: 13 | on_success: never 14 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog] and this project adheres to 6 | [Semantic Versioning]. 7 | 8 | [Keep a Changelog]: http://keepachangelog.com/en/1.0.0/ 9 | [Semantic Versioning]: http://semver.org/spec/v2.0.0.html 10 | 11 | ## [0.5.2] - 2019-08-29 12 | - Moved documentation hosting to docs.rs. 
13 | 14 | ## [0.5.1] - 2019-08-29 15 | 16 | ### Removed 17 | - Dependency on `clippy`. 18 | - Use of `extern crate` in example. 19 | 20 | ## [0.5.0] - 2019-08-29 21 | 22 | ### Updated 23 | - `quickcheck` dev dependency to version 0.9.0. 24 | 25 | ### Fixed 26 | - A failing doc test. 27 | 28 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "succinct" 3 | version = "0.5.3-alpha.0" 4 | authors = ["Jesse A. Tov "] 5 | description = "Succinct data structures for Rust" 6 | repository = "https://github.com/tov/succinct-rs" 7 | homepage = "https://github.com/tov/succinct-rs" 8 | documentation = "https://docs.rs/succinct/" 9 | readme = "README.md" 10 | license = "MIT/Apache-2.0" 11 | keywords = ["succinct", "rank", "select"] 12 | 13 | [dependencies] 14 | num-traits = "0.2" 15 | byteorder = "1.2" 16 | 17 | [dev-dependencies] 18 | quickcheck = "0.9.0" 19 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Jesse A. 
Tov 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Succinct Data Structures for Rust 2 | 3 | [![Build Status](https://travis-ci.org/tov/succinct-rs.svg?branch=master)](https://travis-ci.org/tov/succinct-rs) 4 | [![Crates.io](https://img.shields.io/crates/v/succinct.svg?maxAge=2592000)](https://crates.io/crates/succinct) 5 | [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE-MIT) 6 | [![License: Apache 2.0](https://img.shields.io/badge/license-Apache_2.0-blue.svg)](LICENSE-APACHE) 7 | 8 | So far we have: 9 | 10 | - bit vectors and bit buffer; 11 | - integer vectors with arbitrary-sized (1- to 64-bit) elements; 12 | - a variety of universal codes; 13 | - constant-time rank queries; and 14 | - *O*(lg lg *n*)-time select queries based on binary search over ranks. 15 | 16 | ## Usage 17 | 18 | It’s [on crates.io](https://crates.io/crates/succinct), so you can add 19 | 20 | ```toml 21 | [dependencies] 22 | succinct = "0.5.2" 23 | ``` 24 | 25 | to your `Cargo.toml`. 26 | 27 | ## Credits 28 | 29 | - `IntVec` borrows some implementation techniques from 30 | [`nbitsvec`](https://crates.io/crates/nbits_vec). The main 31 | difference is that `nbitsvec` uses a `typenum` to put the element 32 | size (in bits) as a parameter to the vector type. Also, `nbitsvec` 33 | is likely to be faster. 34 | 35 | - Some of the API is inspired by 36 | [SDSL](https://github.com/simongog/sdsl-lite), a C++ succinct data 37 | structures library. It’s much more complete than `succinct`, and 38 | probably more correct and faster too. 39 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | To Do: 2 | 3 | - compressed integer vectors 4 | - serialization 5 | - balanced parentheses 6 | - more... 7 | 8 | Design questions: 9 | 10 | - How can we properly stack up capabilities like RankSupport and 11 | SelectSupport? 12 | 13 | - Should we use a borrowing model or an ownership model? 14 | 15 | - How should we parameterize RankSupport and SelectSupport to indicate 16 | a structure that supports all bool queries, all u8 queries, only 1 17 | (and not 0) queries, etc? 18 | 19 | - Can UniversalCodes better indicate their domains? In types? 
20 | 21 | - What can/should we try to do block-wise rather than bit-wise? 22 | 23 | - How should we account for overflows? 24 | -------------------------------------------------------------------------------- /release.toml: -------------------------------------------------------------------------------- 1 | pre-release-replacements = [ 2 | { file="README.md", search="succinct = \"[0-9.]*\"", replace="succinct = \"{{version}}\"" }, 3 | { file="src/lib.rs", search="succinct = \"[0-9.]*\"", replace="succinct = \"{{version}}\"" }, 4 | { file="src/lib.rs", search="https://docs[.]rs/succinct/[0-9.]*", replace="https://docs.rs/succinct/{{version}}" }, 5 | { file="CHANGELOG.md", search="\\[Unreleased\\]", replace="[{{version}}] - {{date}}" } 6 | ] 7 | -------------------------------------------------------------------------------- /src/bit_vec/bit_slice.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{Range, RangeTo, RangeFrom, RangeFull}; 2 | 3 | use bit_vec::traits::*; 4 | use space_usage::SpaceUsage; 5 | use storage::BlockType; 6 | 7 | /// A borrowed slice of a bit vector. 8 | #[derive(Clone, Copy, Debug)] 9 | pub struct BitSlice<'a, Base: 'a + BitVec + ?Sized> { 10 | data: &'a Base, 11 | start: u64, 12 | len: u64, 13 | } 14 | 15 | /// A borrowed, mutable slice of a bit vector. 16 | #[derive(Debug)] 17 | pub struct BitSliceMut<'a, Base: 'a + BitVecMut + ?Sized> { 18 | data: &'a mut Base, 19 | start: u64, 20 | len: u64, 21 | } 22 | 23 | impl<'a, Base: 'a + BitVec + ?Sized> BitSlice<'a, Base> { 24 | /// Slices base to the specified range. 25 | pub fn new>(base: &'a Base, range: R) -> Self { 26 | let range = range.into_range(0, base.bit_len()); 27 | assert!(range.end <= base.bit_len(), "BitSlice::new: out of bounds"); 28 | BitSlice { 29 | data: base, 30 | start: range.start, 31 | len: range.end.saturating_sub(range.start), 32 | } 33 | } 34 | 35 | /// Slices this slice into a subslice. 36 | /// 37 | /// Unlike `BitSlice::new`, does not create an additional layer of 38 | /// indirection. 39 | pub fn slice>(&self, range: R) -> Self { 40 | let range = range.into_range(0, self.len); 41 | assert!(range.end <= self.len, "BitSlice::slice: out of bounds"); 42 | BitSlice { 43 | data: self.data, 44 | start: self.start + range.start, 45 | len: range.end.saturating_sub(range.start), 46 | } 47 | } 48 | } 49 | 50 | impl<'a, Base: 'a + BitVecMut + ?Sized> BitSliceMut<'a, Base> { 51 | /// Slices base to the specified range. 52 | pub fn new>(base: &'a mut Base, range: R) -> Self { 53 | let range = range.into_range(0, base.bit_len()); 54 | assert!(range.end <= base.bit_len(), "BitSlice::new: out of bounds"); 55 | BitSliceMut { 56 | data: base, 57 | start: range.start, 58 | len: range.end.saturating_sub(range.start), 59 | } 60 | } 61 | 62 | /// Slices this slice into a mutable subslice. 63 | /// 64 | /// Unlike `BitSliceMut::new`, does not create an additional layer of 65 | /// indirection. 66 | pub fn slice_mut>(&mut self, range: R) 67 | -> BitSliceMut { 68 | let range = range.into_range(0, self.len); 69 | assert!(range.end <= self.len, "BitSlice::slice: out of bounds"); 70 | BitSliceMut { 71 | data: self.data, 72 | start: self.start + range.start, 73 | len: range.end.saturating_sub(range.start), 74 | } 75 | } 76 | 77 | /// Slices this slice into an immutable subslice. 
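    ///
    /// The returned slice is read-only: it implements `BitVec` but not
    /// `BitVecMut`, so it cannot be used to modify the underlying bits.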
78 | pub fn slice>(&self, range: R) -> BitSlice { 79 | let range = range.into_range(0, self.len); 80 | assert!(range.end <= self.len, "BitSlice::slice: out of bounds"); 81 | BitSlice { 82 | data: self.data, 83 | start: self.start + range.start, 84 | len: range.end.saturating_sub(range.start), 85 | } 86 | } 87 | } 88 | 89 | impl<'a, Base: 'a + BitVec + ?Sized> BitVec for BitSlice<'a, Base> { 90 | type Block = Base::Block; 91 | 92 | #[inline] 93 | fn bit_len(&self) -> u64 { 94 | self.len 95 | } 96 | 97 | #[inline] 98 | fn get_bit(&self, position: u64) -> bool { 99 | assert!(position < self.len, "BitSlice::get_bit: out of bounds"); 100 | self.data.get_bit(self.start + position) 101 | } 102 | 103 | fn get_bits(&self, position: u64, count: usize) -> Self::Block { 104 | let end = position.checked_add(count as u64) 105 | .expect("BitSlice::get_bits: index overflow"); 106 | assert!(end <= self.len, "BitSlice::get_bits: out of bounds"); 107 | self.data.get_bits(self.start + position, count) 108 | } 109 | 110 | fn get_block(&self, position: usize) -> Self::Block { 111 | self.get_bits(Self::Block::mul_nbits(position), Self::Block::nbits()) 112 | } 113 | } 114 | 115 | impl<'a, Base: 'a + BitVecMut + ?Sized> BitVec for BitSliceMut<'a, Base> { 116 | type Block = Base::Block; 117 | 118 | #[inline] 119 | fn bit_len(&self) -> u64 { 120 | self.len 121 | } 122 | 123 | #[inline] 124 | fn get_bit(&self, position: u64) -> bool { 125 | assert!(position < self.len, "BitSlice::get_bit: out of bounds"); 126 | self.data.get_bit(self.start + position) 127 | } 128 | 129 | fn get_bits(&self, position: u64, count: usize) -> Self::Block { 130 | let end = position.checked_add(count as u64) 131 | .expect("BitSliceMut::get_bits: index overflow"); 132 | assert!(end <= self.len, "BitSliceMut::get_bits: out of bounds"); 133 | self.data.get_bits(self.start + position, count) 134 | } 135 | 136 | fn get_block(&self, position: usize) -> Self::Block { 137 | self.get_bits(Self::Block::mul_nbits(position), Self::Block::nbits()) 138 | } 139 | } 140 | 141 | impl<'a, Base: 'a + BitVecMut + ?Sized> BitVecMut for BitSliceMut<'a, Base> { 142 | #[inline] 143 | fn set_bit(&mut self, position: u64, value: bool) { 144 | assert!(position < self.len, "BitSlice::set_bit: out of bounds"); 145 | self.data.set_bit(self.start + position, value); 146 | } 147 | 148 | fn set_bits(&mut self, position: u64, count: usize, value: Self::Block) { 149 | let end = position.checked_add(count as u64) 150 | .expect("BitSliceMut::get_bits: index overflow"); 151 | assert!(end <= self.len, "BitSliceMut::get_bits: out of bounds"); 152 | self.data.set_bits(self.start + position, count, value); 153 | } 154 | 155 | fn set_block(&mut self, position: usize, value: Self::Block) { 156 | self.set_bits(Self::Block::mul_nbits(position), 157 | Self::Block::nbits(), value); 158 | } 159 | } 160 | 161 | impl<'a, Base: 'a + BitVec + ?Sized> SpaceUsage for BitSlice<'a, Base> { 162 | fn is_stack_only() -> bool { true } 163 | fn heap_bytes(&self) -> usize { 0 } 164 | } 165 | 166 | impl<'a, Base: 'a + BitVecMut + ?Sized> SpaceUsage for BitSliceMut<'a, Base> { 167 | fn is_stack_only() -> bool { true } 168 | fn heap_bytes(&self) -> usize { 0 } 169 | } 170 | 171 | /// Range polymorphism support. 172 | /// 173 | /// The idea is to realize partial ranges by providing start limits to fill 174 | /// in the missing bounds. 
175 | /// 176 | /// # Examples 177 | /// 178 | /// ``` 179 | /// use succinct::bit_vec::IntoRange; 180 | /// 181 | /// assert_eq!((3..5).into_range(0, 8), 3..5); 182 | /// assert_eq!(( ..5).into_range(0, 8), 0..5); 183 | /// assert_eq!((3.. ).into_range(0, 8), 3..8); 184 | /// assert_eq!(( .. ).into_range(0, 8), 0..8); 185 | /// ``` 186 | pub trait IntoRange { 187 | /// Instantiates a range to a structure by provided bounds where bounds 188 | /// are absent. 189 | fn into_range(self, start: T, limit: T) -> Range; 190 | } 191 | 192 | impl IntoRange for Range { 193 | fn into_range(self, _: T, _: T) -> Range { self } 194 | } 195 | 196 | impl IntoRange for RangeTo { 197 | fn into_range(self, start: T, _: T) -> Range { start .. self.end } 198 | } 199 | 200 | impl IntoRange for RangeFrom { 201 | fn into_range(self, _: T, end: T) -> Range { self.start .. end } 202 | } 203 | 204 | impl IntoRange for RangeFull { 205 | fn into_range(self, start: T, end: T) -> Range { start .. end } 206 | } 207 | -------------------------------------------------------------------------------- /src/bit_vec/bit_vector.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | #[cfg(target_pointer_width = "32")] 4 | use num_traits::ToPrimitive; 5 | 6 | use internal::vector_base::{VectorBase, self}; 7 | use space_usage::SpaceUsage; 8 | use storage::BlockType; 9 | use super::traits::*; 10 | 11 | /// Uncompressed vector of bits. 12 | #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 13 | pub struct BitVector(VectorBase); 14 | 15 | impl BitVector { 16 | /// Creates a new, empty bit vector. 17 | pub fn new() -> Self { 18 | BitVector(VectorBase::new()) 19 | } 20 | 21 | /// Creates a new, empty bit vector with space allocated for `capacity` 22 | /// bits. 23 | /// 24 | /// # Panics 25 | /// 26 | /// Panics if `capacity` is too large. In particular the number of 27 | /// blocks required by the capacity (`capacity / Block::nbits()`) 28 | /// must fit in a `usize`. 29 | pub fn with_capacity(capacity: u64) -> Self { 30 | BitVector(VectorBase::with_capacity(1, capacity)) 31 | } 32 | 33 | /// Creates a new, empty bit vector with space allocated for `capacity` 34 | /// blocks. 35 | pub fn block_with_capacity(capacity: usize) -> Self { 36 | BitVector(VectorBase::block_with_capacity(capacity)) 37 | } 38 | 39 | /// Creates a new bit vector of `len` bits initialized to `value`. 40 | /// 41 | /// # Panics 42 | /// 43 | /// Panics if `len` is too large. In particular the number of 44 | /// blocks required by the capacity (`len / Block::nbits()`) 45 | /// must fit in a `usize`. 46 | pub fn with_fill(len: u64, value: bool) -> Self { 47 | let block_size = Block::checked_ceil_div_nbits(len) 48 | .expect("BitVector::with_fill: overflow"); 49 | let block_value = if value {!Block::zero()} else {Block::zero()}; 50 | let mut result = Self::block_with_fill(block_size, block_value); 51 | result.0.truncate(1, len); 52 | result 53 | } 54 | 55 | /// Creates a new bit vector of `block_len` blocks initialized to `value`. 56 | pub fn block_with_fill(block_len: usize, value: Block) -> Self { 57 | BitVector(VectorBase::block_with_fill(1, block_len, value)) 58 | } 59 | 60 | /// How many bits the bit vector can hold without reallocating. 61 | pub fn capacity(&self) -> u64 { 62 | self.0.capacity(1) 63 | } 64 | 65 | /// How many blocks the bit vector can hold without reallocating. 
66 | pub fn block_capacity(&self) -> usize { 67 | self.0.block_capacity() 68 | } 69 | 70 | /// Resizes the bit vector to the given number of elements, 71 | /// filling if necessary. 72 | /// 73 | /// # Panics 74 | /// 75 | /// Panics if `new_len` is too large. In particular the number of 76 | /// blocks required by the capacity (`new_len / Block::nbits()`) 77 | /// must fit in a `usize`. 78 | pub fn resize(&mut self, new_len: u64, value: bool) { 79 | let new_block_len = Block::checked_ceil_div_nbits(new_len) 80 | .expect("BitVector::resize: overflow"); 81 | 82 | if new_len < self.bit_len() || !value { 83 | self.block_resize(new_block_len, Block::zero()); 84 | } else { 85 | let trailing = Block::last_block_bits(self.bit_len()); 86 | let remaining = Block::nbits() - trailing; 87 | for _ in 0 .. remaining { 88 | self.0.push_bit(true); 89 | } 90 | self.block_resize(new_block_len, !Block::zero()); 91 | } 92 | 93 | self.0.truncate(1, new_len); 94 | } 95 | 96 | /// Resizes the bit vector to the given number of blocks, 97 | /// filling if necessary. 98 | pub fn block_resize(&mut self, new_len: usize, value: Block) { 99 | self.0.block_resize(1, new_len, value); 100 | } 101 | 102 | /// Reserves capacity for at least `additional` more bits to be 103 | /// inserted. 104 | /// 105 | /// The collection may reserve more space to avoid frequent 106 | /// reallocations. 107 | /// 108 | /// # Panics 109 | /// 110 | /// Panics if the number of blocks overflows a `usize`. 111 | pub fn reserve(&mut self, additional: u64) { 112 | self.0.reserve(1, additional); 113 | } 114 | 115 | /// Reserves capacity for at least `additional` blocks of bits to be 116 | /// inserted. 117 | /// 118 | /// The collection may reserve more space to avoid frequent 119 | /// reallocations. 120 | /// 121 | /// # Panics 122 | /// 123 | /// Panics if the number of blocks overflows a `usize`. 124 | pub fn block_reserve(&mut self, additional: usize) { 125 | self.0.block_reserve(additional); 126 | } 127 | 128 | /// Reserves capacity for at least `additional` more bits to be 129 | /// inserted. 130 | /// 131 | /// Unlike [`reserve`](#method.reserve), does nothing if the 132 | /// capacity is already sufficient. 133 | /// 134 | /// # Panics 135 | /// 136 | /// Panics if the number of blocks overflows a `usize`. 137 | pub fn reserve_exact(&mut self, additional: u64) { 138 | self.0.reserve_exact(1, additional); 139 | } 140 | 141 | /// Reserves capacity for at least `additional` more blocks of bits to be 142 | /// inserted. 143 | /// 144 | /// Unlike [`reserve_block`](#method.reserve_block), does nothing if the 145 | /// capacity is already sufficient. 146 | /// 147 | /// # Panics 148 | /// 149 | /// Panics if the number of blocks overflows a `usize`. 150 | pub fn block_reserve_exact(&mut self, additional: usize) { 151 | self.0.block_reserve_exact(additional); 152 | } 153 | 154 | /// Shrinks the capacity to just fit the number of elements. 155 | pub fn shrink_to_fit(&mut self) { 156 | self.0.shrink_to_fit() 157 | } 158 | 159 | /// Shrinks to the given size. 160 | /// 161 | /// Does nothing if `len` is greater than the current size. 162 | pub fn truncate(&mut self, len: u64) { 163 | self.0.truncate(1, len); 164 | } 165 | 166 | /// Shrinks to the given size in blocks. 167 | /// 168 | /// Does nothing if `block_len` is greater than the current size in blocks. 169 | pub fn block_truncate(&mut self, block_len: usize) { 170 | self.0.block_truncate(1, block_len); 171 | } 172 | 173 | /// Sets the size to 0 while retaining the allocated storage. 
174 | pub fn clear(&mut self) { 175 | self.0.clear(); 176 | } 177 | 178 | /// Returns an iterator over the bits of the bit vector 179 | pub fn iter(&self) -> Iter { 180 | Iter(vector_base::Iter::new(1, &self.0)) 181 | } 182 | } 183 | 184 | impl BitVec for BitVector { 185 | type Block = Block; 186 | 187 | #[inline] 188 | fn bit_len(&self) -> u64 { 189 | self.0.len() 190 | } 191 | 192 | fn get_bit(&self, index: u64) -> bool { 193 | self.0.get_bit(index) 194 | } 195 | 196 | #[inline] 197 | fn get_block(&self, index: usize) -> Block { 198 | self.0.get_block(index) 199 | } 200 | } 201 | 202 | impl BitVecMut for BitVector { 203 | fn set_bit(&mut self, index: u64, value: bool) { 204 | self.0.set_bit(index, value); 205 | } 206 | 207 | #[inline] 208 | fn set_block(&mut self, index: usize, value: Block) { 209 | self.0.set_block(1, index, value); 210 | } 211 | } 212 | 213 | impl BitVecPush for BitVector { 214 | fn push_bit(&mut self, value: bool) { 215 | self.0.push_bit(value); 216 | } 217 | 218 | fn pop_bit(&mut self) -> Option { 219 | self.0.pop_bit() 220 | } 221 | 222 | fn push_block(&mut self, value: Block) { 223 | self.0.push_block(1, value); 224 | } 225 | } 226 | 227 | impl fmt::Binary for BitVector { 228 | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 229 | for bit in self { 230 | let bit = if bit {"1"} else {"0"}; 231 | try!(formatter.write_str(bit)); 232 | } 233 | 234 | Ok(()) 235 | } 236 | } 237 | 238 | impl SpaceUsage for BitVector { 239 | fn is_stack_only() -> bool { false } 240 | 241 | fn heap_bytes(&self) -> usize { 242 | self.0.heap_bytes() 243 | } 244 | } 245 | 246 | impl Default for BitVector { 247 | fn default() -> Self { 248 | BitVector::new() 249 | } 250 | } 251 | 252 | /// Iterator over `BitVector`. 253 | #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 254 | pub struct Iter<'a, Block: BlockType + 'a = usize> 255 | (vector_base::Iter<'a, Block>); 256 | 257 | impl<'a, Block: BlockType> Iterator for Iter<'a, Block> { 258 | type Item = bool; 259 | 260 | fn next(&mut self) -> Option { 261 | self.0.next().map(|bit| bit != Block::zero()) 262 | } 263 | 264 | fn size_hint(&self) -> (usize, Option) { 265 | self.0.size_hint() 266 | } 267 | 268 | fn count(self) -> usize { 269 | self.0.count() 270 | } 271 | 272 | fn last(self) -> Option { 273 | self.0.last().map(|bit| bit != Block::zero()) 274 | } 275 | 276 | fn nth(&mut self, n: usize) -> Option { 277 | self.0.nth(n).map(|bit| bit != Block::zero()) 278 | } 279 | } 280 | 281 | #[cfg(target_pointer_width = "64")] 282 | impl<'a, Block: BlockType> ExactSizeIterator for Iter<'a, Block> { 283 | fn len(&self) -> usize { 284 | self.0.len() 285 | } 286 | } 287 | 288 | impl<'a, Block: BlockType> DoubleEndedIterator for Iter<'a, Block> { 289 | fn next_back(&mut self) -> Option { 290 | self.0.next_back().map(|bit| bit != Block::zero()) 291 | } 292 | } 293 | 294 | impl<'a, Block: BlockType + 'a> IntoIterator for &'a BitVector { 295 | type Item = bool; 296 | type IntoIter = Iter<'a, Block>; 297 | 298 | fn into_iter(self) -> Self::IntoIter { 299 | self.iter() 300 | } 301 | } 302 | 303 | #[cfg(test)] 304 | mod test { 305 | use bit_vec::*; 306 | 307 | macro_rules! 
assert_bv { 308 | ($expected:expr, $actual:expr) => { 309 | assert_eq!($expected, format!("{:b}", $actual)) 310 | } 311 | } 312 | 313 | #[test] 314 | fn new() { 315 | let bit_vector: BitVector = BitVector::new(); 316 | assert_eq!(0, bit_vector.bit_len()); 317 | assert_eq!(0, bit_vector.block_len()); 318 | } 319 | 320 | #[test] 321 | fn capacity() { 322 | let bit_vector: BitVector = BitVector::new(); 323 | assert_eq!(0, bit_vector.capacity()); 324 | 325 | let bit_vector: BitVector = BitVector::with_capacity(65); 326 | assert_eq!(96, bit_vector.capacity()); 327 | } 328 | 329 | #[test] 330 | fn push_binary() { 331 | let mut bit_vector: BitVector = BitVector::new(); 332 | bit_vector.push_bit(true); 333 | bit_vector.push_bit(false); 334 | bit_vector.push_bit(false); 335 | assert_eq!("100", format!("{:b}", bit_vector)); 336 | } 337 | 338 | #[test] 339 | fn block_with_fill() { 340 | let bit_vector: BitVector = BitVector::block_with_fill(3, 0b101); 341 | assert_eq!(3, bit_vector.block_capacity()); 342 | assert_bv!("101000001010000010100000", bit_vector); 343 | } 344 | 345 | #[test] 346 | fn with_fill() { 347 | let bv0: BitVector = BitVector::with_fill(20, false); 348 | let bv1: BitVector = BitVector::with_fill(20, true); 349 | 350 | assert_eq!(false, bv0.get_bit(3)); 351 | assert_eq!(true, bv1.get_bit(3)); 352 | 353 | assert_bv!("00000000000000000000", bv0); 354 | assert_bv!("11111111111111111111", bv1); 355 | } 356 | 357 | #[test] 358 | fn push_pop() { 359 | let mut bit_vector: BitVector = BitVector::new(); 360 | bit_vector.push_bit(true); 361 | bit_vector.push_bit(false); 362 | bit_vector.push_bit(false); 363 | assert_eq!(Some(false), bit_vector.pop_bit()); 364 | assert_eq!(Some(false), bit_vector.pop_bit()); 365 | assert_eq!(Some(true), bit_vector.pop_bit()); 366 | assert_eq!(None, bit_vector.pop_bit()); 367 | } 368 | 369 | #[test] 370 | fn push_get() { 371 | let mut bit_vector: BitVector = BitVector::new(); 372 | bit_vector.push_bit(true); 373 | bit_vector.push_bit(false); 374 | bit_vector.push_bit(false); 375 | assert_eq!(3, bit_vector.bit_len()); 376 | assert_eq!(1, bit_vector.block_len()); 377 | assert_eq!(true, bit_vector.get_bit(0)); 378 | assert_eq!(false, bit_vector.get_bit(1)); 379 | assert_eq!(false, bit_vector.get_bit(2)); 380 | } 381 | 382 | #[test] 383 | #[should_panic] 384 | fn get_oob() { 385 | let mut bit_vector: BitVector = BitVector::new(); 386 | bit_vector.push_bit(true); 387 | bit_vector.get_bit(3); 388 | } 389 | 390 | #[test] 391 | fn push_block() { 392 | let mut bit_vector: BitVector = BitVector::new(); 393 | bit_vector.push_block(0); 394 | assert_bv!("00000000000000000000000000000000", bit_vector); 395 | } 396 | 397 | #[test] 398 | fn push_bits_get_block() { 399 | let mut bit_vector: BitVector = BitVector::new(); 400 | bit_vector.push_bit(true); // 1 401 | bit_vector.push_bit(true); // 2 402 | bit_vector.push_bit(false); // (4) 403 | bit_vector.push_bit(false); // (8) 404 | bit_vector.push_bit(true); // 16 405 | 406 | assert_eq!(19, bit_vector.get_block(0)); 407 | } 408 | 409 | #[test] 410 | fn push_block_get_block() { 411 | let mut bit_vector: BitVector = BitVector::new(); 412 | bit_vector.push_block(358); 413 | bit_vector.push_block(!0); 414 | assert_eq!(358, bit_vector.get_block(0)); 415 | assert_eq!(!0, bit_vector.get_block(1)); 416 | } 417 | 418 | #[test] 419 | #[should_panic] 420 | fn get_block_oob() { 421 | let mut bit_vector: BitVector = BitVector::new(); 422 | bit_vector.push_bit(true); 423 | bit_vector.get_block(3); 424 | } 425 | 426 | #[test] 427 | fn 
push_block_get_bit() { 428 | let mut bit_vector: BitVector = BitVector::new(); 429 | bit_vector.push_block(0b10101); 430 | assert_eq!(true, bit_vector.get_bit(0)); 431 | assert_eq!(false, bit_vector.get_bit(1)); 432 | assert_eq!(true, bit_vector.get_bit(2)); 433 | assert_eq!(false, bit_vector.get_bit(3)); 434 | assert_eq!(true, bit_vector.get_bit(4)); 435 | assert_eq!(false, bit_vector.get_bit(5)); 436 | } 437 | 438 | #[test] 439 | fn push_block_set_get() { 440 | let mut bit_vector: BitVector = BitVector::new(); 441 | bit_vector.push_block(0); 442 | bit_vector.set_bit(0, true); 443 | bit_vector.set_bit(1, true); 444 | bit_vector.set_bit(2, false); 445 | bit_vector.set_bit(3, true); 446 | bit_vector.set_bit(4, false); 447 | assert_eq!(true, bit_vector.get_bit(0)); 448 | assert_eq!(true, bit_vector.get_bit(1)); 449 | assert_eq!(false, bit_vector.get_bit(2)); 450 | assert_eq!(true, bit_vector.get_bit(3)); 451 | assert_eq!(false, bit_vector.get_bit(4)); 452 | } 453 | 454 | #[test] 455 | fn set_block_mask() { 456 | let mut bit_vector: BitVector = BitVector::new(); 457 | 458 | bit_vector.push_bit(false); 459 | bit_vector.set_block(0, 0b11); 460 | assert_eq!(0b01, bit_vector.get_block(0)); 461 | 462 | bit_vector.push_bit(false); 463 | bit_vector.set_block(0, 0b11); 464 | assert_eq!(0b11, bit_vector.get_block(0)); 465 | } 466 | 467 | #[test] 468 | fn resize() { 469 | let mut bit_vector: BitVector = BitVector::new(); 470 | 471 | bit_vector.push_bit(true); 472 | bit_vector.push_bit(false); 473 | bit_vector.push_bit(true); 474 | assert_bv!("101", bit_vector); 475 | 476 | bit_vector.resize(21, false); 477 | assert_bv!("101000000000000000000", bit_vector); 478 | 479 | bit_vector.resize(22, false); 480 | assert_bv!("1010000000000000000000", bit_vector); 481 | 482 | bit_vector.resize(5, false); 483 | assert_bv!("10100", bit_vector); 484 | 485 | bit_vector.resize(21, true); 486 | assert_bv!("101001111111111111111", bit_vector); 487 | 488 | bit_vector.resize(4, true); 489 | assert_bv!("1010", bit_vector); 490 | 491 | bit_vector.push_block(0b11111111); 492 | assert_bv!("1010000011111111", bit_vector); 493 | } 494 | 495 | #[test] 496 | fn block_resize() { 497 | let mut bit_vector: BitVector = BitVector::new(); 498 | 499 | bit_vector.push_bit(true); 500 | bit_vector.push_bit(false); 501 | bit_vector.push_bit(true); 502 | assert_bv!("101", bit_vector); 503 | 504 | bit_vector.block_resize(1, 0); 505 | assert_bv!("10100000", bit_vector); 506 | 507 | bit_vector.block_resize(3, 0b01000101); 508 | assert_bv!("101000001010001010100010", bit_vector); 509 | 510 | bit_vector.block_resize(2, 0); 511 | assert_bv!("1010000010100010", bit_vector); 512 | } 513 | } 514 | -------------------------------------------------------------------------------- /src/bit_vec/mod.rs: -------------------------------------------------------------------------------- 1 | //! Bit vector interfaces and implementations. 2 | 3 | mod traits; 4 | pub use self::traits::*; 5 | 6 | mod bit_vector; 7 | pub use self::bit_vector::*; 8 | 9 | mod bit_slice; 10 | pub use self::bit_slice::*; 11 | 12 | mod prim; 13 | pub use self::prim::*; 14 | -------------------------------------------------------------------------------- /src/bit_vec/prim.rs: -------------------------------------------------------------------------------- 1 | use storage::BlockType; 2 | use bit_vec::traits::*; 3 | 4 | macro_rules! 
impl_bits_prim { 5 | ( $t:ident ) 6 | => 7 | { 8 | impl BitVec for $t { 9 | type Block = $t; 10 | 11 | #[inline] 12 | fn bit_len(&self) -> u64 { 13 | Self::nbits() as u64 14 | } 15 | 16 | #[inline] 17 | fn block_len(&self) -> usize { 18 | 1 19 | } 20 | 21 | #[inline] 22 | fn get_bit(&self, position: u64) -> bool { 23 | assert!(position < self.bit_len(), 24 | "prim::get_bit: out of bounds"); 25 | BlockType::get_bit(*self, position as usize) 26 | } 27 | 28 | #[inline] 29 | fn get_block(&self, position: usize) -> Self::Block { 30 | assert!(position == 0, "prim::get_block: out of bounds"); 31 | *self 32 | } 33 | 34 | #[inline] 35 | fn get_bits(&self, start: u64, count: usize) -> Self { 36 | assert!(start + count as u64 <= Self::nbits() as u64, 37 | "prim::get_bits: out of bounds"); 38 | BlockType::get_bits(*self, start as usize, count) 39 | } 40 | } 41 | 42 | impl BitVecMut for $t { 43 | #[inline] 44 | fn set_bit(&mut self, position: u64, value: bool) { 45 | assert!(position < self.bit_len(), 46 | "prim::set_bit: out of bounds"); 47 | *self = self.with_bit(position as usize, value); 48 | } 49 | 50 | #[inline] 51 | fn set_block(&mut self, position: usize, value: Self::Block) { 52 | assert!(position == 0, "prim::set_block: out of bounds"); 53 | *self = value; 54 | } 55 | 56 | #[inline] 57 | fn set_bits(&mut self, start: u64, count: usize, value: Self::Block) { 58 | assert!(start + count as u64 <= Self::nbits() as u64, 59 | "prim::set_bits: out of bounds"); 60 | *self = self.with_bits(start as usize, count, value); 61 | } 62 | } 63 | } 64 | } 65 | 66 | impl_bits_prim!(u8); 67 | impl_bits_prim!(u16); 68 | impl_bits_prim!(u32); 69 | impl_bits_prim!(u64); 70 | impl_bits_prim!(usize); 71 | -------------------------------------------------------------------------------- /src/bit_vec/traits.rs: -------------------------------------------------------------------------------- 1 | use num_traits::{One, Zero, ToPrimitive}; 2 | 3 | use storage::{Address, BlockType}; 4 | 5 | /// Read-only bit vector operations. 6 | /// 7 | /// Minimal complete definition is `get_bit` or `get_block`, since each 8 | /// is defined in terms of the other. Note that `get_block` in terms of 9 | /// `get_bit` is inefficient, and thus you should implement `get_block` 10 | /// directly if possible. 11 | pub trait BitVec { 12 | /// The underlying block type used to store the bits of the vector. 13 | type Block: BlockType; 14 | 15 | /// The length of the slice in bits. 16 | fn bit_len(&self) -> u64; 17 | 18 | /// The length of the slice in blocks. 19 | fn block_len(&self) -> usize { 20 | self.bit_len().ceil_div(Self::Block::nbits() as u64) as usize 21 | } 22 | 23 | /// Gets the bit at `position` 24 | /// 25 | /// The default implementation calls `get_block` and masks out the 26 | /// correct bit. 27 | /// 28 | /// # Panics 29 | /// 30 | /// Panics if `position` is out of bounds. 31 | fn get_bit(&self, position: u64) -> bool { 32 | assert!(position < self.bit_len(), "BitVec::get_bit: out of bounds"); 33 | 34 | let address = Address::new::(position); 35 | let block = self.get_block(address.block_index); 36 | block.get_bit(address.bit_offset) 37 | } 38 | 39 | /// Gets the block at `position` 40 | /// 41 | /// The bits are laid out `Block::nbits()` per block, with the notional 42 | /// zeroth bit in the least significant position. If `self.bit_len()` is 43 | /// not a multiple of `Block::nbits()` then the last block will 44 | /// contain extra bits that are not part of the bit vector. 
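    /// For example, a bit vector holding 10 bits in `u8` blocks spans two
    /// blocks: the second block stores bits 8 and 9 in its two
    /// least-significant positions, and its upper six bits are not part of
    /// the vector.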
45 | /// 46 | /// The default implementation assembles a block by reading each of its 47 | /// bits. Consider it a slow reference implementation, and override it. 48 | /// 49 | /// # Panics 50 | /// 51 | /// Panics if `position` is out of bounds. 52 | fn get_block(&self, position: usize) -> Self::Block { 53 | assert!(position < self.block_len(), 54 | "IntSlice::get_block: out of bounds"); 55 | 56 | let bit_position = position as u64 * Self::Block::nbits() as u64; 57 | 58 | let mut result = Self::Block::zero(); 59 | let mut mask = Self::Block::one(); 60 | 61 | for i in 0 .. Self::Block::nbits() as u64 { 62 | if bit_position + i < self.bit_len() && self.get_bit(bit_position + i) { 63 | result = result | mask; 64 | } 65 | mask = mask << 1; 66 | } 67 | 68 | result 69 | } 70 | 71 | /// Gets `count` bits starting at bit index `start`, interpreted as a 72 | /// little-endian integer. 73 | /// 74 | /// # Panics 75 | /// 76 | /// Panics if the bit span goes out of bounds. 77 | fn get_bits(&self, start: u64, count: usize) -> Self::Block { 78 | let limit = start + count as u64; 79 | assert!(limit <= self.bit_len(), "BitVec::get_bits: out of bounds"); 80 | 81 | let address = Address::new::(start); 82 | let margin = Self::Block::nbits() - address.bit_offset; 83 | 84 | if margin >= count { 85 | let block = self.get_block(address.block_index); 86 | return block.get_bits(address.bit_offset, count) 87 | } 88 | 89 | let extra = count - margin; 90 | 91 | let block1 = self.get_block(address.block_index); 92 | let block2 = self.get_block(address.block_index + 1); 93 | 94 | let low_bits = block1.get_bits(address.bit_offset, margin); 95 | let high_bits = block2.get_bits(0, extra); 96 | 97 | (high_bits << margin) | low_bits 98 | } 99 | } 100 | 101 | /// Mutable bit vector operations that don’t affect the length. 102 | /// 103 | /// Minimal complete definition is `set_bit` or `set_block`, since each 104 | /// is defined in terms of the other. Note that `set_block` in terms of 105 | /// `set_bit` is inefficient, and thus you should implement `set_block` 106 | /// directly if possible. 107 | pub trait BitVecMut: BitVec { 108 | /// Sets the bit at `position` to `value`. 109 | /// 110 | /// The default implementation uses `get_block` and `set_block`. 111 | /// 112 | /// # Panics 113 | /// 114 | /// Panics if `position` is out of bounds. 115 | fn set_bit(&mut self, position: u64, value: bool) { 116 | assert!(position < self.bit_len(), "BitVecMut::set_bit: out of bounds"); 117 | 118 | let address = Address::new::(position); 119 | let old_block = self.get_block(address.block_index); 120 | let new_block = old_block.with_bit(address.bit_offset, value); 121 | self.set_block(address.block_index, new_block); 122 | } 123 | 124 | /// Sets the block at `position` to `value`. 125 | /// 126 | /// The bits are laid out `Block::nbits()` per block, with the notional 127 | /// zeroth bit in the least significant position. If `self.bit_len()` is 128 | /// not a multiple of `Block::nbits()` then the last block will 129 | /// contain extra bits that are not part of the bit vector. Implementations 130 | /// of `set_block` should not change those trailing bits. 131 | /// 132 | /// The default implementation sets a block by setting each of its bits 133 | /// in turn. Consider it a slow reference implementation, and override it. 134 | /// 135 | /// # Panics 136 | /// 137 | /// Panics if `position` is out of bounds. 
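    ///
    /// Note that when `position` addresses the final block, the default
    /// implementation below writes only `Block::last_block_bits(self.bit_len())`
    /// bits, which is how it leaves the trailing bits unchanged.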
138 | fn set_block(&mut self, position: usize, mut value: Self::Block) { 139 | let limit = if position + 1 == self.block_len() { 140 | Self::Block::last_block_bits(self.bit_len()) 141 | } else { 142 | Self::Block::nbits() 143 | }; 144 | 145 | let start = Self::Block::mul_nbits(position); 146 | for i in 0 .. limit as u64 { 147 | let bit = value & Self::Block::one() != Self::Block::zero(); 148 | self.set_bit(start + i, bit); 149 | value = value >> 1; 150 | } 151 | } 152 | 153 | /// Sets `count` bits starting at bit index `start`, interpreted as a 154 | /// little-endian integer. 155 | /// 156 | /// # Panics 157 | /// 158 | /// Panics if the bit span goes out of bounds. 159 | fn set_bits(&mut self, start: u64, count: usize, value: Self::Block) { 160 | let limit = start + count as u64; 161 | assert!(limit <= self.bit_len(), "BitVecMut::set_bits: out of bounds"); 162 | 163 | let address = Address::new::(start); 164 | let margin = Self::Block::nbits() - address.bit_offset; 165 | 166 | if margin >= count { 167 | let old_block = self.get_block(address.block_index); 168 | let new_block = old_block.with_bits(address.bit_offset, count, value); 169 | self.set_block(address.block_index, new_block); 170 | return; 171 | } 172 | 173 | let extra = count - margin; 174 | 175 | let old_block1 = self.get_block(address.block_index); 176 | let old_block2 = self.get_block(address.block_index + 1); 177 | 178 | let high_bits = value >> margin; 179 | 180 | let new_block1 = old_block1.with_bits(address.bit_offset, 181 | margin, value); 182 | let new_block2 = old_block2.with_bits(0, extra, high_bits); 183 | 184 | self.set_block(address.block_index, new_block1); 185 | self.set_block(address.block_index + 1, new_block2); 186 | } 187 | } 188 | 189 | /// Bit vector operations that change the length. 190 | pub trait BitVecPush: BitVecMut { 191 | /// Adds the given bit to the end of the bit vector. 192 | fn push_bit(&mut self, value: bool); 193 | 194 | /// Removes and returns the last bit, if any. 195 | fn pop_bit(&mut self) -> Option; 196 | 197 | /// Pushes `value` 0 or more times until the size of the bit 198 | /// vector is block-aligned. 199 | fn align_block(&mut self, value: bool) { 200 | while Self::Block::mod_nbits(self.bit_len()) != 0 { 201 | self.push_bit(value); 202 | } 203 | } 204 | 205 | /// Pushes the given block onto the end of the bit vector. 206 | /// 207 | /// If the end of the bit vector is not currently block-aligned, 208 | /// it pads with 0s up to the next block before pushing. 209 | /// 210 | /// The default implementation pushes the block one bit at a time; 211 | /// override it with something more efficient. 212 | fn push_block(&mut self, mut value: Self::Block) { 213 | self.align_block(false); 214 | 215 | for _ in 0 .. 
Self::Block::nbits() { 216 | self.push_bit(value & Self::Block::one() != Self::Block::zero()); 217 | value = value >> 1; 218 | } 219 | } 220 | } 221 | 222 | impl BitVec for [Block] { 223 | type Block = Block; 224 | 225 | #[inline] 226 | fn bit_len(&self) -> u64 { 227 | self.len() as u64 * Block::nbits() as u64 228 | } 229 | 230 | #[inline] 231 | fn block_len(&self) -> usize { 232 | self.len() 233 | } 234 | 235 | #[inline] 236 | fn get_block(&self, position: usize) -> Block { 237 | self[position] 238 | } 239 | } 240 | 241 | impl BitVecMut for [Block] { 242 | #[inline] 243 | fn set_block(&mut self, position: usize, value: Block) { 244 | self[position] = value; 245 | } 246 | } 247 | 248 | impl<'a, Block: BlockType> BitVec for &'a [Block] { 249 | type Block = Block; 250 | 251 | #[inline] 252 | fn bit_len(&self) -> u64 { 253 | self.len() as u64 * Block::nbits() as u64 254 | } 255 | 256 | #[inline] 257 | fn block_len(&self) -> usize { 258 | self.len() 259 | } 260 | 261 | #[inline] 262 | fn get_block(&self, position: usize) -> Block { 263 | self[position] 264 | } 265 | } 266 | 267 | impl<'a, Block: BlockType> BitVec for &'a mut [Block] { 268 | type Block = Block; 269 | 270 | #[inline] 271 | fn bit_len(&self) -> u64 { 272 | self.len() as u64 * Block::nbits() as u64 273 | } 274 | 275 | #[inline] 276 | fn block_len(&self) -> usize { 277 | self.len() 278 | } 279 | 280 | #[inline] 281 | fn get_block(&self, position: usize) -> Block { 282 | self[position] 283 | } 284 | } 285 | 286 | impl<'a, Block: BlockType> BitVecMut for &'a mut [Block] { 287 | #[inline] 288 | fn set_block(&mut self, position: usize, value: Block) { 289 | self[position] = value; 290 | } 291 | } 292 | 293 | impl BitVec for Vec { 294 | type Block = Block; 295 | 296 | #[inline] 297 | fn bit_len(&self) -> u64 { 298 | self.len() as u64 * Block::nbits() as u64 299 | } 300 | 301 | #[inline] 302 | fn block_len(&self) -> usize { 303 | self.len() 304 | } 305 | 306 | #[inline] 307 | fn get_block(&self, position: usize) -> Block { 308 | self[position] 309 | } 310 | } 311 | 312 | impl BitVecMut for Vec { 313 | #[inline] 314 | fn set_block(&mut self, position: usize, value: Block) { 315 | self[position] = value; 316 | } 317 | } 318 | 319 | impl BitVec for Vec { 320 | type Block = u8; // This is bogus 321 | 322 | #[inline] 323 | fn bit_len(&self) -> u64 { 324 | self.len() as u64 325 | } 326 | 327 | fn get_bit(&self, position: u64) -> bool { 328 | self[position.to_usize().expect("Vec::get_bit: overflow")] 329 | } 330 | } 331 | 332 | impl BitVecMut for Vec { 333 | fn set_bit(&mut self, position: u64, value: bool) { 334 | let position = position.to_usize() 335 | .expect("Vec::set_bit: overflow"); 336 | self[position] = value; 337 | } 338 | } 339 | 340 | impl BitVecPush for Vec { 341 | fn push_bit(&mut self, value: bool) { 342 | self.push(value); 343 | } 344 | 345 | fn pop_bit(&mut self) -> Option { 346 | self.pop() 347 | } 348 | } 349 | 350 | -------------------------------------------------------------------------------- /src/broadword.rs: -------------------------------------------------------------------------------- 1 | //! Broadword operations treating `u64` as a parallel vector. 2 | //! 3 | //! From [Sebastiano Vigna, “Broadword Implementation of 4 | //! Rank/Select Queries.”](http://sux.di.unimi.it/paper.pdf) Changes from 5 | //! that work: 6 | //! 7 | //! - It uses a 17-digit (68-bit) constant “0x0F0F0F0F0F0F0F0F0.” I believe 8 | //! the correct constant is these 64 bits: 0x0F0F_0F0F_0F0F_0F0F. 9 | //! 10 | //! 
- Arithmetic operations are assumed to wrap on overflow. If this 11 | //! were not the case, Algorithm 1 ([count_ones](fn.count_ones.html)) 12 | //! would overflow its last line, when multiplying by L₈. 13 | //! 14 | //! - Line 2 of Algorithm 2 should read 15 | //! 16 | //! ``` 17 | //! # let mut s = 0u64; 18 | //! s = (s & 0x3333_3333_3333_3333) + ((s >> 2) & 0x3333_3333_3333_3333); 19 | //! ``` 20 | //! 21 | //! In the paper, the shifted `s` appears as `x`. 22 | 23 | use rank::{BitRankSupport, RankSupport}; 24 | use select::Select1Support; 25 | use storage::BlockType; 26 | 27 | /// Newtype for treating a `u64` as a rank or select structure. 28 | pub struct Broadword(pub u64); 29 | 30 | impl BitRankSupport for Broadword { 31 | fn rank1(&self, position: u64) -> u64 { 32 | debug_assert!(position < 64); 33 | count_ones(self.0 & u64::low_mask(position as usize + 1)) as u64 34 | } 35 | } 36 | 37 | impl RankSupport for Broadword { 38 | type Over = bool; 39 | 40 | fn rank(&self, position: u64, value: bool) -> u64 { 41 | if value {self.rank1(position)} else {self.rank0(position)} 42 | } 43 | 44 | fn limit(&self) -> u64 { 64 } 45 | } 46 | 47 | impl Select1Support for Broadword { 48 | fn select1(&self, index: u64) -> Option { 49 | select1(index as usize, self.0).map(|u| u as u64) 50 | } 51 | } 52 | 53 | /// Has the lowest bit of every byte set: `0x0101_0101_0101_0101`. 54 | pub const L8: u64 = 0x0101_0101_0101_0101; 55 | 56 | /// Has the highest bit of every byte set: `0x8080_8080_8080_8080`. 57 | pub const H8: u64 = 0x8080_8080_8080_8080; 58 | 59 | /// Counts the number of ones in a `u64`. 60 | /// 61 | /// Uses the broadword algorithm from Vigna. 62 | pub fn count_ones(mut x: u64) -> usize { 63 | x = x - ((x & 0xAAAA_AAAA_AAAA_AAAA) >> 1); 64 | x = (x & 0x3333_3333_3333_3333) + ((x >> 2) & 0x3333_3333_3333_3333); 65 | x = (x + (x >> 4)) & 0x0F0F_0F0F_0F0F_0F0F; 66 | (x.wrapping_mul(L8) >> 56) as usize 67 | } 68 | 69 | /// Finds the index of the `r`th one bit in `x`. 70 | /// 71 | /// Uses the broadword algorithm from Vigna. 72 | pub fn select1(r: usize, x: u64) -> Option { 73 | let result = select1_raw(r, x); 74 | if result == 72 {None} else {Some(result)} 75 | } 76 | 77 | /// Finds the index of the `r`th one bit in `x`, returning 72 when not found. 78 | /// 79 | /// Uses the broadword algorithm from Vigna. 80 | pub fn select1_raw(r: usize, x: u64) -> usize { 81 | let r = r as u64; 82 | let mut s = x - ((x & 0xAAAA_AAAA_AAAA_AAAA) >> 1); 83 | s = (s & 0x3333_3333_3333_3333) + ((s >> 2) & 0x3333_3333_3333_3333); 84 | s = ((s + (s >> 4)) & 0x0F0F_0F0F_0F0F_0F0F).wrapping_mul(L8); 85 | let b = (le8(s, r.wrapping_mul(L8)) >> 7).wrapping_mul(L8)>> 53; 86 | let l = r - ((s << 8).wrapping_shr(b as u32) & 0xFF); 87 | s = (u_nz8((x.wrapping_shr(b as u32) & 0xFF) 88 | .wrapping_mul(L8) & 0x8040_2010_0804_0201) >> 7) 89 | .wrapping_mul(L8); 90 | (b + ((le8(s, l.wrapping_mul(L8)) >> 7).wrapping_mul(L8) >> 56)) as usize 91 | } 92 | 93 | /// Parallel ≤, treating a `u64` as a vector of 8 `u8`s. 94 | pub fn u_le8(x: u64, y: u64) -> u64 { 95 | ((((y | H8) - (x & !H8)) | (x ^ y)) ^ (x & !y)) & H8 96 | } 97 | 98 | /// Parallel ≤, treating a `u64` as a vector of 8 `i8`s. 99 | pub fn le8(x: u64, y: u64) -> u64 { 100 | (((y | H8) - (x & !H8)) ^ x ^ y) & H8 101 | } 102 | 103 | /// Parallel >0, treating a `u64` as a vector of 8 `u8`s. 
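/// Each byte of the result is `0x80` if the corresponding byte of `x` is
/// nonzero, and `0x00` otherwise.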
104 | pub fn u_nz8(x: u64) -> u64 { 105 | (((x | H8) - L8) | x) & H8 106 | } 107 | 108 | #[cfg(test)] 109 | mod test { 110 | use std::hash::{Hash, Hasher}; 111 | use std::collections::hash_map::DefaultHasher; 112 | use quickcheck::{quickcheck, TestResult}; 113 | 114 | use super::*; 115 | use select::{BinSearchSelect, Select1Support}; 116 | 117 | #[test] 118 | fn count_ones_0() { 119 | assert_eq!(0, count_ones(0)); 120 | } 121 | 122 | #[test] 123 | fn count_ones_1() { 124 | assert_eq!(1, count_ones(1)); 125 | } 126 | 127 | #[test] 128 | fn count_ones_0000_0000_0000_0010() { 129 | assert_eq!(1, count_ones(0x0000_0000_0000_0010)); 130 | } 131 | 132 | #[test] 133 | fn count_ones_1000_0000_0000_0000() { 134 | assert_eq!(1, count_ones(0x1000_0000_0000_0000)); 135 | } 136 | 137 | #[test] 138 | fn count_ones_ffff_ffff_ffff_ffff() { 139 | assert_eq!(64, count_ones(0xFFFF_FFFF_FFFF_FFFF)); 140 | } 141 | 142 | 143 | fn count_ones_prop(word: u64) -> bool { 144 | count_ones(word) == word.count_ones() as usize 145 | } 146 | 147 | fn count_ones_prop_hash(word: u64) -> bool { 148 | count_ones_prop(hash(&word)) 149 | } 150 | 151 | #[test] 152 | fn count_ones_qc() { 153 | quickcheck(count_ones_prop as fn(u64) -> bool); 154 | } 155 | 156 | #[test] 157 | fn count_ones_qc_hash() { 158 | quickcheck(count_ones_prop_hash as fn(u64) -> bool); 159 | } 160 | 161 | #[test] 162 | fn select1_0_0() { 163 | assert_eq!(None, select1(0, 0)); 164 | } 165 | 166 | #[test] 167 | fn select1_0_1() { 168 | assert_eq!(Some(0), select1(0, 1)); 169 | } 170 | 171 | #[test] 172 | fn select1_0_2() { 173 | assert_eq!(Some(1), select1(0, 2)); 174 | } 175 | 176 | #[test] 177 | fn select1_0_3() { 178 | assert_eq!(Some(0), select1(0, 3)); 179 | } 180 | 181 | #[test] 182 | fn select1_1_2() { 183 | assert_eq!(None, select1(1, 2)); 184 | } 185 | 186 | #[test] 187 | fn select1_1_3() { 188 | assert_eq!(Some(1), select1(1, 3)); 189 | } 190 | 191 | #[test] 192 | fn select1_3_13() { 193 | assert_eq!(None, select1(3, 0b1101)); 194 | } 195 | 196 | fn select1_prop(r: u8, x: u64) -> TestResult { 197 | if r > 64 { return TestResult::discard(); } 198 | 199 | let ss = BinSearchSelect::new(x); 200 | TestResult::from_bool( 201 | select1(r as usize, x).map(|n| n as u64) 202 | == ss.select1(r as u64)) 203 | } 204 | 205 | fn select1_prop_hash(r: u8, x: u64) -> TestResult { 206 | select1_prop(r, hash(&x)) 207 | } 208 | 209 | #[test] 210 | fn select1_qc() { 211 | quickcheck(select1_prop as fn(u8, u64) -> TestResult); 212 | } 213 | 214 | #[test] 215 | fn select1_qc_hash() { 216 | quickcheck(select1_prop_hash as fn(u8, u64) -> TestResult); 217 | } 218 | 219 | fn u_nz8_prop((n0, n1, n2, n3): (u64, u64, u64, u64)) -> bool { 220 | let n = hash(&(n0, n1, n2, n3)); 221 | let r = u_nz8(n); 222 | for i in 0..8 { 223 | let ni = n.get_bits(8 * i, 8); 224 | let ri = r.get_bits(8 * i, 8); 225 | if (ni != 0) != (ri == 0x80) { 226 | return false; 227 | } 228 | } 229 | 230 | true 231 | } 232 | 233 | #[test] 234 | fn u_nz8_qc() { 235 | quickcheck(u_nz8_prop as fn((u64, u64, u64, u64)) -> bool); 236 | } 237 | 238 | #[test] 239 | fn u_nz8_works() { 240 | assert_eq!(b(0, 0, 0, 0, 0, 0, 0, 0), 241 | u_nz8(u(0, 0, 0, 0, 0, 0, 0, 0))); 242 | 243 | assert_eq!(b( 1, 1, 0, 1, 0, 1, 1, 1), 244 | u_nz8(u(45, 12, 0, 129, 0, 3, 80, 1))); 245 | 246 | assert_eq!(b(1, 1, 1, 1, 1, 1, 1, 1), 247 | u_nz8(u(1, 2, 3, 4, 5, 6, 7, 8))); 248 | 249 | assert_eq!(b( 1, 1, 1, 1, 0, 1, 1, 1), 250 | u_nz8(0xFF_FF_FF_FF_00_FF_FF_FF)); 251 | } 252 | 253 | // // I don’t understand le8, apparently. 
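    // Editorial sketch (added; not part of the original crate): a
    // consistency check tying `select1` back to `count_ones`. If
    // `select1(r, x)` returns `Some(i)`, then bit `i` of `x` is set and
    // exactly `r` one bits occur strictly below position `i`.
    fn select1_vs_count_ones_prop(r: u8, x: u64) -> TestResult {
        // Like `select1_prop` above, only exercise in-range ranks.
        if r > 64 { return TestResult::discard(); }

        match select1(r as usize, x) {
            None => TestResult::discard(),
            Some(i) => {
                // Sanity bound: a found bit index must fit in a u64.
                if i >= 64 { return TestResult::failed(); }
                let below = x & ((1u64 << i) - 1);
                TestResult::from_bool(
                    (x >> i) & 1 == 1
                        && count_ones(below) == r as usize)
            }
        }
    }

    #[test]
    fn select1_vs_count_ones_qc() {
        quickcheck(select1_vs_count_ones_prop as fn(u8, u64) -> TestResult);
    }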
254 | 255 | // #[test] 256 | // fn le8_128_0() { 257 | // let n = 128; 258 | // let m = 0; 259 | // let r = le8(n, m); 260 | // let n0 = n.get_bits(0, 8) as u8 as i8; 261 | // let m0 = m.get_bits(0, 8) as u8 as i8; 262 | // let r0 = r.get_bits(0, 8); 263 | // println!("n0: {}, m0: {}, r0: {}", n0, m0, r0); 264 | // assert_eq!(n0 <= m0, r0 == 0x80); 265 | // } 266 | 267 | // fn le8_prop_hashed((n0, n1, n2, n3): (u64, u64, u64, u64), 268 | // (m0, m1, m2, m3): (u64, u64, u64, u64)) -> bool { 269 | // let n = hash(&(n0, n1, n2, n3)); 270 | // let m = hash(&(m0, m1, m2, m3)); 271 | // le8_prop(n, m) 272 | // } 273 | // 274 | // fn le8_prop(n: u64, m: u64) -> bool { 275 | // let r = le8(n, m); 276 | // for i in 0..8 { 277 | // let ni = n.get_bits(8 * i, 8) as u8 as i8; 278 | // let mi = m.get_bits(8 * i, 8) as u8 as i8; 279 | // let ri = r.get_bits(8 * i, 8); 280 | // if (ni <= mi) != (ri == 0x80) { 281 | // return false; 282 | // } 283 | // } 284 | // 285 | // true 286 | // } 287 | // 288 | // #[test] 289 | // fn le8_qc() { 290 | // quickcheck(le8_prop as fn(u64, u64) -> bool); 291 | // } 292 | // 293 | // #[test] 294 | // fn le8_qc_hashed() { 295 | // quickcheck(le8_prop_hashed as fn((u64, u64, u64, u64), 296 | // (u64, u64, u64, u64)) -> bool); 297 | // } 298 | 299 | fn u_le8_prop_hashed((n0, n1, n2, n3): (u64, u64, u64, u64), 300 | (m0, m1, m2, m3): (u64, u64, u64, u64)) -> bool { 301 | let n = hash(&(n0, n1, n2, n3)); 302 | let m = hash(&(m0, m1, m2, m3)); 303 | u_le8_prop(n, m) 304 | } 305 | 306 | fn u_le8_prop(n: u64, m: u64) -> bool { 307 | let r = u_le8(n, m); 308 | for i in 0..8 { 309 | let ni = n.get_bits(8 * i, 8); 310 | let mi = m.get_bits(8 * i, 8); 311 | let ri = r.get_bits(8 * i, 8); 312 | if (ni <= mi) != (ri == 0x80) { 313 | return false; 314 | } 315 | } 316 | 317 | true 318 | } 319 | 320 | #[test] 321 | fn u_le8_qc() { 322 | quickcheck(u_le8_prop as fn(u64, u64) -> bool); 323 | } 324 | 325 | #[test] 326 | fn u_le8_qc_hashed() { 327 | quickcheck(u_le8_prop_hashed as fn((u64, u64, u64, u64), 328 | (u64, u64, u64, u64)) -> bool); 329 | } 330 | 331 | #[test] 332 | fn le8_works() { 333 | assert_eq!(b( 1, 1, 1, 1, 0, 0, 0, 0), 334 | le8(i( 0, 0, 0, 0, 0, 0, 0, 0), 335 | i( 3, 2, 1, 0, -1, -2, -3, -4))); 336 | 337 | assert_eq!(b( 0, 0, 0, 1, 1, 1, 1, 1), 338 | le8(i( 3, 2, 1, 0, -1, -2, -3, -4), 339 | i( 0, 0, 0, 0, 0, 0, 0, 0))); 340 | 341 | assert_eq!(b( 0, 0, 1, 1, 1, 1, 1, 1), 342 | le8(i(19, 18, 17, 16, 15, 0, -1, -2), 343 | i(17, 17, 17, 17, 17, 17, 17, 17))); 344 | 345 | assert_eq!(b( 1, 1, 0, 0, 0, 0, 0, 0), 346 | le8(i(-9, -8, -7, 0, 1, 2, 3, 4), 347 | i(-8, -8, -8, -8, -8, -8, -8, -8))); 348 | 349 | assert_eq!(b( 0, 1, 0, 1, 1, 0, 1, 0), 350 | le8(i( 8, 3, 46, 0, 0, 0, -6, -1), 351 | i( 7, 3, 24, 1, 0, -9, 5, -2))); 352 | } 353 | 354 | #[test] 355 | fn u_le8_works() { 356 | assert_eq!(b( 1, 1, 1, 1, 1, 1, 1, 1), 357 | u_le8(u( 0, 0, 0, 0, 0, 0, 0, 0), 358 | u( 7, 6, 5, 4, 3, 2, 1, 0))); 359 | 360 | assert_eq!(b( 1, 0, 0, 0, 0, 0, 0, 0), 361 | u_le8(u( 0, 1, 2, 3, 4, 5, 6, 7), 362 | u( 0, 0, 0, 0, 0, 0, 0, 0))); 363 | 364 | assert_eq!(b( 0, 0, 1, 1, 1, 1, 1, 1), 365 | u_le8(u(19, 18, 17, 16, 15, 14, 13, 12), 366 | u(17, 17, 17, 17, 17, 17, 17, 17))); 367 | 368 | assert_eq!(b( 0, 1, 0, 1, 1, 0, 1, 0), 369 | u_le8(u( 8, 3, 46, 0, 0, 9, 3, 2), 370 | u( 7, 3, 24, 1, 0, 0, 5, 1))); 371 | } 372 | 373 | /// Helpers for creating u64s. 
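    /// (Editorial note: `b` packs eight 0-or-1 flags into the high bit of
    /// each byte, mirroring the `H8`-masked results of `le8`, `u_le8`, and
    /// `u_nz8`; `u` packs eight unsigned bytes from most significant to
    /// least significant; `i` does the same for signed bytes.)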
374 | 375 | fn b(a: u64, b: u64, c: u64, d: u64, 376 | e: u64, f: u64, g: u64, h: u64) -> u64 { 377 | (a << 63) | (b << 55) | (c << 47) | (d << 39) | 378 | (e << 31) | (f << 23) | (g << 15) | (h << 7) 379 | } 380 | 381 | fn u(a: u8, b: u8, c: u8, d: u8, 382 | e: u8, f: u8, g: u8, h: u8) -> u64 { 383 | ((a as u64) << 56) 384 | | ((b as u64) << 48) 385 | | ((c as u64) << 40) 386 | | ((d as u64) << 32) 387 | | ((e as u64) << 24) 388 | | ((f as u64) << 16) 389 | | ((g as u64) << 8) 390 | | (h as u64) 391 | } 392 | 393 | fn i(a: i8, b: i8, c: i8, d: i8, 394 | e: i8, f: i8, g: i8, h: i8) -> u64 { 395 | u(a as u8, b as u8, c as u8, d as u8, 396 | e as u8, f as u8, g as u8, h as u8) 397 | } 398 | 399 | fn hash(t: &T) -> u64 { 400 | let mut s = DefaultHasher::new(); 401 | t.hash(&mut s); 402 | s.finish() 403 | } 404 | } 405 | 406 | -------------------------------------------------------------------------------- /src/coding/comma.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use internal::errors::*; 3 | use stream::*; 4 | 5 | /// `Comma(n)` encodes in base 2n - 1, using n bits per digit. 6 | pub struct Comma(pub u8); 7 | 8 | /// `Comma(2)` encodes in base 3. 9 | pub const COMMA: Comma = Comma(2); 10 | 11 | impl UniversalCode for Comma { 12 | fn encode(&self, sink: &mut W, mut value: u64) -> Result<()> { 13 | let base = (1 << self.0) - 1; 14 | let mut stack: Vec = Vec::new(); 15 | 16 | while value > 0 { 17 | stack.push(value % base); 18 | value /= base; 19 | } 20 | 21 | while let Some(digit) = stack.pop() { 22 | try!(sink.write_int(self.0 as usize, digit)); 23 | } 24 | 25 | try!(sink.write_int(self.0 as usize, base)); 26 | 27 | Ok(()) 28 | } 29 | 30 | fn decode(&self, source: &mut R) -> Result> { 31 | let base = (1 << self.0) - 1; 32 | let mut result = 0; 33 | let mut consumed = false; 34 | 35 | loop { 36 | if let Some(digit) = try!(source.read_int::(self.0 as usize)) { 37 | if digit == base { return Ok(Some(result)) } 38 | 39 | consumed = true; 40 | result = result * base + digit; 41 | } else if consumed { 42 | return out_of_bits("Comma::decode"); 43 | } else { 44 | return Ok(None); 45 | } 46 | } 47 | } 48 | } 49 | 50 | #[cfg(test)] 51 | mod test { 52 | use std::collections::VecDeque; 53 | use quickcheck::quickcheck; 54 | use coding::*; 55 | use coding::properties; 56 | 57 | #[test] 58 | fn enc234() { 59 | let mut dv = VecDeque::::new(); 60 | 61 | COMMA.encode(&mut dv, 2).unwrap(); 62 | COMMA.encode(&mut dv, 3).unwrap(); 63 | COMMA.encode(&mut dv, 4).unwrap(); 64 | 65 | assert_eq!(Some(2), COMMA.decode(&mut dv).unwrap()); 66 | assert_eq!(Some(3), COMMA.decode(&mut dv).unwrap()); 67 | assert_eq!(Some(4), COMMA.decode(&mut dv).unwrap()); 68 | assert_eq!(None::, COMMA.decode(&mut dv).unwrap()); 69 | } 70 | 71 | #[test] 72 | fn qc_comma2() { 73 | fn prop(v: Vec) -> bool { 74 | properties::code_decode(&Comma(2), v) 75 | } 76 | 77 | quickcheck(prop as fn(Vec) -> bool); 78 | } 79 | 80 | #[test] 81 | fn qc_comma3() { 82 | fn prop(v: Vec) -> bool { 83 | properties::code_decode(&Comma(3), v) 84 | } 85 | 86 | quickcheck(prop as fn(Vec) -> bool); 87 | } 88 | 89 | #[test] 90 | fn qc_comma4() { 91 | fn prop(v: Vec) -> bool { 92 | properties::code_decode(&Comma(4), v) 93 | } 94 | 95 | quickcheck(prop as fn(Vec) -> bool); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/coding/elias.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use 
internal::errors::*; 3 | use stream::*; 4 | 5 | /// An Elias code. 6 | /// 7 | /// Elias codes do not handle 0. 8 | /// 9 | /// An Elias code first encodes the size of the number using some other 10 | /// code—this is the `Header` parameter. 11 | pub struct Elias<Header: UniversalCode>(pub Header); 12 | 13 | /// An Elias gamma code encodes the header in unary. 14 | pub type Gamma = Elias<Unary>; 15 | 16 | /// An instance of `Gamma`. 17 | pub const GAMMA : Gamma = Elias(Unary); 18 | 19 | /// An Elias delta code encodes the header using the Elias gamma code. 20 | pub type Delta = Elias<Lift0<Gamma>>; 21 | 22 | /// An instance of `Delta`. 23 | pub const DELTA : Delta = Elias(Lift0(GAMMA)); 24 | 25 | /// An Elias omega code iterates the Elias encoding. 26 | pub struct Omega; 27 | 28 | const WORD_BITS: u32 = 64; 29 | 30 | impl<Header: UniversalCode> UniversalCode for Elias<Header>
{ 31 | fn encode(&self, sink: &mut W, value: u64) -> Result<()> { 32 | assert!(value != 0, "Elias codes do not handle 0"); 33 | 34 | let nbits: u32 = WORD_BITS - 1 - value.leading_zeros(); 35 | try!(self.0.encode(sink, nbits as u64)); 36 | sink.write_int(nbits as usize, value) 37 | } 38 | 39 | fn decode(&self, source: &mut R) -> Result> { 40 | if let Some(nbits) = try!(self.0.decode(source)) { 41 | if nbits > WORD_BITS as u64 - 1 { 42 | return too_many_bits("Elias::decode"); 43 | } 44 | 45 | if let Some(low_bits) = try!(source.read_int::(nbits as usize)) 46 | { 47 | Ok(Some(low_bits | (1 << nbits))) 48 | } else { 49 | out_of_bits("Elias::decode") 50 | } 51 | } else { 52 | Ok(None) 53 | } 54 | } 55 | } 56 | 57 | impl UniversalCode for Omega { 58 | fn encode(&self, sink: &mut W, mut value: u64) -> Result<()> { 59 | let mut stack = Vec::<(usize, u64)>::new(); 60 | 61 | while value > 1 { 62 | let nbits = WORD_BITS - value.leading_zeros(); 63 | stack.push((nbits as usize, value)); 64 | value = nbits as u64 - 1; 65 | } 66 | 67 | while let Some((nbits, value)) = stack.pop() { 68 | try!(sink.write_int_be(nbits, value)); 69 | } 70 | try!(sink.write_bit(false)); 71 | 72 | Ok(()) 73 | } 74 | 75 | fn decode(&self, source: &mut R) -> Result> { 76 | let mut result: u64 = 1; 77 | 78 | loop { 79 | if let Some(bit) = try!(source.read_bit()) { 80 | if !bit { return Ok(Some(result)); } 81 | 82 | if let Some(next) = 83 | try!(source.read_int_be::(result as usize)) { 84 | result = next | (1 << result as u32) 85 | } else { 86 | return out_of_bits("Omega::decode"); 87 | } 88 | } else if result == 1 { 89 | return Ok(None); 90 | } else { 91 | return out_of_bits("Omega::decode"); 92 | } 93 | } 94 | } 95 | } 96 | 97 | #[cfg(test)] 98 | mod test { 99 | use std::collections::VecDeque; 100 | use quickcheck::quickcheck; 101 | use coding::*; 102 | use coding::properties; 103 | 104 | #[test] 105 | fn gamma() { 106 | let mut dv = VecDeque::::new(); 107 | 108 | GAMMA.encode(&mut dv, 2).unwrap(); 109 | GAMMA.encode(&mut dv, 3).unwrap(); 110 | GAMMA.encode(&mut dv, 4).unwrap(); 111 | 112 | assert_eq!(Some(2), GAMMA.decode(&mut dv).unwrap()); 113 | assert_eq!(Some(3), GAMMA.decode(&mut dv).unwrap()); 114 | assert_eq!(Some(4), GAMMA.decode(&mut dv).unwrap()); 115 | assert_eq!(None::, GAMMA.decode(&mut dv).unwrap()); 116 | } 117 | 118 | #[test] 119 | fn delta() { 120 | let mut dv = VecDeque::::new(); 121 | 122 | DELTA.encode(&mut dv, 2).unwrap(); 123 | DELTA.encode(&mut dv, 3).unwrap(); 124 | DELTA.encode(&mut dv, 38932).unwrap(); 125 | DELTA.encode(&mut dv, 4).unwrap(); 126 | 127 | assert_eq!(Some(2), DELTA.decode(&mut dv).unwrap()); 128 | assert_eq!(Some(3), DELTA.decode(&mut dv).unwrap()); 129 | assert_eq!(Some(38932), DELTA.decode(&mut dv).unwrap()); 130 | assert_eq!(Some(4), DELTA.decode(&mut dv).unwrap()); 131 | assert_eq!(None::, DELTA.decode(&mut dv).unwrap()); 132 | } 133 | 134 | #[test] 135 | fn omega() { 136 | let mut dv = VecDeque::::new(); 137 | 138 | Omega.encode(&mut dv, 2).unwrap(); 139 | Omega.encode(&mut dv, 3).unwrap(); 140 | Omega.encode(&mut dv, 38932).unwrap(); 141 | Omega.encode(&mut dv, 4).unwrap(); 142 | 143 | assert_eq!(Some(2), Omega.decode(&mut dv).unwrap()); 144 | assert_eq!(Some(3), Omega.decode(&mut dv).unwrap()); 145 | assert_eq!(Some(38932), Omega.decode(&mut dv).unwrap()); 146 | assert_eq!(Some(4), Omega.decode(&mut dv).unwrap()); 147 | assert_eq!(None::, Omega.decode(&mut dv).unwrap()); 148 | } 149 | 150 | #[test] 151 | fn qc_gamma() { 152 | fn prop_gamma(v: Vec) -> bool { 153 | 
properties::code_decode(&GAMMA, v) 154 | } 155 | 156 | quickcheck(prop_gamma as fn(Vec) -> bool); 157 | } 158 | 159 | #[test] 160 | fn qc_delta() { 161 | fn prop_delta(v: Vec) -> bool { 162 | properties::code_decode(&DELTA, v) 163 | } 164 | 165 | quickcheck(prop_delta as fn(Vec) -> bool); 166 | } 167 | 168 | #[test] 169 | fn qc_omega() { 170 | fn prop_omega(v: Vec) -> bool { 171 | properties::code_decode(&Omega, v) 172 | } 173 | 174 | quickcheck(prop_omega as fn(Vec) -> bool); 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/coding/fib.rs: -------------------------------------------------------------------------------- 1 | use std::mem; 2 | 3 | use super::*; 4 | use internal::errors::*; 5 | use stream::*; 6 | 7 | /// A Fibonacci code. 8 | pub struct Fibonacci; 9 | 10 | struct Fib { 11 | i_1: u64, 12 | i: u64, 13 | } 14 | 15 | impl Fib { 16 | fn new() -> Self { 17 | Fib { 18 | i_1: 1, 19 | i: 1, 20 | } 21 | } 22 | 23 | fn next(&mut self) -> Result<()> { 24 | if let Some(next) = self.i_1.checked_add(self.i) { 25 | self.i_1 = self.i; 26 | self.i = next; 27 | Ok(()) 28 | } else { 29 | too_many_bits("Fibonacci") 30 | } 31 | } 32 | 33 | fn prev(&mut self) { 34 | self.i -= self.i_1; 35 | mem::swap(&mut self.i, &mut self.i_1); 36 | } 37 | } 38 | 39 | impl UniversalCode for Fibonacci { 40 | fn encode(&self, sink: &mut W, mut value: u64) -> Result<()> { 41 | assert!(value != 0, "Fibonacci codes cannot handle 0."); 42 | let mut fib = Fib::new(); 43 | 44 | // Having to compute fib.i when we really just need fib.i_1 45 | // means that this gives up on smaller numbers than it needs to. 46 | while fib.i <= value { 47 | try!(fib.next()); 48 | } 49 | 50 | // Now fib.i_1 is the largest Fibonacci number <= value 51 | 52 | let mut stack = vec![true]; 53 | while fib.i > 1 { 54 | if fib.i_1 <= value { 55 | value -= fib.i_1; 56 | stack.push(true); 57 | } else { 58 | stack.push(false); 59 | } 60 | 61 | fib.prev(); 62 | } 63 | 64 | while let Some(bit) = stack.pop() { 65 | try!(sink.write_bit(bit)); 66 | } 67 | 68 | Ok(()) 69 | } 70 | 71 | fn decode(&self, source: &mut R) -> Result> { 72 | let mut result = 0; 73 | let mut fib = Fib::new(); 74 | let mut previous = false; 75 | 76 | while let Some(bit) = try!(source.read_bit()) { 77 | if bit && previous { 78 | return Ok(Some(result)); 79 | } 80 | 81 | if bit { 82 | result += fib.i; 83 | } 84 | 85 | try!(fib.next()); 86 | previous = bit; 87 | } 88 | 89 | if result == 0 { 90 | Ok(None) 91 | } else { 92 | out_of_bits("Fibonacci::decode") 93 | } 94 | } 95 | } 96 | 97 | #[cfg(test)] 98 | mod test { 99 | use std::collections::VecDeque; 100 | use quickcheck::quickcheck; 101 | use coding::*; 102 | use coding::properties; 103 | 104 | #[test] 105 | fn enc234() { 106 | let mut dv = VecDeque::::new(); 107 | 108 | Fibonacci.encode(&mut dv, 2).unwrap(); 109 | Fibonacci.encode(&mut dv, 3).unwrap(); 110 | Fibonacci.encode(&mut dv, 4).unwrap(); 111 | 112 | assert_eq!(Some(2), Fibonacci.decode(&mut dv).unwrap()); 113 | assert_eq!(Some(3), Fibonacci.decode(&mut dv).unwrap()); 114 | assert_eq!(Some(4), Fibonacci.decode(&mut dv).unwrap()); 115 | assert_eq!(None::, Fibonacci.decode(&mut dv).unwrap()); 116 | } 117 | 118 | #[test] 119 | fn qc() { 120 | fn prop(v: Vec) -> bool { 121 | properties::code_decode(&Fibonacci, v) 122 | } 123 | 124 | quickcheck(prop as fn(Vec) -> bool); 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/coding/mod.rs: 
-------------------------------------------------------------------------------- 1 | //! Codes for data compression. 2 | //! 3 | //! These universal codes currently know how to encode to a `BitWrite` 4 | //! and decode from a `BitRead`. However, the code that would use them 5 | //! to implement compressed vectors and such isn’t written yet. 6 | 7 | mod traits; 8 | pub use self::traits::*; 9 | 10 | mod unary; 11 | pub use self::unary::*; 12 | 13 | mod elias; 14 | pub use self::elias::*; 15 | 16 | mod fib; 17 | pub use self::fib::*; 18 | 19 | mod comma; 20 | pub use self::comma::*; 21 | 22 | mod trans; 23 | pub use self::trans::*; 24 | 25 | #[cfg(test)] 26 | mod properties { 27 | use std::collections::VecDeque; 28 | use super::*; 29 | 30 | pub fn code_decode(code: &Code, vec: Vec) 31 | -> bool { 32 | let mut dv = VecDeque::::new(); 33 | for &i in &vec { 34 | code.encode(&mut dv, i + 1).unwrap(); 35 | } 36 | 37 | let mut vec2 = Vec::::new(); 38 | while let Ok(Some(i)) = code.decode(&mut dv) { 39 | vec2.push(i - 1) 40 | } 41 | 42 | vec2 == vec 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/coding/traits.rs: -------------------------------------------------------------------------------- 1 | pub use std::io::Result; 2 | 3 | use stream::*; 4 | 5 | /// A universal code lets us encode arbitrary sized integers in a 6 | /// self-delimiting code. 7 | pub trait UniversalCode { 8 | /// Writes `value` to `sink`. 9 | fn encode(&self, sink: &mut W, value: u64) -> Result<()>; 10 | 11 | /// Reads a value from `source`. 12 | /// 13 | /// `Ok(None)` indicates (benign) EOF. 14 | fn decode(&self, source: &mut R) -> Result>; 15 | 16 | // TODO: bigint support 17 | } 18 | -------------------------------------------------------------------------------- /src/coding/trans.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use internal::errors::*; 3 | use stream::*; 4 | 5 | /// Lifts any code by adding one to each encoded value, and subtracting 6 | /// one from each decoded value. 7 | /// 8 | /// This is useful when the underlying code, like Elias codes, can’t handle 0s. 9 | pub struct Lift0(pub Code); 10 | 11 | impl UniversalCode for Lift0 { 12 | fn encode(&self, sink: &mut W, value: u64) -> Result<()> { 13 | if let Some(value) = value.checked_add(1) { 14 | self.0.encode(sink, value) 15 | } else { 16 | too_many_bits("Lift0::encode") 17 | } 18 | } 19 | 20 | fn decode(&self, source: &mut R) -> Result> { 21 | match self.0.decode(source) { 22 | Ok(Some(n)) => Ok(Some(n - 1)), 23 | otherwise => otherwise, 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/coding/unary.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use internal::errors::*; 3 | use stream::*; 4 | 5 | /// Encodes _n_ as _n_ zeroes followed by a one. 
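///
/// (Editorial example: a round trip through a `VecDeque<bool>` bit stream,
/// as in the tests below; the `succinct::coding` import path is assumed.)
///
/// ```
/// use std::collections::VecDeque;
/// use succinct::coding::{UniversalCode, Unary};
///
/// let mut bits = VecDeque::<bool>::new();
/// Unary.encode(&mut bits, 3).unwrap();   // writes 0, 0, 0, 1
/// assert_eq!(Some(3), Unary.decode(&mut bits).unwrap());
/// ```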
6 | pub struct Unary; 7 | 8 | impl UniversalCode for Unary { 9 | fn encode(&self, sink: &mut W, mut value: u64) -> Result<()> { 10 | while value > 0 { 11 | try!(sink.write_bit(false)); 12 | value = value - 1; 13 | } 14 | 15 | try!(sink.write_bit(true)); 16 | 17 | Ok(()) 18 | } 19 | 20 | fn decode(&self, source: &mut R) -> Result> { 21 | let mut result = 0; 22 | let mut consumed = false; 23 | 24 | while let Some(bit) = try!(source.read_bit()) { 25 | if bit { return Ok(Some(result)); } 26 | // This can't overflow because it would require too many 27 | // unary digits to get there: 28 | result = result + 1; 29 | consumed = true; 30 | } 31 | 32 | if consumed { 33 | out_of_bits("Unary::decode") 34 | } else { 35 | Ok(None) 36 | } 37 | } 38 | } 39 | 40 | #[cfg(test)] 41 | mod test { 42 | use std::collections::VecDeque; 43 | use coding::*; 44 | 45 | #[test] 46 | fn test234() { 47 | let mut dv = VecDeque::::new(); 48 | 49 | Unary.encode(&mut dv, 2).unwrap(); 50 | Unary.encode(&mut dv, 3).unwrap(); 51 | Unary.encode(&mut dv, 4).unwrap(); 52 | 53 | assert_eq!(Some(2), Unary.decode(&mut dv).unwrap()); 54 | assert_eq!(Some(3), Unary.decode(&mut dv).unwrap()); 55 | assert_eq!(Some(4), Unary.decode(&mut dv).unwrap()); 56 | assert_eq!(None, Unary.decode(&mut dv).unwrap()); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/int_vec/int_vector.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use super::*; 4 | use bit_vec::{BitVec, BitVecMut}; 5 | use internal::vector_base::{VectorBase, self}; 6 | use space_usage::SpaceUsage; 7 | use storage::BlockType; 8 | 9 | /// Uncompressed vector of *k*-bit unsigned integers. 10 | /// 11 | /// The element width *k* is determined at vector creation time. 12 | /// 13 | /// `Block` gives the representation type. The element width *k* can 14 | /// never exceed the number of bits in `Block`. 15 | #[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] 16 | pub struct IntVector { 17 | element_bits: usize, 18 | base: VectorBase, 19 | } 20 | 21 | impl IntVector { 22 | /// Asserts that `element_bits` is valid. 23 | fn check_element_bits(element_bits: usize) { 24 | assert!(element_bits != 0, 25 | "IntVector: cannot have zero-size elements"); 26 | assert!(element_bits <= Block::nbits(), 27 | "IntVector: element size cannot exceed block size"); 28 | } 29 | 30 | fn check_value_random(element_bits: usize, element_value: Block) { 31 | assert!(element_value <= Block::low_mask(element_bits), 32 | "IntVector: value to large for element size"); 33 | } 34 | 35 | fn check_value(&self, element_value: Block) { 36 | Self::check_value_random(self.element_bits, element_value); 37 | } 38 | 39 | /// Checks `element_bits` before assembling an `IntVector`. 40 | fn create(element_bits: usize, base: VectorBase) -> Self { 41 | Self::check_element_bits(element_bits); 42 | IntVector { 43 | element_bits: element_bits, 44 | base: base, 45 | } 46 | } 47 | 48 | #[inline] 49 | fn compute_address_random(&self, bit_offset: u64, element_bits: usize, 50 | element_index: u64) -> u64 { 51 | element_index 52 | .checked_mul(element_bits as u64) 53 | .and_then(|offset| offset.checked_add(bit_offset)) 54 | .expect("IntVector: index overflow") 55 | } 56 | 57 | #[inline] 58 | fn compute_address(&self, element_index: u64) -> u64 { 59 | element_index 60 | .checked_mul(self.element_bits as u64) 61 | .expect("IntVector: index overflow") 62 | } 63 | 64 | /// Creates a new integer vector. 
65 | /// 66 | /// # Arguments 67 | /// 68 | /// - `element_bits` — the size of each element in bits; hence 69 | /// elements range from `0` to `2.pow(element_bits) - 1`. 70 | /// 71 | /// # Result 72 | /// 73 | /// The new, empty integer vector. 74 | pub fn new(element_bits: usize) -> Self { 75 | Self::create(element_bits, VectorBase::new()) 76 | } 77 | 78 | /// Creates a new, empty integer vector, allocating sufficient storage 79 | /// for `capacity` elements. 80 | pub fn with_capacity(element_bits: usize, capacity: u64) -> Self { 81 | Self::create(element_bits, 82 | VectorBase::with_capacity(element_bits, capacity)) 83 | } 84 | 85 | /// Creates a new, empty integer vector, allocating `block_capacity` 86 | /// blocks of storage. 87 | pub fn block_with_capacity(element_bits: usize, block_capacity: usize) 88 | -> Self { 89 | Self::create(element_bits, 90 | VectorBase::block_with_capacity(block_capacity)) 91 | } 92 | 93 | /// Creates a new integer vector containing `len` copies of `value`. 94 | pub fn with_fill(element_bits: usize, len: u64, value: Block) -> Self { 95 | Self::create(element_bits, 96 | VectorBase::with_fill(element_bits, len, value)) 97 | } 98 | 99 | /// Creates a new integer vector containing `block_len` copies of the 100 | /// block `value`. 101 | /// 102 | /// The length of the new vector will be the number of elements of size 103 | /// `element_bits` that fit in `block_len` blocks. 104 | pub fn block_with_fill(element_bits: usize, block_len: usize, 105 | value: Block) -> Self { 106 | Self::create(element_bits, 107 | VectorBase::block_with_fill(element_bits, 108 | block_len, 109 | value)) 110 | } 111 | 112 | /// Returns the element at a given index, also given an arbitrary 113 | /// element size and bit offset. 114 | /// 115 | /// This computes the location of the `element_index`th element 116 | /// supposing that elements are `element_bits` side, then adds 117 | /// `bit_offset` additional bits and returns the `element_bits`-bit 118 | /// value found at that location. 119 | /// 120 | /// # Panics 121 | /// 122 | /// Panics if the referenced bits are out of bounds. Bounds are 123 | /// considered to the end of the support array, even if that goes 124 | /// past the last element of the `IntArray`. 125 | pub fn get_random(&self, 126 | bit_offset: u64, 127 | element_bits: usize, 128 | element_index: u64) -> Block { 129 | let address = self.compute_address_random(bit_offset, 130 | element_bits, 131 | element_index); 132 | self.base.get_bits(self.element_bits, address, element_bits) 133 | } 134 | 135 | /// Sets the element at a given index to a given value, also given 136 | /// an arbitrary element size and bit offset. 137 | /// 138 | /// This computes the location of the `element_index`th element 139 | /// supposing that elements are `element_bits` side, then adds 140 | /// `bit_offset` additional bits and modifies the `element_bits`-bit 141 | /// value found at that location. 142 | /// 143 | /// # Panics 144 | /// 145 | /// - Panics if the referenced bits are out of bounds. Bounds are 146 | /// considered to the end of the support array, even if that goes 147 | /// past the last element of the `IntArray`. 148 | /// 149 | /// - Debug mode only: Panics if `element_value` is too large to 150 | /// fit in the element size. (TODO: What’s the right thing here?) 
151 | pub fn set_random(&mut self, bit_offset: u64, element_bits: usize, 152 | element_index: u64, element_value: Block) { 153 | Self::check_value_random(element_bits, element_value); 154 | 155 | let address = self.compute_address_random(bit_offset, 156 | element_bits, 157 | element_index); 158 | self.base.set_bits(self.element_bits, address, 159 | element_bits, element_value); 160 | } 161 | 162 | /// Pushes an element onto the end of the vector, increasing the 163 | /// length by 1. 164 | pub fn push(&mut self, element_value: Block) { 165 | self.check_value(element_value); 166 | self.base.push_bits(self.element_bits, element_value); 167 | } 168 | 169 | /// Removes and returns the last element of the vector, if present. 170 | pub fn pop(&mut self) -> Option { 171 | self.base.pop_bits(self.element_bits) 172 | } 173 | 174 | /// The number of elements the vector can hold without reallocating. 175 | pub fn capacity(&self) -> u64 { 176 | self.base.capacity(self.element_bits) 177 | } 178 | 179 | /// The number of blocks of elements the vector can hold without 180 | /// reallocating. 181 | pub fn block_capacity(&self) -> usize { 182 | self.base.block_capacity() 183 | } 184 | 185 | /// Resizes to the given number of elements, filling if necessary. 186 | pub fn resize(&mut self, n_elements: u64, fill: Block) { 187 | self.base.resize(self.element_bits, n_elements, fill); 188 | } 189 | 190 | /// Resizes to the given number of blocks, filling if necessary. 191 | pub fn block_resize(&mut self, n_blocks: usize, fill: Block) { 192 | self.base.block_resize(self.element_bits, n_blocks, fill); 193 | } 194 | 195 | /// Reserves capacity for at least `additional` more elements to be 196 | /// inserted in the given `IntVector`. 197 | /// 198 | /// The collection may reserve more space to avoid frequent 199 | /// reallocations. 200 | /// 201 | /// # Panics 202 | /// 203 | /// Panics if the size conditions of 204 | /// [`IntVector::::is_okay_size()`](struct.IntVector.html#method.is_okay_size) 205 | /// are not met. This will happen if the total number of bits 206 | /// overflows `u64`. 207 | pub fn reserve(&mut self, additional: u64) { 208 | self.base.reserve(self.element_bits, additional); 209 | } 210 | 211 | /// Reserves capacity for at least `additional` blocks of values to be 212 | /// inserted. 213 | /// 214 | /// The collection may reserve more space to avoid frequent 215 | /// reallocations. 216 | /// 217 | /// # Panics 218 | /// 219 | /// Panics if the number of blocks overflows a `usize`. 220 | pub fn block_reserve(&mut self, additional: usize) { 221 | self.base.block_reserve(additional); 222 | } 223 | 224 | /// Reserves capacity for at least `additional` more elements to be 225 | /// inserted in the given `IntVector`. 226 | /// 227 | /// Unlike [`reserve`](#method.reserve), does nothing if the 228 | /// capacity is already sufficient. 229 | /// 230 | /// # Panics 231 | /// 232 | /// Panics if the size conditions of 233 | /// [`IntVector::::is_okay_size()`](struct.IntVector.html#method.is_okay_size) 234 | /// are not met. This will happen if the total number of bits 235 | /// overflows `u64`. 236 | pub fn reserve_exact(&mut self, additional: u64) { 237 | self.base.reserve_exact(self.element_bits, additional); 238 | } 239 | 240 | /// Reserves capacity for at least `additional` blocks of values to be 241 | /// inserted. 242 | /// 243 | /// Unlike [`reserve_block`](#method.reserve_block), does nothing if the 244 | /// capacity is already sufficient. 
245 | /// 246 | /// The collection may reserve more space to avoid frequent 247 | /// reallocations. 248 | /// 249 | /// # Panics 250 | /// 251 | /// Panics if the number of blocks overflows a `usize`. 252 | pub fn block_reserve_exact(&mut self, additional: usize) { 253 | self.base.block_reserve_exact(additional); 254 | } 255 | 256 | /// Shrinks the capacity to just fit the number of elements. 257 | pub fn shrink_to_fit(&mut self) { 258 | self.base.shrink_to_fit(); 259 | } 260 | 261 | /// Shrinks to the given size. 262 | /// 263 | /// Does nothing if `n_elements` is greater than the current size. 264 | pub fn truncate(&mut self, n_elements: u64) { 265 | self.base.truncate(self.element_bits, n_elements); 266 | } 267 | 268 | /// Shrinks to the given number of blocks. 269 | /// 270 | /// Does nothing if `n_blocks` is greater than the current blocks. 271 | pub fn block_truncate(&mut self, n_blocks: usize) { 272 | self.base.block_truncate(self.element_bits, n_blocks); 273 | } 274 | 275 | /// Sets the size to 0 while retaining the allocated storage. 276 | pub fn clear(&mut self) { 277 | self.base.clear(); 278 | } 279 | 280 | /// Gets an iterator over the elements of the vector. 281 | pub fn iter(&self) -> Iter { 282 | Iter(vector_base::Iter::new(self.element_bits, &self.base)) 283 | } 284 | 285 | /// True if the element size matches the block size. 286 | #[inline] 287 | pub fn is_block_sized(&self) -> bool { 288 | self.element_bits() == Block::nbits() 289 | } 290 | 291 | /// True if elements are aligned within blocks. 292 | #[inline] 293 | pub fn is_aligned(&self) -> bool { 294 | Block::nbits() % self.element_bits() == 0 295 | } 296 | } 297 | 298 | impl IntVec for IntVector { 299 | type Block = Block; 300 | 301 | fn len(&self) -> u64 { 302 | self.base.len() 303 | } 304 | 305 | fn get(&self, element_index: u64) -> Block { 306 | if self.is_block_sized() { 307 | return self.base.get_block(element_index as usize); 308 | } 309 | 310 | let address = self.compute_address(element_index); 311 | self.base.get_bits(self.element_bits, address, self.element_bits) 312 | } 313 | 314 | fn element_bits(&self) -> usize { 315 | self.element_bits 316 | } 317 | } 318 | 319 | impl IntVecMut for IntVector { 320 | fn set(&mut self, element_index: u64, element_value: Block) { 321 | if self.is_block_sized() { 322 | self.base.set_block(self.element_bits, 323 | element_index as usize, 324 | element_value); 325 | return; 326 | } 327 | 328 | self.check_value(element_value); 329 | 330 | let address = self.compute_address(element_index); 331 | self.base.set_bits(self.element_bits, address, 332 | self.element_bits, element_value); 333 | } 334 | } 335 | 336 | impl BitVec for IntVector { 337 | type Block = Block; 338 | 339 | fn block_len(&self) -> usize { 340 | self.base.block_len() 341 | } 342 | 343 | fn bit_len(&self) -> u64 { 344 | self.element_bits as u64 * self.base.len() 345 | } 346 | 347 | fn get_block(&self, position: usize) -> Block { 348 | self.base.get_block(position) 349 | } 350 | } 351 | 352 | impl BitVecMut for IntVector { 353 | fn set_block(&mut self, position: usize, value: Block) { 354 | self.base.set_block(self.element_bits, position, value); 355 | } 356 | } 357 | 358 | /// An iterator over the elements of an [`IntVector`](struct.IntVector.html). 
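///
/// (Editorial note: returned by
/// [`IntVector::iter`](struct.IntVector.html#method.iter); it yields each
/// element in order as a `Block` value, as exercised by the `iter` test
/// below.)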
359 | #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 360 | pub struct Iter<'a, Block: BlockType + 'a = usize> 361 | (vector_base::Iter<'a, Block>); 362 | 363 | impl<'a, Block: BlockType> Iterator for Iter<'a, Block> { 364 | type Item = Block; 365 | 366 | fn next(&mut self) -> Option { 367 | self.0.next() 368 | } 369 | 370 | fn size_hint(&self) -> (usize, Option) { 371 | self.0.size_hint() 372 | } 373 | 374 | fn count(self) -> usize { 375 | self.0.count() 376 | } 377 | 378 | fn last(self) -> Option { 379 | self.0.last() 380 | } 381 | 382 | fn nth(&mut self, n: usize) -> Option { 383 | self.0.nth(n) 384 | } 385 | } 386 | 387 | #[cfg(target_pointer_width = "64")] 388 | impl<'a, Block: BlockType> ExactSizeIterator for Iter<'a, Block> { 389 | fn len(&self) -> usize { 390 | self.0.len() 391 | } 392 | } 393 | 394 | impl<'a, Block: BlockType> DoubleEndedIterator for Iter<'a, Block> { 395 | fn next_back(&mut self) -> Option { 396 | self.0.next_back() 397 | } 398 | } 399 | 400 | impl<'a, Block: BlockType + 'a> IntoIterator for &'a IntVector { 401 | type Item = Block; 402 | type IntoIter = Iter<'a, Block>; 403 | 404 | fn into_iter(self) -> Self::IntoIter { 405 | self.iter() 406 | } 407 | } 408 | 409 | impl fmt::Debug for IntVector 410 | where Block: BlockType + fmt::Debug { 411 | 412 | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 413 | try!(write!(formatter, "IntVector {{ element_bits: {}, elements: {{ ", 414 | self.element_bits())); 415 | 416 | for element in self { 417 | try!(write!(formatter, "{:?}, ", element)); 418 | } 419 | 420 | write!(formatter, "}} }}") 421 | } 422 | } 423 | 424 | impl SpaceUsage for IntVector { 425 | #[inline] 426 | fn is_stack_only() -> bool { false } 427 | 428 | #[inline] 429 | fn heap_bytes(&self) -> usize { 430 | self.base.heap_bytes() 431 | } 432 | } 433 | 434 | #[cfg(test)] 435 | mod test { 436 | use int_vec::{IntVector, IntVec, IntVecMut}; 437 | use bit_vec::*; 438 | 439 | #[test] 440 | fn create_empty() { 441 | let v: IntVector = IntVector::new(4); 442 | assert!(v.is_empty()); 443 | } 444 | 445 | #[test] 446 | fn block_sized() { 447 | let mut v = IntVector::::with_fill(32, 10, 0); 448 | assert_eq!(10, v.len()); 449 | 450 | assert_eq!(0, v.get(0)); 451 | assert_eq!(0, v.get(9)); 452 | 453 | v.set(0, 89); 454 | assert_eq!(89, v.get(0)); 455 | assert_eq!(0, v.get(1)); 456 | 457 | v.set(0, 56); 458 | v.set(1, 34); 459 | assert_eq!(56, v.get(0)); 460 | assert_eq!(34, v.get(1)); 461 | assert_eq!(0, v.get(2)); 462 | 463 | v.set(9, 12); 464 | assert_eq!(12, v.get(9)); 465 | } 466 | 467 | #[test] 468 | #[should_panic] 469 | fn block_sized_oob() { 470 | let v = IntVector::::with_fill(32, 10, 0); 471 | assert_eq!(0, v.get(10)); 472 | } 473 | 474 | #[test] 475 | fn aligned() { 476 | let mut v = IntVector::::with_fill(4, 20, 0); 477 | assert_eq!(20, v.len()); 478 | 479 | assert_eq!(0, v.get(0)); 480 | assert_eq!(0, v.get(9)); 481 | 482 | v.set(0, 13); 483 | assert_eq!(13, v.get(0)); 484 | assert_eq!(0, v.get(1)); 485 | 486 | v.set(1, 15); 487 | assert_eq!(13, v.get(0)); 488 | assert_eq!(15, v.get(1)); 489 | assert_eq!(0, v.get(2)); 490 | 491 | v.set(1, 4); 492 | v.set(19, 9); 493 | assert_eq!(13, v.get(0)); 494 | assert_eq!(4, v.get(1)); 495 | assert_eq!(0, v.get(2)); 496 | assert_eq!(9, v.get(19)); 497 | } 498 | 499 | #[test] 500 | #[should_panic] 501 | fn aligned_oob() { 502 | let v = IntVector::::with_fill(4, 20, 0); 503 | assert_eq!(0, v.get(20)); 504 | } 505 | 506 | #[test] 507 | fn unaligned() { 508 | let mut v = IntVector::::with_fill(5, 
20, 0); 509 | assert_eq!(20, v.len()); 510 | 511 | assert_eq!(0, v.get(0)); 512 | assert_eq!(0, v.get(9)); 513 | 514 | v.set(0, 13); 515 | assert_eq!(13, v.get(0)); 516 | assert_eq!(0, v.get(1)); 517 | 518 | v.set(1, 15); 519 | assert_eq!(13, v.get(0)); 520 | assert_eq!(15, v.get(1)); 521 | assert_eq!(0, v.get(2)); 522 | 523 | v.set(1, 4); 524 | v.set(19, 9); 525 | assert_eq!(13, v.get(0)); 526 | assert_eq!(4, v.get(1)); 527 | assert_eq!(0, v.get(2)); 528 | assert_eq!(9, v.get(19)); 529 | } 530 | 531 | #[test] 532 | #[should_panic] 533 | fn unaligned_oob() { 534 | let v = IntVector::::with_fill(5, 20, 0); 535 | assert_eq!(0, v.get(20)); 536 | } 537 | 538 | #[test] 539 | fn pop() { 540 | let mut v = IntVector::::new(7); 541 | assert_eq!(None, v.pop()); 542 | v.push(1); 543 | v.push(2); 544 | v.push(3); 545 | assert_eq!(Some(3), v.pop()); 546 | v.push(4); 547 | v.push(5); 548 | assert_eq!(Some(5), v.pop()); 549 | assert_eq!(Some(4), v.pop()); 550 | assert_eq!(Some(2), v.pop()); 551 | assert_eq!(Some(1), v.pop()); 552 | assert_eq!(None, v.pop()); 553 | } 554 | 555 | #[test] 556 | fn iter() { 557 | let mut v = IntVector::::new(13); 558 | v.push(1); 559 | v.push(1); 560 | v.push(2); 561 | v.push(3); 562 | v.push(5); 563 | 564 | assert_eq!(vec![1, 1, 2, 3, 5], v.iter().collect::>()); 565 | } 566 | 567 | #[test] 568 | fn debug() { 569 | let mut v = IntVector::::new(13); 570 | v.push(1); 571 | v.push(1); 572 | v.push(2); 573 | v.push(3); 574 | v.push(5); 575 | 576 | assert_eq!("IntVector { element_bits: 13, elements: { 1, 1, 2, 3, 5, } }".to_owned(), 577 | format!("{:?}", v)); 578 | } 579 | 580 | #[test] 581 | #[should_panic] 582 | fn value_overflow() { 583 | let mut v = IntVector::::new(3); 584 | v.push(78); // 78 is too big 585 | } 586 | 587 | #[test] 588 | fn bit_vec() { 589 | let mut v = IntVector::::new(1); 590 | v.push(1); 591 | v.push(0); 592 | v.push(0); 593 | v.push(1); 594 | 595 | assert!( v.get_bit(0)); 596 | assert!(! v.get_bit(1)); 597 | assert!(! v.get_bit(2)); 598 | assert!( v.get_bit(3)); 599 | 600 | v.set_bit(1, true); 601 | 602 | assert!( v.get_bit(0)); 603 | assert!( v.get_bit(1)); 604 | assert!(! v.get_bit(2)); 605 | assert!( v.get_bit(3)); 606 | } 607 | 608 | #[test] 609 | fn push_pop_equals() { 610 | let mut v = IntVector::::new(5); 611 | let mut u = IntVector::::new(5); 612 | 613 | v.push(5); 614 | u.push(5); 615 | assert!( v == u ); 616 | 617 | v.push(6); 618 | u.push(7); 619 | assert!( v != u ); 620 | 621 | v.pop(); 622 | u.pop(); 623 | assert!( v == u ); 624 | } 625 | 626 | #[test] 627 | fn block_size_elements_u16() { 628 | let mut v = IntVector::::new(16); 629 | v.push(0); 630 | v.push(!0); 631 | assert_eq!(Some(!0), v.pop()); 632 | assert_eq!(Some(0), v.pop()); 633 | assert_eq!(None, v.pop()); 634 | } 635 | 636 | #[test] 637 | fn block_size_elements_u64() { 638 | let mut v = IntVector::::new(64); 639 | v.push(0); 640 | v.push(!0); 641 | assert_eq!(Some(!0), v.pop()); 642 | assert_eq!(Some(0), v.pop()); 643 | assert_eq!(None, v.pop()); 644 | } 645 | } 646 | -------------------------------------------------------------------------------- /src/int_vec/mod.rs: -------------------------------------------------------------------------------- 1 | //! Vectors of *k*-bit unsigned integers. 
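//!
//! (Editorial example, assuming the module is reachable as
//! `succinct::int_vec`; the calls mirror the unit tests in `int_vector.rs`.)
//!
//! ```
//! use succinct::int_vec::{IntVector, IntVec, IntVecMut};
//!
//! // A vector of 4-bit elements stored in u64 blocks.
//! let mut v: IntVector<u64> = IntVector::with_fill(4, 20, 0);
//! v.set(1, 15);
//! assert_eq!(15, v.get(1));
//! assert_eq!(0, v.get(2));
//! assert_eq!(20, v.len());
//! ```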
2 | 3 | mod int_vector; 4 | pub use self::int_vector::*; 5 | 6 | mod traits; 7 | pub use self::traits::*; 8 | -------------------------------------------------------------------------------- /src/int_vec/traits.rs: -------------------------------------------------------------------------------- 1 | use storage::BlockType; 2 | 3 | /// An immutable array of integers of limited width. 4 | pub trait IntVec { 5 | /// The type of primitive value to represent elements. 6 | type Block: BlockType; 7 | 8 | /// The number of elements. 9 | fn len(&self) -> u64; 10 | 11 | /// Is the vector empty? 12 | #[inline] 13 | fn is_empty(&self) -> bool { 14 | self.len() == 0 15 | } 16 | 17 | /// The bit width of each element. 18 | fn element_bits(&self) -> usize; 19 | 20 | /// Fetches the value of the `index`th element. 21 | /// 22 | /// # Panics 23 | /// 24 | /// Panics if `index` is out of bounds. 25 | fn get(&self, index: u64) -> Self::Block; 26 | } 27 | 28 | /// A mutable array of integers of limited width. 29 | pub trait IntVecMut: IntVec { 30 | /// Updates the value of the `index`th element. 31 | /// 32 | /// # Panics 33 | /// 34 | /// - Panics if `index` is out of bounds. 35 | /// 36 | /// - May panic (?) if `element_value` is too large to 37 | /// fit in the element size. (TODO: What’s the right thing here?) 38 | fn set(&mut self, index: u64, value: Self::Block); 39 | } 40 | -------------------------------------------------------------------------------- /src/internal/errors.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Error, ErrorKind, Result}; 2 | 3 | pub fn out_of_bits(who: &str) -> Result { 4 | Err(Error::new(ErrorKind::UnexpectedEof, 5 | format!("{}: could not decode: more bits expected", 6 | who))) 7 | } 8 | 9 | pub fn too_many_bits(who: &str) -> Result { 10 | Err(Error::new(ErrorKind::InvalidData, 11 | format!("{}: value too big for type", who))) 12 | } 13 | -------------------------------------------------------------------------------- /src/internal/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod errors; 2 | pub mod search; 3 | pub mod vector_base; 4 | -------------------------------------------------------------------------------- /src/internal/search.rs: -------------------------------------------------------------------------------- 1 | use num_traits::PrimInt; 2 | 3 | /// Averages two `PrimInt`s without overflowing. 4 | pub fn average(x: P, y: P) -> P { 5 | let almost_average = (x >> 1) + (y >> 1); 6 | let extra_bit = (x & P::one()) + (y & P::one()) >> 1; 7 | 8 | almost_average + extra_bit 9 | } 10 | 11 | /// Finds the smallest `d: D` in the interval `start .. limit` such 12 | /// that `f(d) >= value`; requires that `f` be monotonically 13 | /// non-decreasing. 14 | /// 15 | /// Does not call `f` on `D`s outside the specified interval. 16 | pub fn binary_search_function( 17 | mut start: D, mut limit: D, value: R, f: F) -> Option 18 | where D: PrimInt, 19 | R: Ord, 20 | F: Fn(D) -> R { 21 | 22 | if start >= limit { return None; } 23 | if f(start) >= value { return Some(start); } 24 | 25 | // Now we know the answer isn't `start`, which means for every 26 | // candidate `mid`, `mid - 1` will still be in the domain of `f`. 
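    // (Editorial note: from this point the loop maintains the invariant
    // f(start - 1) < value, so the first `mid` with f(mid) >= value and
    // f(mid - 1) < value is the least index whose image reaches `value`.)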
27 | start = start + D::one(); 28 | 29 | while start < limit { 30 | let mid = average(start, limit); 31 | 32 | if f(mid) >= value { 33 | if f(mid - D::one()) < value { 34 | return Some(mid); 35 | } else { 36 | limit = mid; 37 | } 38 | } else { 39 | start = mid + D::one(); 40 | } 41 | } 42 | 43 | None 44 | } 45 | 46 | 47 | #[cfg(test)] 48 | mod test { 49 | use super::*; 50 | 51 | #[test] 52 | fn avg_2_4() { 53 | assert_eq!(3, average(2, 4)); 54 | } 55 | 56 | #[test] 57 | fn avg_2_5() { 58 | assert_eq!(3, average(2, 5)); 59 | } 60 | 61 | #[test] 62 | fn avg_3_4() { 63 | assert_eq!(3, average(3, 4)); 64 | } 65 | 66 | #[test] 67 | fn avg_3_5() { 68 | assert_eq!(4, average(3, 5)); 69 | } 70 | 71 | #[test] 72 | fn avg_big() { 73 | let big: usize = !0; 74 | assert_eq!(big - 1, average(big, big - 1)); 75 | assert_eq!(big - 1, average(big, big - 2)); 76 | assert_eq!(big - 1, average(big - 1, big - 1)); 77 | assert_eq!(big - 2, average(big - 2, big - 1)); 78 | assert_eq!(big - 2, average(big - 2, big - 2)); 79 | assert_eq!(big - 2, average(big - 1, big - 3)); 80 | } 81 | 82 | fn search_slice(value: usize, slice: &[usize]) 83 | -> Option { 84 | binary_search_function(0, slice.len(), value, |index| slice[index]) 85 | } 86 | 87 | const MAX_LEN: usize = 32; 88 | 89 | #[test] 90 | fn binary_search_01() { 91 | let mut vec = Vec::::with_capacity(MAX_LEN); 92 | 93 | for len in 0 .. MAX_LEN + 1 { 94 | for result in 0 .. len { 95 | vec.clear(); 96 | for _ in 0 .. result { vec.push(0); } 97 | for _ in result .. len { vec.push(1); } 98 | assert_eq!(Some(result), search_slice(1, &vec)); 99 | } 100 | 101 | vec.clear(); 102 | for _ in 0 .. len { vec.push(0) } 103 | assert_eq!(None, search_slice(1, &vec)); 104 | } 105 | } 106 | 107 | #[test] 108 | fn binary_search_02() { 109 | let mut vec = Vec::::with_capacity(MAX_LEN); 110 | 111 | for len in 0 .. MAX_LEN + 1 { 112 | for result in 0 .. len { 113 | vec.clear(); 114 | for _ in 0 .. result { vec.push(0); } 115 | for _ in result .. len { vec.push(2); } 116 | assert_eq!(Some(result), search_slice(1, &vec)); 117 | } 118 | 119 | vec.clear(); 120 | for _ in 0 .. len { vec.push(0) } 121 | assert_eq!(None, search_slice(1, &vec)); 122 | } 123 | } 124 | 125 | #[test] 126 | fn binary_search_iota() { 127 | let mut vec = Vec::::with_capacity(MAX_LEN); 128 | 129 | for len in 0 .. MAX_LEN + 1 { 130 | vec.clear(); 131 | for i in 0 .. len { vec.push(i); } 132 | 133 | for i in 0 .. len { 134 | assert_eq!(Some(i), search_slice(i, &vec)); 135 | } 136 | 137 | assert_eq!(None, search_slice(len, &vec)); 138 | } 139 | } 140 | 141 | // #[test] 142 | // fn binary_search_2iota() { 143 | // let mut vec = Vec::::with_capacity(MAX_LEN); 144 | 145 | // for len in 0 .. MAX_LEN + 1 { 146 | // vec.clear(); 147 | // for i in 0 .. len { vec.push(2 * i); } 148 | 149 | // // assert_eq!(Some(0), search_slice(0, &vec)); 150 | // for i in 1 .. len { 151 | // assert_eq!(Some(i), search_slice(2 * i, &vec)); 152 | // assert_eq!(Some(i), search_slice(2 * i - 1, &vec)); 153 | // } 154 | // } 155 | // } 156 | } 157 | -------------------------------------------------------------------------------- /src/internal/vector_base.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | #[cfg(target_pointer_width = "32")] 4 | use num_traits::ToPrimitive; 5 | 6 | use bit_vec::{BitVec, BitVecMut}; 7 | use space_usage::SpaceUsage; 8 | use storage::BlockType; 9 | 10 | /// VectorBase provides basic functionality for IntVector and BitVector. 
It 11 | /// doesn’t know its element size, but it does know (once provided its 12 | /// element size) how to maintain the invariants: 13 | /// 14 | /// 1. All blocks are in use storing elements. 15 | /// 2. Any bits not in use are zero. 16 | /// 17 | /// These two properties are what make it safe to use derived 18 | /// implementations of Eq, Ord, Hash, etc. 19 | /// 20 | /// Many `VectorBase` methods take `element_bits` as a parameter. For methods 21 | /// that create a vector, `element_bits` is checked for overflow. For other methods, 22 | /// it is assumed to have already been checked, so the client must ensure that it 23 | /// doesn’t pass bogus `element_bits` values. 24 | #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 25 | pub struct VectorBase { 26 | len: u64, 27 | vec: Vec, 28 | } 29 | 30 | #[inline] 31 | fn len_to_block_len(element_bits: usize, len: u64) -> Option { 32 | len.checked_mul(element_bits as u64) 33 | .and_then(Block::checked_ceil_div_nbits) 34 | } 35 | 36 | impl VectorBase { 37 | // Maintains the second invariant: extra bits are zero. 38 | #[inline] 39 | fn clear_extra_bits(&mut self, element_bits: usize) { 40 | let bit_len = self.len * element_bits as u64; 41 | self.vec.last_mut().map(|block| { 42 | let mask = Block::low_mask(Block::last_block_bits(bit_len)); 43 | *block = *block & mask; 44 | }); 45 | } 46 | 47 | // Sets the length based on the number of blocks in the underlying Vec. 48 | #[inline] 49 | fn set_len_from_blocks(&mut self, element_bits: usize) { 50 | self.len = Block::mul_nbits(self.vec.len()) / element_bits as u64; 51 | self.clear_extra_bits(element_bits); 52 | } 53 | 54 | #[inline] 55 | pub fn new() -> Self { 56 | Self::block_with_capacity(0) 57 | } 58 | 59 | #[inline] 60 | pub fn block_with_capacity(block_capacity: usize) -> Self { 61 | VectorBase { 62 | len: 0, 63 | vec: Vec::with_capacity(block_capacity) 64 | } 65 | } 66 | 67 | #[inline] 68 | pub fn with_capacity(element_bits: usize, capacity: u64) -> Self { 69 | Self::block_with_capacity( 70 | len_to_block_len::(element_bits, capacity) 71 | .expect("VectorBase::with_capacity: overflow")) 72 | } 73 | 74 | #[inline] 75 | pub fn block_with_fill(element_bits: usize, block_len: usize, fill: Block) 76 | -> Self { 77 | let mut result = VectorBase { 78 | len: 0, 79 | vec: vec![ fill; block_len ], 80 | }; 81 | 82 | result.set_len_from_blocks(element_bits); 83 | result 84 | } 85 | 86 | #[inline] 87 | pub fn with_fill(element_bits: usize, len: u64, value: Block) -> Self { 88 | let block_len = len_to_block_len::(element_bits, len) 89 | .expect("VectorBase::with_fill: overflow"); 90 | let mut result = VectorBase { 91 | len: len, 92 | vec: vec![ Block::zero(); block_len ], 93 | }; 94 | 95 | for i in 0 .. len { 96 | result.set_bits(element_bits, i * element_bits as u64, 97 | element_bits, value); 98 | } 99 | 100 | result 101 | } 102 | 103 | #[inline] 104 | pub fn get_block(&self, block_index: usize) -> Block { 105 | self.vec[block_index] 106 | } 107 | 108 | #[inline] 109 | pub fn set_block(&mut self, element_bits: usize, 110 | block_index: usize, value: Block) { 111 | self.vec[block_index] = value; 112 | if block_index + 1 == self.vec.len() { 113 | self.clear_extra_bits(element_bits); 114 | } 115 | } 116 | 117 | #[inline] 118 | pub fn get_bits(&self, element_bits: usize, index: u64, count: usize) 119 | -> Block { 120 | // If element_bits is legit then the RHS of the comparison can't overflow. 
121 | assert!(index + count as u64 <= self.len * element_bits as u64, 122 | "VectorBase::get_bits: out of bounds"); 123 | self.vec.get_bits(index, count) 124 | } 125 | 126 | #[inline] 127 | pub fn set_bits(&mut self, element_bits: usize, index: u64, 128 | count: usize, value: Block) { 129 | // If element_bits is legit then the RHS of the comparison can't overflow. 130 | assert!(index + count as u64 <= self.len * element_bits as u64, 131 | "VectorBase::set_bits: out of bounds"); 132 | self.vec.set_bits(index, count, value); 133 | } 134 | 135 | // PRECONDITION: element_bits == 1 136 | #[inline] 137 | pub fn get_bit(&self, index: u64) -> bool { 138 | assert!(index < self.len, "VectorBase::get_bit: out of bounds"); 139 | self.vec.get_bit(index) 140 | } 141 | 142 | // PRECONDITION: element_bits == 1 143 | #[inline] 144 | pub fn set_bit(&mut self, index: u64, value: bool) { 145 | assert!(index < self.len, "VectorBase::set_bit: out of bounds"); 146 | self.vec.set_bit(index, value); 147 | } 148 | 149 | #[inline] 150 | pub fn push_block(&mut self, element_bits: usize, value: Block) { 151 | self.vec.push(value); 152 | self.set_len_from_blocks(element_bits); 153 | } 154 | 155 | #[inline] 156 | pub fn pop_block(&mut self, element_bits: usize) -> Option { 157 | let result = self.vec.pop(); 158 | self.set_len_from_blocks(element_bits); 159 | result 160 | } 161 | 162 | #[inline] 163 | pub fn push_bits(&mut self, element_bits: usize, value: Block) { 164 | if element_bits as u64 * (self.len + 1) > Block::mul_nbits(self.vec.len()) { 165 | self.vec.push(Block::zero()); 166 | } 167 | 168 | let pos = self.len; 169 | self.len = pos + 1; 170 | self.set_bits(element_bits, pos as u64 * element_bits as u64, 171 | element_bits, value); 172 | } 173 | 174 | #[inline] 175 | pub fn pop_bits(&mut self, element_bits: usize) -> Option { 176 | if self.len == 0 { return None; } 177 | 178 | let bit_len = element_bits as u64 * (self.len - 1); 179 | let block_len = Block::ceil_div_nbits(bit_len); 180 | 181 | let result = self.get_bits(element_bits, bit_len, element_bits); 182 | self.set_bits(element_bits, bit_len, element_bits, Block::zero()); 183 | self.len -= 1; 184 | 185 | if self.vec.len() > block_len { self.vec.pop(); } 186 | 187 | Some(result) 188 | } 189 | 190 | // PRECONDITION: element_bits == 1 191 | #[inline] 192 | pub fn push_bit(&mut self, value: bool) { 193 | if self.len + 1 > Block::mul_nbits(self.vec.len()) { 194 | self.vec.push(Block::zero()); 195 | } 196 | 197 | let pos = self.len; 198 | self.len = pos + 1; 199 | self.set_bit(pos, value); 200 | } 201 | 202 | #[inline] 203 | pub fn pop_bit(&mut self) -> Option { 204 | if self.len == 0 { return None; } 205 | 206 | let new_len = self.len - 1; 207 | let result = self.get_bit(new_len); 208 | self.set_bit(new_len, false); 209 | self.len = new_len; 210 | 211 | let block_len = Block::ceil_div_nbits(new_len); 212 | if self.vec.len() > block_len { self.vec.pop(); } 213 | 214 | Some(result) 215 | } 216 | 217 | #[inline] 218 | pub fn block_len(&self) -> usize { 219 | self.vec.len() 220 | } 221 | 222 | #[inline] 223 | pub fn len(&self) -> u64 { 224 | self.len 225 | } 226 | 227 | #[inline] 228 | pub fn is_empty(&self) -> bool { 229 | self.vec.is_empty() 230 | } 231 | 232 | #[inline] 233 | pub fn block_capacity(&self) -> usize { 234 | self.vec.capacity() 235 | } 236 | 237 | #[inline] 238 | pub fn capacity(&self, element_bits: usize) -> u64 { 239 | Block::mul_nbits(self.block_capacity()) / element_bits as u64 240 | } 241 | 242 | #[inline] 243 | pub fn block_truncate(&mut self, 
element_bits: usize, block_len: usize) { 244 | if block_len < self.vec.len() { 245 | self.vec.truncate(block_len); 246 | self.set_len_from_blocks(element_bits); 247 | } 248 | } 249 | 250 | #[inline] 251 | pub fn truncate(&mut self, element_bits: usize, len: u64) { 252 | if len < self.len { 253 | let block_len = Block::ceil_div_nbits(len * element_bits as u64); 254 | self.vec.truncate(block_len); 255 | self.len = len; 256 | self.clear_extra_bits(element_bits); 257 | } 258 | } 259 | 260 | #[inline] 261 | pub fn clear(&mut self) { 262 | self.vec.clear(); 263 | self.len = 0; 264 | } 265 | 266 | #[inline] 267 | pub fn shrink_to_fit(&mut self) { 268 | self.vec.shrink_to_fit() 269 | } 270 | 271 | #[inline] 272 | pub fn block_reserve(&mut self, additional: usize) { 273 | self.vec.reserve(additional); 274 | } 275 | 276 | #[inline] 277 | pub fn block_reserve_exact(&mut self, additional: usize) { 278 | self.vec.reserve_exact(additional); 279 | } 280 | 281 | fn additional_blocks(&self, element_bits: usize, additional: u64) 282 | -> usize { 283 | self.len.checked_add(additional) 284 | .and_then(|e| e.checked_mul(element_bits as u64)) 285 | .and_then(Block::checked_ceil_div_nbits) 286 | .expect("VectorBase::reserve_(exact): overflow") 287 | .saturating_sub(self.vec.capacity()) 288 | } 289 | 290 | #[inline] 291 | pub fn reserve(&mut self, element_bits: usize, additional: u64) { 292 | let difference = self.additional_blocks(element_bits, additional); 293 | self.block_reserve(difference); 294 | } 295 | 296 | #[inline] 297 | pub fn reserve_exact(&mut self, element_bits: usize, additional: u64) { 298 | let difference = self.additional_blocks(element_bits, additional); 299 | self.block_reserve_exact(difference); 300 | } 301 | 302 | #[inline] 303 | pub fn block_resize(&mut self, element_bits: usize, 304 | block_len: usize, fill: Block) { 305 | self.vec.resize(block_len, fill); 306 | self.set_len_from_blocks(element_bits); 307 | } 308 | 309 | #[inline] 310 | pub fn resize(&mut self, element_bits: usize, len: u64, fill: Block) { 311 | let block_len = len_to_block_len::(element_bits, len) 312 | .expect("VectorBase::resize: overflow"); 313 | 314 | self.vec.resize(block_len, Block::zero()); 315 | let old_len = self.len; 316 | self.len = len; 317 | 318 | if len <= old_len { 319 | self.clear_extra_bits(element_bits); 320 | } else { 321 | for i in old_len .. 
len { 322 | self.set_bits(element_bits, i * element_bits as u64, 323 | element_bits, fill); 324 | } 325 | } 326 | } 327 | } 328 | 329 | #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 330 | pub struct Iter<'a, Block: BlockType + 'a> { 331 | start: u64, 332 | limit: u64, 333 | element_bits: usize, 334 | data: &'a VectorBase, 335 | } 336 | 337 | impl<'a, Block: BlockType> Iter<'a, Block> { 338 | #[inline] 339 | pub fn new(element_bits: usize, data: &'a VectorBase) -> Self { 340 | Iter { 341 | start: 0, 342 | limit: data.len(), 343 | element_bits: element_bits, 344 | data: data, 345 | } 346 | } 347 | } 348 | 349 | impl<'a, Block: BlockType> Iterator for Iter<'a, Block> { 350 | type Item = Block; 351 | 352 | #[inline] 353 | fn next(&mut self) -> Option { 354 | if self.start < self.limit { 355 | let result = self.data.get_bits( 356 | self.element_bits, 357 | self.element_bits as u64 * self.start, 358 | self.element_bits); 359 | self.start += 1; 360 | Some(result) 361 | } else { None } 362 | } 363 | 364 | #[cfg(target_pointer_width = "32")] 365 | #[inline] 366 | fn size_hint(&self) -> (usize, Option) { 367 | if let Some(len) = (self.limit - self.start).to_usize() { 368 | (len, Some(len)) 369 | } else { 370 | (0, None) 371 | } 372 | } 373 | 374 | #[cfg(target_pointer_width = "64")] 375 | #[inline] 376 | fn size_hint(&self) -> (usize, Option) { 377 | let len = self.len(); 378 | (len, Some(len)) 379 | } 380 | 381 | #[inline] 382 | fn count(self) -> usize { 383 | self.len() 384 | } 385 | 386 | #[inline] 387 | fn last(mut self) -> Option { 388 | self.next_back() 389 | } 390 | 391 | #[inline] 392 | fn nth(&mut self, n: usize) -> Option { 393 | self.start = self.start.checked_add(n as u64).unwrap_or(self.limit); 394 | self.next() 395 | } 396 | } 397 | 398 | #[cfg(target_pointer_width = "64")] 399 | impl<'a, Block: BlockType> ExactSizeIterator for Iter<'a, Block> { 400 | #[inline] 401 | fn len(&self) -> usize { 402 | (self.limit - self.start) as usize 403 | } 404 | } 405 | 406 | #[cfg(target_pointer_width = "32")] 407 | impl<'a, Block: BlockType> ExactSizeIterator for Iter<'a, Block> { 408 | #[inline] 409 | fn len(&self) -> usize { 410 | (self.limit - self.start) as usize 411 | } 412 | } 413 | 414 | impl<'a, Block: BlockType> DoubleEndedIterator for Iter<'a, Block> { 415 | #[inline] 416 | fn next_back(&mut self) -> Option { 417 | if self.start < self.limit { 418 | self.limit -= 1; 419 | Some(self.data.get_bits( 420 | self.element_bits, 421 | self.element_bits as u64 * self.limit, 422 | self.element_bits)) 423 | } else { None } 424 | } 425 | } 426 | 427 | impl SpaceUsage for VectorBase { 428 | #[inline] 429 | fn is_stack_only() -> bool { false } 430 | 431 | #[inline] 432 | fn heap_bytes(&self) -> usize { 433 | self.vec.heap_bytes() 434 | } 435 | } 436 | 437 | #[cfg(test)] 438 | mod test { 439 | use super::*; 440 | 441 | type VB = VectorBase; 442 | 443 | #[test] 444 | fn new() { 445 | let v = VB::new(); 446 | assert_eq!(0, v.len()); 447 | assert_eq!(0, v.block_len()); 448 | assert_eq!(0, v.capacity(5)); 449 | assert_eq!(0, v.block_capacity()); 450 | } 451 | 452 | #[test] 453 | fn block_with_capacity() { 454 | let v = VB::block_with_capacity(7); 455 | assert_eq!(0, v.len()); 456 | assert_eq!(0, v.block_len()); 457 | assert_eq!(7, v.capacity(8)); 458 | assert_eq!(14, v.capacity(4)); 459 | assert_eq!(11, v.capacity(5)); 460 | assert_eq!(7, v.block_capacity()); 461 | } 462 | 463 | #[test] 464 | fn with_capacity() { 465 | let v = VB::with_capacity(5, 5); 466 | assert_eq!(0, v.len()); 467 | 
assert_eq!(0, v.block_len()); 468 | assert_eq!(6, v.capacity(5)); 469 | assert_eq!(32, v.capacity(1)); 470 | assert_eq!(4, v.block_capacity()); 471 | } 472 | 473 | #[test] 474 | fn block_with_fill() { 475 | let v = VB::block_with_fill(5, 3, 0b01010101); 476 | assert_eq!(3, v.block_len()); 477 | assert_eq!(4, v.len()); 478 | assert_eq!(3, v.block_capacity()); 479 | assert_eq!(4, v.capacity(5)); 480 | 481 | assert_eq!(true, v.get_bit(0)); 482 | assert_eq!(false, v.get_bit(1)); 483 | assert_eq!(true, v.get_bit(2)); 484 | assert_eq!(false, v.get_bit(3)); 485 | 486 | assert_eq!(0b01010101, v.get_block(0)); 487 | assert_eq!(0b01010101, v.get_block(1)); 488 | assert_eq!(0b00000101, v.get_block(2)); 489 | 490 | assert_eq!(0b10101, v.get_bits(5, 0, 5)); 491 | assert_eq!(0b01010, v.get_bits(5, 1, 5)); 492 | assert_eq!(0b10101, v.get_bits(5, 2, 5)); 493 | assert_eq!(0b01010, v.get_bits(5, 3, 5)); 494 | assert_eq!(0b10101, v.get_bits(5, 4, 5)); 495 | assert_eq!(0b01010, v.get_bits(5, 5, 5)); 496 | } 497 | 498 | #[test] 499 | fn block_with_fill_7() { 500 | let v = VB::block_with_fill(7, 3, 0b01010101); 501 | assert_eq!(0b01010101, v.get_block(0)); 502 | assert_eq!(0b01010101, v.get_block(1)); 503 | assert_eq!(0b00010101, v.get_block(2)); 504 | } 505 | 506 | #[test] 507 | fn with_fill() { 508 | let mut v = VB::with_fill(5, 5, 0b10110); 509 | assert_eq!(5, v.len()); 510 | assert_eq!(4, v.block_len()); 511 | for _ in 0 .. 5 { 512 | assert_eq!(Some(0b10110), v.pop_bits(5)); 513 | } 514 | assert_eq!(0, v.len()); 515 | } 516 | 517 | #[test] 518 | fn set_block_5() { 519 | let mut v = VB::block_with_fill(5, 3, 0b01010101); 520 | assert_eq!(0b01010101, v.get_block(0)); 521 | assert_eq!(0b01010101, v.get_block(1)); 522 | assert_eq!(0b00000101, v.get_block(2)); 523 | 524 | v.set_block(5, 2, 0b11111111); 525 | assert_eq!(0b00001111, v.get_block(2)); 526 | } 527 | 528 | #[test] 529 | fn get_bits() { 530 | let v = VB::block_with_fill(5, 5, 0b01010101); 531 | assert_eq!(0b10101, v.get_bits(5, 0, 5)); 532 | assert_eq!(0b101, v.get_bits(5, 0, 3)); 533 | assert_eq!(0b010101, v.get_bits(5, 6, 6)); 534 | } 535 | 536 | #[test] 537 | fn set_bits() { 538 | let mut v = VB::block_with_fill(5, 10, 0); 539 | assert_eq!(0, v.get_bits(5, 0, 5)); 540 | assert_eq!(0, v.get_bits(5, 5, 5)); 541 | assert_eq!(0, v.get_bits(5, 10, 5)); 542 | 543 | v.set_bits(5, 0, 5, 17); 544 | v.set_bits(5, 5, 5, 2); 545 | v.set_bits(5, 10, 5, 8); 546 | 547 | assert_eq!(17, v.get_bits(5, 0, 5)); 548 | assert_eq!( 2, v.get_bits(5, 5, 5)); 549 | assert_eq!( 8, v.get_bits(5, 10, 5)); 550 | } 551 | 552 | #[test] 553 | fn set_bit() { 554 | let mut v = VB::block_with_fill(1, 2, 0); 555 | assert_eq!(16, v.len()); 556 | 557 | assert_eq!(false, v.get_bit(0)); 558 | assert_eq!(false, v.get_bit(1)); 559 | assert_eq!(false, v.get_bit(2)); 560 | assert_eq!(false, v.get_bit(3)); 561 | assert_eq!(false, v.get_bit(4)); 562 | assert_eq!(false, v.get_bit(5)); 563 | 564 | v.set_bit(1, true); 565 | v.set_bit(2, true); 566 | v.set_bit(5, true); 567 | 568 | assert_eq!(false, v.get_bit(0)); 569 | assert_eq!(true, v.get_bit(1)); 570 | assert_eq!(true, v.get_bit(2)); 571 | assert_eq!(false, v.get_bit(3)); 572 | assert_eq!(false, v.get_bit(4)); 573 | assert_eq!(true, v.get_bit(5)); 574 | } 575 | 576 | #[test] 577 | fn push_block() { 578 | let mut v = VB::new(); 579 | v.push_block(6, 0b11111111); 580 | assert_eq!(0b00111111, v.get_block(0)); 581 | assert_eq!(1, v.len()); 582 | 583 | v.push_block(6, 0b11111111); 584 | assert_eq!(0b00001111, v.get_block(1)); 585 | assert_eq!(2, 
v.len()); 586 | 587 | v.push_block(6, 0b11111111); 588 | assert_eq!(0b11111111, v.get_block(2)); 589 | assert_eq!(4, v.len()); 590 | } 591 | 592 | #[test] 593 | fn pop_block_after_push() { 594 | let mut v = VB::new(); 595 | v.push_block(6, 0b11111111); 596 | v.push_block(6, 0b11111111); 597 | v.push_block(6, 0b11111111); 598 | assert_eq!(Some(0b11111111), v.pop_block(6)); 599 | assert_eq!(Some(0b00001111), v.pop_block(6)); 600 | assert_eq!(Some(0b00111111), v.pop_block(6)); 601 | assert_eq!(None, v.pop_block(6)); 602 | } 603 | 604 | #[test] 605 | fn pop_block_after_fill() { 606 | let mut v = VB::block_with_fill(6, 3, 0b11111111); 607 | assert_eq!(0b11111111, v.get_block(0)); 608 | assert_eq!(0b11111111, v.get_block(1)); 609 | assert_eq!(0b11111111, v.get_block(2)); 610 | assert_eq!(Some(0b11111111), v.pop_block(6)); 611 | assert_eq!(Some(0b00001111), v.pop_block(6)); 612 | assert_eq!(Some(0b00111111), v.pop_block(6)); 613 | assert_eq!(None, v.pop_block(6)); 614 | } 615 | 616 | #[test] 617 | fn push_bits() { 618 | let mut v = VB::new(); 619 | v.push_bits(6, 0b100110); 620 | v.push_bits(6, 0b010100); 621 | v.push_bits(6, 0b001111); 622 | 623 | assert_eq!(0b00100110, v.get_block(0)); 624 | assert_eq!(0b11110101, v.get_block(1)); 625 | assert_eq!(0b00000000, v.get_block(2)); 626 | } 627 | 628 | #[test] 629 | fn pop_bits() { 630 | let mut v = VB::new(); 631 | v.push_bits(6, 0b100110); 632 | v.push_bits(6, 0b010100); 633 | v.push_bits(6, 0b001111); 634 | 635 | assert_eq!(Some(0b001111), v.pop_bits(6)); 636 | assert_eq!(0b00000101, v.get_block(1)); 637 | assert_eq!(Some(0b010100), v.pop_bits(6)); 638 | assert_eq!(0b00100110, v.get_block(0)); 639 | assert_eq!(Some(0b100110), v.pop_bits(6)); 640 | assert_eq!(None, v.pop_bits(6)); 641 | } 642 | 643 | #[test] 644 | fn push_bit() { 645 | let mut v = VB::new(); 646 | 647 | v.push_bit(false); 648 | v.push_bit(false); 649 | v.push_bit(true); 650 | assert_eq!(3, v.len()); 651 | assert_eq!(1, v.block_len()); 652 | v.push_bit(false); 653 | v.push_bit(true); 654 | v.push_bit(true); 655 | assert_eq!(0b00110100, v.get_block(0)); 656 | 657 | v.push_bit(true); 658 | v.push_bit(false); 659 | assert_eq!(8, v.len()); 660 | assert_eq!(1, v.block_len()); 661 | v.push_bit(true); 662 | assert_eq!(9, v.len()); 663 | assert_eq!(2, v.block_len()); 664 | assert_eq!(0b01110100, v.get_block(0)); 665 | assert_eq!(0b00000001, v.get_block(1)); 666 | } 667 | 668 | #[test] 669 | fn pop_bit() { 670 | let mut v = VB::block_with_fill(1, 2, 0b01010101); 671 | 672 | assert_eq!(2, v.block_len()); 673 | assert_eq!(16, v.len()); 674 | 675 | for _ in 0 .. 
8 { 676 | assert_eq!(Some(false), v.pop_bit()); 677 | assert_eq!(Some(true), v.pop_bit()); 678 | } 679 | 680 | assert_eq!(None, v.pop_bit()); 681 | 682 | assert_eq!(0, v.block_len()); 683 | assert_eq!(0, v.len()); 684 | } 685 | 686 | #[test] 687 | fn block_truncate() { 688 | let mut v = VB::new(); 689 | v.push_bits(5, 17); 690 | v.push_bits(5, 30); 691 | v.push_bits(5, 4); 692 | assert_eq!(3, v.len()); 693 | assert_eq!(2, v.block_len()); 694 | 695 | v.block_truncate(5, 1); 696 | assert_eq!(1, v.len()); 697 | assert_eq!(1, v.block_len()); 698 | assert_eq!(Some(17), v.pop_bits(5)); 699 | } 700 | 701 | #[test] 702 | fn truncate() { 703 | let mut v = VB::new(); 704 | v.push_bits(5, 0b10001); 705 | v.push_bits(5, 0b11110); 706 | v.push_bits(5, 0b00100); 707 | 708 | v.truncate(5, 2); 709 | assert_eq!(2, v.len()); 710 | assert_eq!(2, v.block_len()); 711 | assert_eq!(0b10001, v.get_bits(5, 0, 5)); 712 | assert_eq!(0b11110, v.get_bits(5, 5, 5)); 713 | assert_eq!(0b11010001, v.get_block(0)); 714 | assert_eq!(0b00000011, v.get_block(1)); 715 | 716 | v.truncate(5, 1); 717 | assert_eq!(1, v.len()); 718 | assert_eq!(1, v.block_len()); 719 | assert_eq!(0b10001, v.get_bits(5, 0, 5)); 720 | assert_eq!(0b00010001, v.get_block(0)); 721 | 722 | v.truncate(5, 2); 723 | } 724 | 725 | #[test] 726 | fn shrink_to_fit() { 727 | let mut v = VB::new(); 728 | for i in 0 .. 5 { 729 | v.push_bits(5, i); 730 | } 731 | v.shrink_to_fit(); 732 | assert_eq!(4, v.block_capacity()); 733 | } 734 | 735 | #[test] 736 | fn block_resize() { 737 | let mut v = VB::new(); 738 | v.push_bits(5, 0b11010); 739 | v.block_resize(5, 3, 0b11111111); 740 | assert_eq!(0b11010, v.get_bits(5, 0, 5)); 741 | assert_eq!(0b11000, v.get_bits(5, 5, 5)); 742 | assert_eq!(0b11111, v.get_bits(5, 10, 5)); 743 | 744 | v.block_resize(5, 1, 0b11111111); 745 | assert_eq!(1, v.block_len()); 746 | assert_eq!(1, v.len()); 747 | assert_eq!(0b00011010, v.get_block(0)); 748 | } 749 | 750 | #[test] 751 | fn resize() { 752 | let mut v = VB::new(); 753 | v.push_bits(5, 0b11010); 754 | assert_eq!(1, v.len()); 755 | assert_eq!(0b00011010, v.get_block(0)); 756 | 757 | v.resize(5, 3, 0b01010); 758 | assert_eq!(3, v.len()); 759 | assert_eq!(0b11010, v.get_bits(5, 0, 5)); 760 | assert_eq!(0b01010, v.get_bits(5, 5, 5)); 761 | assert_eq!(0b01010, v.get_bits(5, 10, 5)); 762 | assert_eq!(0b01011010, v.get_block(0)); 763 | assert_eq!(0b00101001, v.get_block(1)); 764 | 765 | v.resize(5, 1, 0b01010); 766 | assert_eq!(1, v.block_len()); 767 | assert_eq!(1, v.len()); 768 | assert_eq!(0b00011010, v.get_block(0)); 769 | } 770 | 771 | #[test] #[should_panic] 772 | fn with_capacity_overflow() { 773 | VB::with_capacity(5, !0); 774 | } 775 | 776 | #[test] #[should_panic] 777 | fn get_block_oob() { 778 | let v = VB::new(); 779 | v.get_block(0); 780 | } 781 | 782 | #[test] #[should_panic] 783 | fn set_block_oob() { 784 | let mut v = VB::block_with_fill(5, 2, 0); 785 | v.set_block(5, 2, 0); 786 | } 787 | 788 | #[test] #[should_panic] 789 | fn get_bits_oob1() { 790 | let mut v = VB::new(); 791 | v.push_bits(5, 0); 792 | v.get_bits(5, 5, 5); 793 | } 794 | 795 | #[test] #[should_panic] 796 | fn get_bits_oob2() { 797 | let v = VB::with_fill(5, 2, 0); 798 | v.get_bits(5, 6, 5); 799 | } 800 | 801 | #[test] #[should_panic] 802 | fn set_bits_oob() { 803 | let mut v = VB::with_fill(5, 2, 0); 804 | v.set_bits(5, 10, 5, 0); 805 | } 806 | 807 | #[test] #[should_panic] 808 | fn get_bit_oob() { 809 | let v = VB::with_fill(1, 6, 0); 810 | v.get_bit(6); 811 | } 812 | 813 | #[test] #[should_panic] 814 | fn 
set_bit_oob() { 815 | let mut v = VB::with_fill(1, 5, 0); 816 | v.set_bit(6, true); 817 | } 818 | 819 | #[test] #[should_panic] 820 | fn reserve_overflow() { 821 | let mut v = VB::new(); 822 | v.reserve(5, !0) 823 | } 824 | } 825 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Succinct data structures for Rust. 2 | //! 3 | //! So far we have: 4 | //! 5 | //! - [bit vectors](bit_vector/struct.BitVec.html) and [bit 6 | //! buffers](stream/struct.BitBuffer.html); 7 | //! - [integer vectors](int_vector/struct.IntVec.html) with arbitrary-sized 8 | //! (1- to 64-bit) elements; 9 | //! - a variety of [universal codes](coding/index.html; 10 | //! - constant-time [rank](struct.JacobsonRank.html) queries; and 11 | //! - *O*(lg lg *n*)-time [select](struct.BinSearchSelect.html) queries 12 | //! based on binary search over ranks. 13 | //! 14 | //! # Usage 15 | //! 16 | //! It’s [on crates.io](https://crates.io/crates/succinct), so you can add 17 | //! 18 | //! ```toml 19 | //! [dependencies] 20 | //! succinct = "0.5.2" 21 | //! ``` 22 | //! 23 | //! to your `Cargo.toml`. 24 | 25 | #![doc(html_root_url = "https://docs.rs/succinct/0.5.2")] 26 | #![warn(missing_docs)] 27 | 28 | extern crate byteorder; 29 | extern crate num_traits; 30 | 31 | #[cfg(test)] 32 | extern crate quickcheck; 33 | 34 | #[macro_use] 35 | mod macros; 36 | 37 | mod internal; 38 | 39 | pub mod broadword; 40 | pub mod coding; 41 | pub mod storage; 42 | pub mod stream; 43 | 44 | mod space_usage; 45 | pub use space_usage::SpaceUsage; 46 | 47 | pub mod bit_vec; 48 | pub use bit_vec::{BitVec, BitVecMut, BitVecPush, BitVector}; 49 | 50 | pub mod int_vec; 51 | pub use int_vec::{IntVec, IntVecMut, IntVector}; 52 | 53 | pub mod rank; 54 | pub use rank::{BitRankSupport, JacobsonRank, Rank9}; 55 | 56 | pub mod select; 57 | pub use select::{Select1Support, BinSearchSelect}; 58 | 59 | -------------------------------------------------------------------------------- /src/macros.rs: -------------------------------------------------------------------------------- 1 | //! Macros for export. 2 | 3 | /// Implements `SpaceUsage` for a stack-only (`Copy`) type. 4 | /// 5 | /// # Example 6 | /// 7 | /// ``` 8 | /// # #[macro_use] extern crate succinct; 9 | /// use std::mem; 10 | /// use succinct::SpaceUsage; 11 | /// 12 | /// # #[allow(dead_code)] 13 | /// struct Point { x: u32, y: u32 } 14 | /// 15 | /// impl_stack_only_space_usage!(Point); 16 | /// 17 | /// fn main() { 18 | /// let point = Point { x: 0, y: 0 }; 19 | /// assert_eq!(point.total_bytes(), mem::size_of::()); 20 | /// } 21 | /// ``` 22 | #[macro_export] 23 | macro_rules! impl_stack_only_space_usage { 24 | ( $t:ty ) => 25 | { 26 | impl $crate::SpaceUsage for $t { 27 | #[inline] fn is_stack_only() -> bool { true } 28 | #[inline] fn heap_bytes(&self) -> usize { 0 } 29 | } 30 | }; 31 | } 32 | 33 | /// Implements `BitVec` for a type that contains a `BitVec` field. 34 | #[macro_export] 35 | macro_rules! 
impl_bit_vec_adapter { 36 | ( $block:ty, $field:ident ) 37 | => 38 | { 39 | type Block = $block; 40 | 41 | fn bit_len(&self) -> u64 { self.$field.bit_len() } 42 | 43 | fn block_len(&self) -> usize { self.$field.block_len() } 44 | 45 | fn get_block(&self, index: usize) -> $block { 46 | self.$field.get_block(index) 47 | } 48 | 49 | fn get_bit(&self, index: u64) -> bool { 50 | self.$field.get_bit(index) 51 | } 52 | 53 | fn get_bits(&self, index: u64, count: usize) -> $block { 54 | self.$field.get_bits(index, count) 55 | } 56 | } 57 | } 58 | 59 | /// Implements `RankSupport` for a type that contains a `RankSupport` field. 60 | #[macro_export] 61 | macro_rules! impl_rank_support_adapter { 62 | ( $over:ty, $field:ident ) 63 | => 64 | { 65 | type Over = $over; 66 | 67 | fn rank(&self, index: u64, value: Self::Over) -> u64 { 68 | self.$field.rank(index, value) 69 | } 70 | 71 | fn limit(&self) -> u64 { 72 | self.$field.limit() 73 | } 74 | } 75 | } 76 | 77 | /// Implements `BitRankSupport` for a type that contains a `BitRankSupport` 78 | /// field. 79 | #[macro_export] 80 | macro_rules! impl_bit_rank_support_adapter { 81 | ( $field:ident ) 82 | => 83 | { 84 | fn rank1(&self, index: u64) -> u64 { 85 | self.$field.rank1(index) 86 | } 87 | 88 | fn rank0(&self, index: u64) -> u64 { 89 | self.$field.rank0(index) 90 | } 91 | } 92 | } 93 | 94 | /// Implements `Select1Support` for a type that contains a `Select1Support` 95 | /// field. 96 | #[macro_export] 97 | macro_rules! impl_select1_support_adapter { 98 | ( $field:ident ) 99 | => 100 | { 101 | fn select1(&self, index: u64) -> Option { 102 | self.$field.select1(index) 103 | } 104 | } 105 | } 106 | 107 | /// Implements `Select0Support` for a type that contains a `Select0Support` 108 | /// field. 109 | #[macro_export] 110 | macro_rules! impl_select0_support_adapter { 111 | ( $field:ident ) 112 | => 113 | { 114 | fn select0(&self, index: u64) -> Option { 115 | self.$field.select0(index) 116 | } 117 | } 118 | } 119 | 120 | /// Implements `SelectSupport` for a type that contains a `SelectSupport` 121 | /// field. 122 | #[macro_export] 123 | macro_rules! impl_select_support_adapter { 124 | ( $over:ty, $field:ident ) 125 | => 126 | { 127 | type Over = $over; 128 | 129 | fn select(&self, index: u64, value: Self::Over) -> Option { 130 | self.$field.select(index, value) 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/rank/jacobson.rs: -------------------------------------------------------------------------------- 1 | use num_traits::PrimInt; 2 | 3 | use bit_vec::BitVec; 4 | use int_vec::{IntVec, IntVector}; 5 | use space_usage::SpaceUsage; 6 | use storage::{Address, BlockType}; 7 | 8 | use super::{RankSupport, BitRankSupport}; 9 | 10 | /// Jacobson’s rank structure for fast rank queries over a `BitVec`. 11 | /// 12 | /// Construct with `JacobsonRank::new`. 13 | #[derive(Clone, Debug)] 14 | pub struct JacobsonRank { 15 | bit_store: Store, 16 | large_block_size: usize, 17 | large_block_ranks: IntVector, 18 | small_block_ranks: IntVector, 19 | } 20 | 21 | impl JacobsonRank { 22 | /// Creates a new rank support structure for the given bit vector. 
23 | pub fn new(bits: Store) -> Self { 24 | let n = bits.bit_len(); 25 | let lg_n = n.ceil_lg(); 26 | let lg2_n = lg_n * lg_n; 27 | 28 | let small_block_size = Store::Block::nbits(); 29 | let small_per_large = lg2_n.ceil_div(small_block_size); 30 | let large_block_size = small_block_size * small_per_large; 31 | let large_block_count = n / large_block_size as u64 + 1; 32 | let small_block_count = n / small_block_size as u64 + 1; 33 | 34 | let large_meta_size = (n + 1).ceil_lg(); 35 | let small_meta_size = (large_block_size + 1).ceil_lg(); 36 | 37 | let mut large_block_ranks = 38 | IntVector::with_capacity(large_meta_size, large_block_count); 39 | let mut small_block_ranks = 40 | IntVector::with_capacity(small_meta_size, small_block_count); 41 | 42 | let mut current_rank: u64 = 0; 43 | let mut last_large_rank: u64 = 0; 44 | let mut small_block_index: usize = 0; 45 | 46 | for i in 0 .. bits.block_len() { 47 | if small_block_index == 0 { 48 | large_block_ranks.push(current_rank); 49 | last_large_rank = current_rank; 50 | } 51 | 52 | let excess_rank = current_rank - last_large_rank; 53 | small_block_ranks.push(excess_rank); 54 | 55 | current_rank += bits.get_block(i).count_ones() as u64; 56 | small_block_index += 1; 57 | 58 | if small_block_index == small_per_large { 59 | small_block_index = 0; 60 | } 61 | } 62 | 63 | large_block_ranks.push(current_rank); 64 | let excess_rank = current_rank - last_large_rank; 65 | small_block_ranks.push(excess_rank); 66 | 67 | JacobsonRank { 68 | bit_store: bits, 69 | large_block_size: large_block_size, 70 | large_block_ranks: large_block_ranks, 71 | small_block_ranks: small_block_ranks, 72 | } 73 | } 74 | 75 | /// Borrows a reference to the underlying bit store. 76 | pub fn inner(&self) -> &Store { 77 | &self.bit_store 78 | } 79 | 80 | /// Returns the underlying bit store. 
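For context, a usage sketch in the style of the tests further down this file; it assumes the crate is pulled in from crates.io as `succinct` and uses only the re-exports shown in `src/lib.rs` above:

```rust
use succinct::{BitRankSupport, JacobsonRank};

fn main() {
    // Bits 0 and 16 are set in each of the four 32-bit words.
    let bits = vec![0b00000000_00000001_00000000_00000001u32; 4];
    let rank = JacobsonRank::new(bits);

    assert_eq!(1, rank.rank1(0));    // one 1 up to and including position 0
    assert_eq!(2, rank.rank1(16));   // the second 1 sits at bit 16
    assert_eq!(8, rank.rank1(127));  // two 1s per word, four words
}
```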
81 | pub fn into_inner(self) -> Store { 82 | self.bit_store 83 | } 84 | } 85 | 86 | impl RankSupport for JacobsonRank { 87 | type Over = bool; 88 | 89 | fn rank(&self, position: u64, value: bool) -> u64 { 90 | if value {self.rank1(position)} else {self.rank0(position)} 91 | } 92 | 93 | fn limit(&self) -> u64 { 94 | self.bit_store.bit_len() 95 | } 96 | } 97 | 98 | impl BitRankSupport for JacobsonRank { 99 | fn rank1(&self, position: u64) -> u64 { 100 | assert!(position < self.bit_len(), 101 | "JacobsonRank::rank1: out of bounds"); 102 | 103 | let large_block = position / self.large_block_size as u64; 104 | let address = Address::new::(position); 105 | 106 | let large_rank = self.large_block_ranks.get(large_block); 107 | let small_rank = self.small_block_ranks.get(address.block_index as u64); 108 | let bits_rank = self.bit_store.get_block(address.block_index) 109 | .rank1(address.bit_offset as u64); 110 | 111 | large_rank + small_rank + bits_rank 112 | } 113 | } 114 | 115 | impl BitVec for JacobsonRank { 116 | impl_bit_vec_adapter!(Store::Block, bit_store); 117 | } 118 | 119 | impl SpaceUsage for JacobsonRank { 120 | #[inline] 121 | fn is_stack_only() -> bool { false } 122 | 123 | fn heap_bytes(&self) -> usize { 124 | self.large_block_ranks.heap_bytes() 125 | + self.small_block_ranks.heap_bytes() 126 | + self.bit_store.heap_bytes() 127 | } 128 | } 129 | 130 | #[cfg(test)] 131 | mod test { 132 | use super::*; 133 | use rank::BitRankSupport; 134 | 135 | #[test] 136 | fn rank1() { 137 | let vec = vec![ 0b00000000000001110000000000000001u32; 1024 ]; 138 | let rank = JacobsonRank::new(vec); 139 | 140 | assert_eq!(1, rank.rank1(0)); 141 | assert_eq!(1, rank.rank1(1)); 142 | assert_eq!(1, rank.rank1(2)); 143 | assert_eq!(1, rank.rank1(7)); 144 | assert_eq!(2, rank.rank1(16)); 145 | assert_eq!(3, rank.rank1(17)); 146 | assert_eq!(4, rank.rank1(18)); 147 | assert_eq!(4, rank.rank1(19)); 148 | assert_eq!(4, rank.rank1(20)); 149 | 150 | assert_eq!(16, rank.rank1(4 * 32 - 1)); 151 | assert_eq!(17, rank.rank1(4 * 32)); 152 | assert_eq!(2048, rank.rank1(512 * 32 - 1)); 153 | assert_eq!(2049, rank.rank1(512 * 32)); 154 | 155 | assert_eq!(4096, rank.rank1(1024 * 32 - 1)); 156 | } 157 | 158 | // This test is a sanity check that we aren’t taking up too much 159 | // space with the metadata. 160 | #[test] 161 | fn space() { 162 | use space_usage::*; 163 | 164 | for i in 0 .. 50 { 165 | let vec = vec![ 0b10000000000000001110000000000000u32; 166 | 1000 + i ]; 167 | let rank = JacobsonRank::new(&*vec); 168 | 169 | assert!((rank.total_bytes() as f64 / vec.total_bytes() as f64) 170 | < 1.5); 171 | } 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/rank/mod.rs: -------------------------------------------------------------------------------- 1 | //! Support for fast rank queries. 2 | 3 | mod jacobson; 4 | pub use self::jacobson::*; 5 | 6 | mod rank9; 7 | pub use self::rank9::*; 8 | 9 | mod traits; 10 | pub use self::traits::*; 11 | 12 | mod prim; 13 | -------------------------------------------------------------------------------- /src/rank/prim.rs: -------------------------------------------------------------------------------- 1 | use rank::{BitRankSupport, RankSupport}; 2 | use storage::BlockType; 3 | 4 | macro_rules! 
impl_rank_support_prim { 5 | ( $t:ident ) 6 | => 7 | { 8 | impl RankSupport for $t { 9 | type Over = bool; 10 | 11 | fn rank(&self, position: u64, value: bool) -> u64 { 12 | if value {self.rank1(position)} else {self.rank0(position)} 13 | } 14 | 15 | fn limit(&self) -> u64 { 16 | Self::nbits() as u64 17 | } 18 | } 19 | 20 | impl BitRankSupport for $t { 21 | fn rank1(&self, position: u64) -> u64 { 22 | debug_assert!(position < Self::nbits() as u64); 23 | 24 | let mask = Self::low_mask((position + 1) as usize); 25 | (*self & mask).count_ones() as u64 26 | } 27 | } 28 | } 29 | } 30 | 31 | impl_rank_support_prim!(u8); 32 | impl_rank_support_prim!(u16); 33 | impl_rank_support_prim!(u32); 34 | impl_rank_support_prim!(u64); 35 | impl_rank_support_prim!(usize); 36 | 37 | #[cfg(test)] 38 | mod test { 39 | use rank::*; 40 | 41 | #[test] 42 | fn rank1() { 43 | assert_eq!(0, 0b00000000u8.rank1(0)); 44 | assert_eq!(0, 0b00000000u8.rank1(7)); 45 | assert_eq!(1, 0b01010101u8.rank1(0)); 46 | assert_eq!(1, 0b01010101u8.rank1(1)); 47 | assert_eq!(2, 0b01010101u8.rank1(2)); 48 | assert_eq!(2, 0b01010101u8.rank1(3)); 49 | 50 | assert_eq!(3, 0b00001111u8.rank1(2)); 51 | assert_eq!(4, 0b00001111u8.rank1(3)); 52 | assert_eq!(4, 0b00001111u8.rank1(4)); 53 | assert_eq!(4, 0b00001111u8.rank1(5)); 54 | assert_eq!(4, 0b00001111u8.rank1(7)); 55 | 56 | assert_eq!(0, 0b11110000u8.rank1(0)); 57 | assert_eq!(0, 0b11110000u8.rank1(3)); 58 | assert_eq!(1, 0b11110000u8.rank1(4)); 59 | assert_eq!(2, 0b11110000u8.rank1(5)); 60 | assert_eq!(4, 0b11110000u8.rank1(7)); 61 | } 62 | 63 | #[test] 64 | fn rank0() { 65 | assert_eq!(1, 0b00000000u8.rank0(0)); 66 | assert_eq!(8, 0b00000000u8.rank0(7)); 67 | assert_eq!(0, 0b01010101u8.rank0(0)); 68 | assert_eq!(1, 0b01010101u8.rank0(1)); 69 | assert_eq!(1, 0b01010101u8.rank0(2)); 70 | assert_eq!(2, 0b01010101u8.rank0(3)); 71 | } 72 | 73 | #[test] 74 | fn rank() { 75 | assert_eq!(1, 0b00000000u8.rank(0, false)); 76 | assert_eq!(8, 0b00000000u8.rank(7, false)); 77 | assert_eq!(0, 0b01010101u8.rank(0, false)); 78 | assert_eq!(1, 0b01010101u8.rank(1, false)); 79 | assert_eq!(1, 0b01010101u8.rank(2, false)); 80 | assert_eq!(2, 0b01010101u8.rank(3, false)); 81 | 82 | assert_eq!(0, 0b00000000u8.rank(0, true)); 83 | assert_eq!(0, 0b00000000u8.rank(7, true)); 84 | assert_eq!(1, 0b01010101u8.rank(0, true)); 85 | assert_eq!(1, 0b01010101u8.rank(1, true)); 86 | assert_eq!(2, 0b01010101u8.rank(2, true)); 87 | assert_eq!(2, 0b01010101u8.rank(3, true)); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/rank/rank9.rs: -------------------------------------------------------------------------------- 1 | use num_traits::ToPrimitive; 2 | 3 | use bit_vec::BitVec; 4 | use rank::{RankSupport, BitRankSupport}; 5 | use space_usage::SpaceUsage; 6 | use storage::BlockType; 7 | 8 | /// Vigna’s rank structure for fast rank queries over a `BitVec`. 
9 | #[derive(Clone, Debug)] 10 | pub struct Rank9 { 11 | bit_store: Store, 12 | counts: Vec, 13 | } 14 | 15 | #[repr(C)] 16 | #[derive(Clone, Copy, Debug)] 17 | struct Rank9Cell { 18 | level1: u64, 19 | level2: Level2, 20 | } 21 | 22 | #[repr(C)] 23 | #[derive(Clone, Copy, Debug)] 24 | struct Level2(u64); 25 | 26 | impl Level2 { 27 | fn new() -> Self { Level2(0) } 28 | 29 | #[cfg(target_pointer_width = "64")] 30 | fn get(&self, t: usize) -> u64 { 31 | debug_assert!(t < 8); 32 | 33 | let t = t.wrapping_sub(1); 34 | let shift = t.wrapping_add(t >> 60 & 8) * 9; 35 | self.0 >> shift & 0x1FF 36 | } 37 | 38 | #[cfg(target_pointer_width = "64")] 39 | fn set(&mut self, t: usize, value: u64) { 40 | debug_assert!(t < 8); 41 | 42 | let t = t.wrapping_sub(1); 43 | let shift = t.wrapping_add(t >> 60 & 8) * 9; 44 | 45 | let old_part = self.0 & !(0x1FF << shift); 46 | let new_part = (value & 0x1FF) << shift; 47 | 48 | self.0 = old_part | new_part; 49 | } 50 | 51 | #[cfg(target_pointer_width = "32")] 52 | fn get(&self, t: usize) -> u64 { 53 | debug_assert!(t < 8); 54 | 55 | let t = t.wrapping_sub(1); 56 | let shift = t.wrapping_add(t >> 28 & 8) * 9; 57 | self.0 >> shift & 0x1FF 58 | } 59 | 60 | #[cfg(target_pointer_width = "32")] 61 | fn set(&mut self, t: usize, value: u64) { 62 | debug_assert!(t < 8); 63 | 64 | let t = t.wrapping_sub(1); 65 | let shift = t.wrapping_add(t >> 28 & 8) * 9; 66 | 67 | let old_part = self.0 & !(0x1FF << shift); 68 | let new_part = (value & 0x1FF) << shift; 69 | 70 | self.0 = old_part | new_part; 71 | } 72 | } 73 | 74 | impl> Rank9 { 75 | /// Creates a new rank9 structure. 76 | pub fn new(bits: Store) -> Self { 77 | let bb_count = bits.block_len().ceil_div(8); 78 | let mut result = Vec::with_capacity(bb_count + 1); 79 | 80 | let mut level1_count = 0; 81 | let mut level2_count = 0; 82 | 83 | // Scope for store_counts's borrow of result 84 | { 85 | let mut store_counts = |i: usize, 86 | level1_count: &mut u64, 87 | level2_count: &mut u64| { 88 | let basic_block_index = i / 8; 89 | let word_offset = i % 8; 90 | 91 | if word_offset == 0 { 92 | result.push(Rank9Cell { 93 | level1: *level1_count, 94 | level2: Level2::new(), 95 | }); 96 | *level2_count = 0; 97 | } else { 98 | result[basic_block_index].level2 99 | .set(word_offset, *level2_count); 100 | } 101 | }; 102 | 103 | for i in 0..bits.block_len() { 104 | store_counts(i, &mut level1_count, &mut level2_count); 105 | 106 | let word_count = bits.get_block(i).count_ones() as u64; 107 | level1_count += word_count; 108 | level2_count += word_count; 109 | } 110 | 111 | store_counts(bits.block_len(), 112 | &mut level1_count, &mut level2_count); 113 | } 114 | 115 | Rank9 { 116 | bit_store: bits, 117 | counts: result, 118 | } 119 | } 120 | 121 | /// Borrows a reference to the underlying bit store. 122 | pub fn inner(&self) -> &Store { 123 | &self.bit_store 124 | } 125 | 126 | /// Returns the underlying bit store. 
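A usage sketch in the style of the tests further down this file, assuming the crate is used as published on crates.io (`succinct`) with the `Rank9` and `BitRankSupport` re-exports from the crate root:

```rust
use succinct::{BitRankSupport, Rank9};

fn main() {
    let bits = vec![0b1010u64; 8];            // bits 1 and 3 set in every word
    let rank = Rank9::new(bits);

    assert_eq!(0, rank.rank1(0));             // bit 0 is clear
    assert_eq!(1, rank.rank1(1));
    assert_eq!(2, rank.rank1(3));
    assert_eq!(2, rank.rank1(63));            // whole first word
    assert_eq!(16, rank.rank1(8 * 64 - 1));   // two 1s per word, eight words
}
```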
127 | pub fn into_inner(self) -> Store { 128 | self.bit_store 129 | } 130 | } 131 | 132 | impl> BitRankSupport for Rank9 { 133 | fn rank1(&self, position: u64) -> u64 { 134 | let bb_index = (position / 512).to_usize() 135 | .expect("Rank9::rank1: index overflow"); 136 | let word_index = (position / 64).to_usize() 137 | .expect("Rank9::rank1: index overflow"); 138 | let word_offset = word_index % 8; 139 | let bit_offset = position % 64; 140 | 141 | let cell = self.counts[bb_index]; 142 | 143 | let bb_portion = cell.level1; 144 | let word_portion = cell.level2.get(word_offset); 145 | let bit_portion = self.bit_store.get_block(word_index) 146 | .rank1(bit_offset); 147 | 148 | bb_portion + word_portion + bit_portion 149 | } 150 | } 151 | 152 | impl> RankSupport for Rank9 { 153 | type Over = bool; 154 | 155 | fn rank(&self, position: u64, value: bool) -> u64 { 156 | if value {self.rank1(position)} else {self.rank0(position)} 157 | } 158 | 159 | fn limit(&self) -> u64 { 160 | self.bit_store.bit_len() 161 | } 162 | } 163 | 164 | impl> BitVec for Rank9 { 165 | impl_bit_vec_adapter!(u64, bit_store); 166 | } 167 | 168 | impl_stack_only_space_usage!(Rank9Cell); 169 | impl_stack_only_space_usage!(Level2); 170 | 171 | impl SpaceUsage for Rank9 { 172 | fn is_stack_only() -> bool { false } 173 | 174 | fn heap_bytes(&self) -> usize { 175 | self.bit_store.heap_bytes() + self.counts.heap_bytes() 176 | } 177 | } 178 | 179 | #[test] 180 | fn level2() { 181 | let mut l2 = 182 | Level2(0b0_110010000_000000000_000000001_000001110_000001000_100000000_000000101); 183 | 184 | assert_eq!(0, l2.get(0)); 185 | assert_eq!(5, l2.get(1)); 186 | assert_eq!(256, l2.get(2)); 187 | assert_eq!(8, l2.get(3)); 188 | assert_eq!(14, l2.get(4)); 189 | assert_eq!(1, l2.get(5)); 190 | assert_eq!(0, l2.get(6)); 191 | assert_eq!(400, l2.get(7)); 192 | 193 | l2.set(3, 45); 194 | 195 | assert_eq!(0, l2.get(0)); 196 | assert_eq!(5, l2.get(1)); 197 | assert_eq!(256, l2.get(2)); 198 | assert_eq!(45, l2.get(3)); 199 | assert_eq!(14, l2.get(4)); 200 | assert_eq!(1, l2.get(5)); 201 | assert_eq!(0, l2.get(6)); 202 | assert_eq!(400, l2.get(7)); 203 | 204 | l2.set(7, 511); 205 | 206 | assert_eq!(0, l2.get(0)); 207 | assert_eq!(5, l2.get(1)); 208 | assert_eq!(256, l2.get(2)); 209 | assert_eq!(45, l2.get(3)); 210 | assert_eq!(14, l2.get(4)); 211 | assert_eq!(1, l2.get(5)); 212 | assert_eq!(0, l2.get(6)); 213 | assert_eq!(511, l2.get(7)); 214 | } 215 | 216 | #[cfg(test)] 217 | mod test { 218 | use super::*; 219 | use rank::BitRankSupport; 220 | 221 | #[test] 222 | fn rank1() { 223 | let vec = vec![ 0b00000000000001110000000000000001u64; 1024 ]; 224 | let rank = Rank9::new(vec); 225 | 226 | assert_eq!(1, rank.rank1(0)); 227 | assert_eq!(1, rank.rank1(1)); 228 | assert_eq!(1, rank.rank1(2)); 229 | assert_eq!(1, rank.rank1(7)); 230 | assert_eq!(2, rank.rank1(16)); 231 | assert_eq!(3, rank.rank1(17)); 232 | assert_eq!(4, rank.rank1(18)); 233 | assert_eq!(4, rank.rank1(19)); 234 | assert_eq!(4, rank.rank1(20)); 235 | 236 | assert_eq!(16, rank.rank1(4 * 64 - 1)); 237 | assert_eq!(17, rank.rank1(4 * 64)); 238 | assert_eq!(2048, rank.rank1(512 * 64 - 1)); 239 | assert_eq!(2049, rank.rank1(512 * 64)); 240 | 241 | assert_eq!(4096, rank.rank1(1024 * 64 - 1)); 242 | } 243 | 244 | // This test is a sanity check that we aren’t taking up too much 245 | // space with the metadata. 246 | #[test] 247 | fn space() { 248 | use space_usage::*; 249 | 250 | for i in 0 .. 
50 { 251 | let vec = vec![ 0u64; 1000 + i ]; 252 | let vec_bytes = vec.total_bytes() as f64; 253 | let rank = Rank9::new(vec); 254 | 255 | assert!(rank.total_bytes() as f64 / vec_bytes < 1.3); 256 | } 257 | } 258 | } 259 | -------------------------------------------------------------------------------- /src/rank/traits.rs: -------------------------------------------------------------------------------- 1 | /// Supports fast rank queries. 2 | /// 3 | /// Associated type `Over` gives the type that we can query about. For 4 | /// example, `RankSupport` lets us rank `0` and `1`, whereas 5 | /// `RankSupport` will rank arbitrary bytes. 6 | pub trait RankSupport { 7 | /// The type of value to rank. 8 | type Over: Copy; 9 | 10 | /// Returns the rank of the given value at a given position. 11 | /// 12 | /// This is the number of occurrences of `value` up to and including 13 | /// that position. 14 | /// 15 | /// # Panics 16 | /// 17 | /// Panics if `position >= self.limit()`. 18 | fn rank(&self, position: u64, value: Self::Over) -> u64; 19 | 20 | /// The size of the vector being ranked. 21 | fn limit(&self) -> u64; 22 | } 23 | 24 | /// Supports fast rank queries over `bool`s. 25 | pub trait BitRankSupport: RankSupport { 26 | /// Returns the rank of 1 at the given position. 27 | /// 28 | /// This is the number of occurrences of 1 up to and including that 29 | /// position. 30 | fn rank1(&self, position: u64) -> u64 { 31 | self.rank(position, true) 32 | } 33 | 34 | /// Returns the rank of 0 at the given position. 35 | /// 36 | /// This is the number of occurrences of 0 up to and including that 37 | /// position. 38 | fn rank0(&self, position: u64) -> u64 { 39 | position + 1 - self.rank1(position) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/select/bin_search.rs: -------------------------------------------------------------------------------- 1 | use internal::search::binary_search_function; 2 | use rank::{BitRankSupport, RankSupport}; 3 | use space_usage::SpaceUsage; 4 | use bit_vec::BitVec; 5 | use super::{SelectSupport, Select1Support, Select0Support}; 6 | 7 | /// Performs a select query by binary searching rank queries. 8 | pub struct BinSearchSelect { 9 | rank_support: Rank, 10 | } 11 | 12 | /// Creates a new binary search select support based on a rank support. 13 | impl BinSearchSelect { 14 | /// Creates a new binary search selection support given a rank 15 | /// support. 16 | pub fn new(rank_support: Rank) -> Self { 17 | BinSearchSelect { 18 | rank_support: rank_support, 19 | } 20 | } 21 | 22 | /// Borrows a reference to the underlying rank support. 23 | pub fn inner(&self) -> &Rank { 24 | &self.rank_support 25 | } 26 | 27 | /// Returns the underlying rank structure. 28 | pub fn into_inner(self) -> Rank { 29 | self.rank_support 30 | } 31 | } 32 | 33 | impl BitVec for BinSearchSelect { 34 | impl_bit_vec_adapter!(Rank::Block, rank_support); 35 | } 36 | 37 | impl RankSupport for BinSearchSelect { 38 | impl_rank_support_adapter!(Rank::Over, rank_support); 39 | } 40 | 41 | impl BitRankSupport for BinSearchSelect { 42 | impl_bit_rank_support_adapter!(rank_support); 43 | } 44 | 45 | // If we had access to the representation of the rank structure, we 46 | // could search level by level rather than at arbitrary bit addresses. 47 | // But then this algorithm would be tied to that representation. 48 | 49 | macro_rules! 
impl_select_support_b { 50 | ($select_support:ident, $select:ident, $rank: ident) 51 | => 52 | { 53 | impl 54 | $select_support for BinSearchSelect { 55 | fn $select(&self, index: u64) -> Option { 56 | binary_search_function(0, self.limit(), index + 1, 57 | |i| self.$rank(i)) 58 | } 59 | } 60 | } 61 | } 62 | 63 | impl_select_support_b!(Select1Support, select1, rank1); 64 | impl_select_support_b!(Select0Support, select0, rank0); 65 | 66 | impl SelectSupport for BinSearchSelect { 67 | type Over = Rank::Over; 68 | 69 | fn select(&self, index: u64, value: Rank::Over) -> Option { 70 | binary_search_function(0, self.limit(), index + 1, 71 | |i| self.rank(i, value)) 72 | } 73 | } 74 | 75 | impl SpaceUsage for BinSearchSelect { 76 | fn is_stack_only() -> bool { Rank::is_stack_only() } 77 | fn heap_bytes(&self) -> usize { self.rank_support.heap_bytes() } 78 | } 79 | 80 | #[cfg(test)] 81 | mod test { 82 | use rank::*; 83 | use select::*; 84 | 85 | #[test] 86 | fn select1() { 87 | let vec = vec![ 0b00000000000001110000000000000001u32; 1024 ]; 88 | let rank = JacobsonRank::new(vec); 89 | let select = BinSearchSelect::new(rank); 90 | 91 | assert_eq!(1, select.rank1(0)); 92 | assert_eq!(1, select.rank1(1)); 93 | assert_eq!(1, select.rank1(2)); 94 | assert_eq!(1, select.rank1(15)); 95 | assert_eq!(2, select.rank1(16)); 96 | assert_eq!(3, select.rank1(17)); 97 | assert_eq!(4, select.rank1(18)); 98 | assert_eq!(4, select.rank1(19)); 99 | assert_eq!(4, select.rank1(20)); 100 | assert_eq!(5, select.rank1(32)); 101 | 102 | assert_eq!(Some(0), select.select1(0)); 103 | assert_eq!(Some(16), select.select1(1)); 104 | assert_eq!(Some(17), select.select1(2)); 105 | assert_eq!(Some(18), select.select1(3)); 106 | assert_eq!(Some(32), select.select1(4)); 107 | assert_eq!(Some(3200), select.select1(400)); 108 | assert_eq!(Some(3216), select.select1(401)); 109 | 110 | assert_eq!(Some(8 * 4092), select.select1(4092)); 111 | assert_eq!(Some(8 * 4092 + 16), select.select1(4093)); 112 | assert_eq!(Some(8 * 4092 + 17), select.select1(4094)); 113 | assert_eq!(Some(8 * 4092 + 18), select.select1(4095)); 114 | assert_eq!(None, select.select1(4096)) 115 | } 116 | 117 | #[test] 118 | fn select2() { 119 | let vec = vec![ 0b10101010101010101010101010101010u32; 1024 ]; 120 | let rank = JacobsonRank::new(vec); 121 | let select = BinSearchSelect::new(rank); 122 | 123 | assert_eq!(Some(1), select.select1(0)); 124 | assert_eq!(Some(3), select.select1(1)); 125 | assert_eq!(Some(5), select.select1(2)); 126 | assert_eq!(Some(7), select.select1(3)); 127 | assert_eq!(Some(919), select.select1(459)); 128 | } 129 | 130 | #[test] 131 | fn select3() { 132 | let vec = vec![ 0b11111111111111111111111111111111u32; 1024 ]; 133 | let rank = JacobsonRank::new(vec); 134 | let select = BinSearchSelect::new(rank); 135 | 136 | assert_eq!(Some(0), select.select1(0)); 137 | assert_eq!(Some(1), select.select1(1)); 138 | assert_eq!(Some(2), select.select1(2)); 139 | assert_eq!(Some(32767), select.select1(32767)); 140 | assert_eq!(None, select.select1(32768)); 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/select/mod.rs: -------------------------------------------------------------------------------- 1 | //! Support for fast select queries. 
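The `BinSearchSelect` code above reduces select to rank: since `rank1` is nondecreasing in the position, the position of the `index`th 1 bit is the smallest position whose rank reaches `index + 1`. A standalone sketch of that search over an arbitrary rank function (the `select1_by_rank` helper is illustrative, not part of the crate):

```rust
// Binary search for the smallest p in 0..limit with rank1(p) >= index + 1.
fn select1_by_rank(limit: u64, index: u64, rank1: impl Fn(u64) -> u64) -> Option<u64> {
    let target = index + 1;
    let (mut lo, mut hi) = (0u64, limit); // invariant: the answer, if any, lies in lo..hi
    while lo < hi {
        let mid = lo + (hi - lo) / 2;
        if rank1(mid) >= target { hi = mid; } else { lo = mid + 1; }
    }
    if lo < limit && rank1(lo) == target { Some(lo) } else { None }
}

fn main() {
    let word: u64 = 0b1001_0110; // 1 bits at positions 1, 2, 4, 7
    let rank1 = |p: u64| (word & (!0u64 >> (63 - p))).count_ones() as u64;

    assert_eq!(Some(1), select1_by_rank(64, 0, rank1));
    assert_eq!(Some(2), select1_by_rank(64, 1, rank1));
    assert_eq!(Some(7), select1_by_rank(64, 3, rank1)); // fourth 1 is at bit 7
    assert_eq!(None, select1_by_rank(64, 4, rank1));    // only four 1s present
}
```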
2 | 3 | mod bin_search; 4 | pub use self::bin_search::*; 5 | 6 | mod traits; 7 | pub use self::traits::*; 8 | -------------------------------------------------------------------------------- /src/select/traits.rs: -------------------------------------------------------------------------------- 1 | /// Supports selecting for 1 bits. 2 | pub trait Select1Support { 3 | /// Returns the position of the `index`th 1 bit. 4 | fn select1(&self, index: u64) -> Option; 5 | } 6 | 7 | /// Supports selecting for 0 bits. 8 | pub trait Select0Support { 9 | /// Returns the position of the `index`th 0 bit. 10 | fn select0(&self, index: u64) -> Option; 11 | } 12 | 13 | /// Supports select queries over associated type `Over`. 14 | pub trait SelectSupport { 15 | /// The type of value that we can search for. 16 | type Over: Copy; 17 | 18 | /// Returns the position of the `index`th occurrence of `value`. 19 | fn select(&self, index: u64, value: Self::Over) -> Option; 20 | } 21 | -------------------------------------------------------------------------------- /src/space_usage.rs: -------------------------------------------------------------------------------- 1 | //! A trait for computing space usage. 2 | 3 | use std::mem; 4 | 5 | /// Computes the space usage of an object. 6 | /// 7 | /// We calculate the space usage as split into two portions, the heap 8 | /// portion (returned by `heap_bytes` and the stack portion (returned by 9 | /// `stack_bytes`). The stack portion is the statically-known size for 10 | /// every object of its type as allocated on the stack; the dynamic 11 | /// portion is the additional heap allocation that may depend on 12 | /// run-time factors. 13 | /// 14 | /// Examples: 15 | /// 16 | /// - Primitive types like `u32` and `usize` are stack-only. 17 | /// 18 | /// - A tuple or struct type is stack-only when all its components are. 19 | /// Its heap portion is the sum of their heap portions, but its stack 20 | /// portion may exceed the sum of their stack portions because of 21 | /// alignment and padding. 22 | /// 23 | /// - The size of a vector includes a stack portion, the vector struct 24 | /// itself, and a heap portion, the array holding its elements. The 25 | /// heap portion of a vector includes the stack portions of its 26 | /// elements. (Should they be called something else for this reason? I 27 | /// considered static/dynamic, but `Box` shows why that doesn’t express 28 | /// exactly the right property.) 29 | 30 | pub trait SpaceUsage: Sized { 31 | /// Computes the size of the receiver in bytes. 32 | /// 33 | /// This includes not just the immediate stack object, but any heap 34 | /// memory that it owns. 35 | /// 36 | /// The default implementation returns 37 | /// `Self::stack_bytes() + self.heap_bytes()`. 38 | #[inline] 39 | fn total_bytes(&self) -> usize { 40 | Self::stack_bytes() + self.heap_bytes() 41 | } 42 | 43 | /// Is the size of this type known statically? 44 | /// 45 | /// If this method returns true then `heap_bytes` should always 46 | /// return 0. 47 | fn is_stack_only() -> bool; 48 | 49 | /// Calculates the stack portion of the size of this type. 50 | /// 51 | /// This is the size of the immediate storage that all objects of 52 | /// this type occupy; it excludes storage that objects of the 53 | /// type might allocate dynamically. 54 | /// 55 | /// The default implementation returns `std::mem::size_of::()`. 56 | 57 | #[inline] 58 | fn stack_bytes() -> usize { 59 | mem::size_of::() 60 | } 61 | 62 | /// Calculates the heap portion of the size of an object. 
63 | /// 64 | /// This is the memory used by (or, rather, owned by) the object, not 65 | /// including any portion of its size that is 66 | /// included in `stack_bytes`. This is typically for containers 67 | /// that heap allocate varying amounts of memory. 68 | #[inline] 69 | fn heap_bytes(&self) -> usize; 70 | } 71 | 72 | impl_stack_only_space_usage!(()); 73 | impl_stack_only_space_usage!(u8); 74 | impl_stack_only_space_usage!(u16); 75 | impl_stack_only_space_usage!(u32); 76 | impl_stack_only_space_usage!(u64); 77 | impl_stack_only_space_usage!(usize); 78 | impl_stack_only_space_usage!(i8); 79 | impl_stack_only_space_usage!(i16); 80 | impl_stack_only_space_usage!(i32); 81 | impl_stack_only_space_usage!(i64); 82 | impl_stack_only_space_usage!(isize); 83 | impl_stack_only_space_usage!(f32); 84 | impl_stack_only_space_usage!(f64); 85 | 86 | impl<'a, T> SpaceUsage for &'a T { 87 | fn is_stack_only() -> bool { true } 88 | fn heap_bytes(&self) -> usize { 0 } 89 | } 90 | 91 | impl<'a, T> SpaceUsage for &'a [T] { 92 | fn is_stack_only() -> bool { true } 93 | fn heap_bytes(&self) -> usize { 0 } 94 | } 95 | 96 | macro_rules! impl_tuple_space_usage { 97 | ( $( $tv:ident ),+ ) => 98 | { 99 | impl<$( $tv: SpaceUsage ),+> SpaceUsage for ($( $tv, )+) { 100 | #[allow(non_snake_case)] 101 | fn heap_bytes(&self) -> usize { 102 | let &($( ref $tv, )+) = self; 103 | 0 $( + $tv.heap_bytes() )+ 104 | } 105 | 106 | #[inline] 107 | fn is_stack_only() -> bool { 108 | $( $tv::is_stack_only() )&* 109 | } 110 | } 111 | } 112 | } 113 | 114 | impl_tuple_space_usage!(A); 115 | impl_tuple_space_usage!(A, B); 116 | impl_tuple_space_usage!(A, B, C); 117 | impl_tuple_space_usage!(A, B, C, D); 118 | impl_tuple_space_usage!(A, B, C, D, E); 119 | impl_tuple_space_usage!(A, B, C, D, E, F); 120 | impl_tuple_space_usage!(A, B, C, D, E, F, G); 121 | impl_tuple_space_usage!(A, B, C, D, E, F, G, H); 122 | impl_tuple_space_usage!(A, B, C, D, E, F, G, H, I); 123 | impl_tuple_space_usage!(A, B, C, D, E, F, G, H, I, J); 124 | impl_tuple_space_usage!(A, B, C, D, E, F, G, H, I, J, K); 125 | impl_tuple_space_usage!(A, B, C, D, E, F, G, H, I, J, K, L); 126 | 127 | impl SpaceUsage for Vec { 128 | #[inline] 129 | fn is_stack_only() -> bool { false } 130 | 131 | fn heap_bytes(&self) -> usize { 132 | let mut result = self.capacity() * A::stack_bytes(); 133 | 134 | if ! A::is_stack_only() { 135 | for each in self { 136 | result += each.heap_bytes(); 137 | } 138 | } 139 | 140 | result 141 | } 142 | } 143 | 144 | impl SpaceUsage for Box { 145 | #[inline] 146 | fn is_stack_only() -> bool { false } 147 | 148 | fn stack_bytes() -> usize { 149 | mem::size_of::() 150 | } 151 | 152 | fn heap_bytes(&self) -> usize { 153 | use std::ops::Deref; 154 | self.deref().total_bytes() 155 | } 156 | } 157 | 158 | #[cfg(test)] 159 | mod test { 160 | use super::*; 161 | use std::mem::size_of; 162 | 163 | #[test] 164 | fn is_stack_only() { 165 | assert!( u32::is_stack_only()); 166 | assert!( isize::is_stack_only()); 167 | assert!(! Vec::::is_stack_only()); 168 | assert!(! Vec::>::is_stack_only()); 169 | assert!( <(u32, u32, u32)>::is_stack_only()); 170 | assert!(! 
<(u32, Vec, u32)>::is_stack_only()); 171 | } 172 | 173 | #[test] 174 | fn int_size() { 175 | assert_eq!(2, 0u16.total_bytes()); 176 | assert_eq!(4, 0u32.total_bytes()); 177 | assert_eq!(8, 0i64.total_bytes()); 178 | } 179 | 180 | #[test] 181 | fn tuple_size() { 182 | assert_eq!(8, (0u32, 0u32).total_bytes()); 183 | // This isn’t guaranteed to work, but it does for now: 184 | assert_eq!(12, (0u32, 0u8, 0u32).total_bytes()); 185 | } 186 | 187 | #[test] 188 | fn vec_size() { 189 | let v = Vec::::with_capacity(8); 190 | assert_eq!(8, v.capacity()); 191 | assert_eq!(64, v.heap_bytes()); 192 | assert_eq!(64 + size_of::>(), 193 | v.total_bytes()); 194 | } 195 | 196 | #[test] 197 | fn vec_vec_size() { 198 | let v1 = Vec::::with_capacity(8); 199 | let v2 = Vec::::with_capacity(8); 200 | let w = vec![v1, v2]; 201 | assert_eq!(2, w.capacity()); 202 | assert_eq!(128 + 2 * size_of::>() + 203 | size_of::>>(), 204 | w.total_bytes()); 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /src/storage.rs: -------------------------------------------------------------------------------- 1 | //! Traits describing how bits and arrays of bits are stored. 2 | 3 | use std::fmt; 4 | use std::io; 5 | use std::mem; 6 | 7 | use byteorder::{ByteOrder, ReadBytesExt, WriteBytesExt}; 8 | use num_traits::{One, PrimInt, ToPrimitive}; 9 | 10 | use bit_vec::{BitVec, BitVecMut}; 11 | use rank::{BitRankSupport, RankSupport}; 12 | use space_usage::SpaceUsage; 13 | 14 | /// Types that can be used for `IntVector` and `BitVector` storage. 15 | /// 16 | /// This trait is kind of a grab bag of methods right now. It includes: 17 | /// 18 | /// - methods for computing sizes and offsets relative to the block size, 19 | /// - methods for getting and setting individual and groups of bits, 20 | /// - a method for computing rank, 21 | /// - three arithmetic methods that probably belong elsewhere, and 22 | /// - block-based, endian-specified I/O. 23 | pub trait BlockType: PrimInt + BitVec + BitVecMut + BitRankSupport + 24 | RankSupport + SpaceUsage + fmt::Debug { 25 | // Methods for computing sizes and offsets relative to the block size. 26 | 27 | /// The number of bits in a block. 28 | #[inline] 29 | fn nbits() -> usize { 30 | 8 * mem::size_of::() 31 | } 32 | 33 | /// Returns `index / Self::nbits()`, computed by shifting. 34 | /// 35 | /// This is intended for converting a bit address into a block 36 | /// address, which is why it takes `u64` and returns `usize`. 37 | /// There is no check that the result actually fits in a `usize`, 38 | /// so this should only be used when `index` is already known to 39 | /// be small enough. 40 | #[inline] 41 | fn div_nbits(index: u64) -> usize { 42 | (index >> Self::lg_nbits()) as usize 43 | } 44 | 45 | /// Returns `index / Self::nbits()`, computed by shifting. 46 | /// 47 | /// This is intended for converting a bit address into a block 48 | /// address, which is why it takes `u64` and returns `usize`. 49 | #[inline] 50 | fn checked_div_nbits(index: u64) -> Option { 51 | (index >> Self::lg_nbits()).to_usize() 52 | } 53 | 54 | /// Returns `index / Self::nbits()` rounded up, computed by shifting. 55 | /// 56 | /// This is intended for converting a bit size into a block 57 | /// size, which is why it takes `u64` and returns `usize`. 58 | #[inline] 59 | fn ceil_div_nbits(index: u64) -> usize { 60 | Self::div_nbits(index + (Self::nbits() as u64 - 1)) 61 | } 62 | 63 | /// Returns `index / Self::nbits()` rounded up, computed by shifting. 
64 | /// 65 | /// This is intended for converting a bit size into a block 66 | /// size, which is why it takes `u64` and returns `usize`. 67 | /// There is no check that the result actually fits in a `usize`, 68 | /// so this should only be used when `index` is already known to 69 | /// be small enough. 70 | #[inline] 71 | fn checked_ceil_div_nbits(index: u64) -> Option { 72 | Self::checked_div_nbits(index + (Self::nbits() as u64 - 1)) 73 | } 74 | 75 | /// Returns `index % Self::nbits()`, computed by masking. 76 | /// 77 | /// This is intended for converting a bit address into a bit offset 78 | /// within a block, which is why it takes `u64` and returns `usize`. 79 | #[inline] 80 | fn mod_nbits(index: u64) -> usize { 81 | let mask: u64 = Self::lg_nbits_mask(); 82 | (index & mask) as usize 83 | } 84 | 85 | /// Returns `index * Self::nbits()`, computed by shifting. 86 | /// 87 | /// This is intended for converting a block address into a bit address, 88 | /// which is why it takes a `usize` and returns a `u64`. 89 | fn mul_nbits(index: usize) -> u64 { 90 | (index as u64) << Self::lg_nbits() 91 | } 92 | 93 | /// Computes how many bits are in the last block of an array of 94 | /// `len` bits. 95 | /// 96 | /// This is like `Self::mod_nbits`, but it returns `Self::nbits()` in 97 | /// lieu of 0. Note that this means that if you have 0 bits then the 98 | /// last block is full. 99 | #[inline] 100 | fn last_block_bits(len: u64) -> usize { 101 | let masked = Self::mod_nbits(len); 102 | if masked == 0 { Self::nbits() } else { masked } 103 | } 104 | 105 | /// Log-base-2 of the number of bits in a block. 106 | #[inline] 107 | fn lg_nbits() -> usize { 108 | Self::nbits().floor_lg() 109 | } 110 | 111 | /// Mask with the lowest-order `lg_nbits()` set. 112 | #[inline] 113 | fn lg_nbits_mask() -> Result { 114 | Result::low_mask(Self::lg_nbits()) 115 | } 116 | 117 | /// The bit mask consisting of `Self::nbits() - element_bits` zeroes 118 | /// followed by `element_bits` ones. 119 | /// 120 | /// # Precondition 121 | /// 122 | /// `element_bits <= Self::nbits()` 123 | #[inline] 124 | fn low_mask(element_bits: usize) -> Self { 125 | debug_assert!(element_bits <= Self::nbits()); 126 | 127 | if element_bits == Self::nbits() { 128 | !Self::zero() 129 | } else { 130 | (Self::one() << element_bits) - Self::one() 131 | } 132 | } 133 | 134 | /// The bit mask with the `bit_index`th bit set. 135 | /// 136 | /// BitVec are index in little-endian style based at 0. 137 | /// 138 | /// # Precondition 139 | /// 140 | /// `bit_index < Self::nbits()` 141 | #[inline] 142 | fn nth_mask(bit_index: usize) -> Self { 143 | Self::one() << bit_index 144 | } 145 | 146 | // Methods for getting and setting bits. 147 | 148 | /// Extracts the value of the `bit_index`th bit. 149 | /// 150 | /// # Panics 151 | /// 152 | /// Panics if `bit_index` is out of bounds. 153 | #[inline] 154 | fn get_bit(self, bit_index: usize) -> bool { 155 | assert!(bit_index < Self::nbits(), "Block::get_bit: out of bounds"); 156 | self & Self::nth_mask(bit_index) != Self::zero() 157 | } 158 | 159 | /// Functionally updates the value of the `bit_index`th bit to `bit_value`. 160 | /// 161 | /// # Panics 162 | /// 163 | /// Panics if `bit_index` is out of bounds. 
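The `get_bits`/`with_bits` defaults below combine a low mask, a shift, and a merge. The same field arithmetic written out for a plain `u16` (a hypothetical `with_bits_u16` helper, not part of the crate's API), checked against the `with_bits` test values later in this file:

```rust
// Overwrite the `len`-bit field of `block` that starts at bit `start` with `value`.
fn with_bits_u16(block: u16, start: usize, len: usize, value: u16) -> u16 {
    assert!(start + len <= 16);
    if len == 0 { return block; }
    let low = if len == 16 { !0 } else { (1u16 << len) - 1 }; // low_mask(len)
    let mask = low << start;                                  // the field in place
    (block & !mask) | ((value << start) & mask)
}

fn main() {
    let b = 0b0110_0011_1100_0001u16;
    assert_eq!(0b0111_1111_1100_0001, with_bits_u16(b, 10, 3, 0b111));
    assert_eq!(b, with_bits_u16(b, 14, 0, 0b101)); // zero-length write is a no-op
}
```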
164 | #[inline] 165 | fn with_bit(self, bit_index: usize, bit_value: bool) -> Self { 166 | assert!(bit_index < Self::nbits(), "Block::with_bit: out of bounds"); 167 | if bit_value { 168 | self | Self::nth_mask(bit_index) 169 | } else { 170 | self & !Self::nth_mask(bit_index) 171 | } 172 | } 173 | 174 | /// Extracts `len` bits starting at bit offset `start`. 175 | /// 176 | /// # Panics 177 | /// 178 | /// Panics if the bit span is out of bounds. 179 | #[inline] 180 | fn get_bits(self, start: usize, len: usize) -> Self { 181 | assert!(start + len <= Self::nbits(), 182 | "Block::get_bits: out of bounds"); 183 | 184 | if len == 0 { return Self::zero(); } 185 | 186 | (self >> start) & Self::low_mask(len) 187 | } 188 | 189 | /// Functionally updates `len` bits to `value` starting at offset `start`. 190 | /// 191 | /// # Panics 192 | /// 193 | /// Panics if the bit span is out of bounds. 194 | #[inline] 195 | fn with_bits(self, start: usize, len: usize, value: Self) -> Self { 196 | assert!(start + len <= Self::nbits(), 197 | "Block::with_bits: out of bounds"); 198 | 199 | if len == 0 { return self; } 200 | 201 | let mask = Self::low_mask(len) << start; 202 | let shifted_value = value << start; 203 | 204 | (self & !mask) | (shifted_value & mask) 205 | } 206 | 207 | // Arithmetic methods that probably belong elsewhere. 208 | 209 | /// Returns the smallest number `n` such that `2.pow(n) >= self`. 210 | #[inline] 211 | fn ceil_lg(self) -> usize { 212 | if self <= Self::one() { return 0; } 213 | Self::nbits() - (self - Self::one()).leading_zeros() as usize 214 | } 215 | 216 | /// Returns the largest number `n` such that `2.pow(n) <= self`. 217 | #[inline] 218 | fn floor_lg(self) -> usize { 219 | if self <= Self::one() { return 0; } 220 | Self::nbits() - 1 - self.leading_zeros() as usize 221 | } 222 | 223 | /// Returns the smallest number `n` such that `n * divisor >= self`. 224 | #[inline] 225 | fn ceil_div(self, divisor: Self) -> Self { 226 | (self + divisor - Self::one()) / divisor 227 | } 228 | 229 | // I/O methods 230 | 231 | /// Reads a block with the specified endianness. 232 | fn read_block<R, T>(source: &mut R) -> io::Result<Self> 233 | where R: io::Read, T: ByteOrder; 234 | 235 | /// Writes a block with the specified endianness. 236 | fn write_block<W, T>(&self, sink: &mut W) -> io::Result<()> 237 | where W: io::Write, T: ByteOrder; 238 | } 239 | 240 | macro_rules! fn_low_mask { 241 | ( $ty:ident ) 242 | => 243 | { 244 | #[inline] 245 | fn low_mask(k: usize) -> $ty { 246 | debug_assert!(k <= Self::nbits()); 247 | 248 | // Compute the mask when element_bits is not the word size: 249 | let a = $ty::one().wrapping_shl(k as u32) - 1; 250 | 251 | // Special case for the word size: 252 | let b = (Self::div_nbits(k as u64) & 1) as $ty * !0; 253 | 254 | a | b 255 | } 256 | } 257 | } 258 | 259 | impl BlockType for u8 { 260 | fn read_block<R, T>(source: &mut R) -> io::Result<Self> 261 | where R: io::Read, 262 | T: ByteOrder { 263 | source.read_u8() 264 | } 265 | 266 | fn write_block<W, T>(&self, sink: &mut W) -> io::Result<()> 267 | where W: io::Write, 268 | T: ByteOrder { 269 | sink.write_u8(*self) 270 | } 271 | 272 | fn_low_mask!(u8); 273 | } 274 | 275 | macro_rules! 
275 | macro_rules! impl_block_type {
276 |     ($ty:ident, $read:ident, $write:ident)
277 |         =>
278 |     {
279 |         impl BlockType for $ty {
280 |             fn read_block<R, T>(source: &mut R) -> io::Result<Self>
281 |                 where R: io::Read,
282 |                       T: ByteOrder {
283 |                 source.$read::<T>()
284 |             }
285 |
286 |             fn write_block<W, T>(&self, sink: &mut W) -> io::Result<()>
287 |                 where W: io::Write,
288 |                       T: ByteOrder {
289 |                 sink.$write::<T>(*self)
290 |             }
291 |
292 |             fn_low_mask!($ty);
293 |         }
294 |     }
295 | }
296 |
297 | impl_block_type!(u16, read_u16, write_u16);
298 | impl_block_type!(u32, read_u32, write_u32);
299 | impl_block_type!(u64, read_u64, write_u64);
300 |
301 | impl BlockType for usize {
302 |     #[cfg(target_pointer_width = "64")]
303 |     fn read_block<R, T>(source: &mut R) -> io::Result<Self>
304 |         where R: io::Read,
305 |               T: ByteOrder {
306 |         source.read_u64::<T>().map(|x| x as usize)
307 |     }
308 |
309 |     #[cfg(target_pointer_width = "32")]
310 |     fn read_block<R, T>(source: &mut R) -> io::Result<Self>
311 |         where R: io::Read,
312 |               T: ByteOrder {
313 |         source.read_u32::<T>().map(|x| x as usize)
314 |     }
315 |
316 |     #[cfg(target_pointer_width = "64")]
317 |     fn write_block<W, T>(&self, sink: &mut W) -> io::Result<()>
318 |         where W: io::Write,
319 |               T: ByteOrder {
320 |         sink.write_u64::<T>(*self as u64)
321 |     }
322 |
323 |     #[cfg(target_pointer_width = "32")]
324 |     fn write_block<W, T>(&self, sink: &mut W) -> io::Result<()>
325 |         where W: io::Write,
326 |               T: ByteOrder {
327 |         sink.write_u32::<T>(*self as u32)
328 |     }
329 |
330 |     fn_low_mask!(usize);
331 |
332 | }
333 |
334 | /// Represents the address of a bit, broken into a block component
335 | /// and a bit offset component.
336 | #[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
337 | pub struct Address {
338 |     /// The index of the block containing the bit in question.
339 |     pub block_index: usize,
340 |     /// The position of the bit in question within its block.
341 |     pub bit_offset: usize,
342 | }
343 |
344 | impl Address {
345 |     /// Creates an `Address` for the given bit index for storage in
346 |     /// block type `Block`.
347 |     ///
348 |     /// # Panics
349 |     ///
350 |     /// Panics if `bit_index` divided by the block size doesn’t fit in a
351 |     /// `usize`.
352 |     #[inline]
353 |     pub fn new<Block: BlockType>(bit_index: u64) -> Self {
354 |         Address {
355 |             block_index: Block::checked_div_nbits(bit_index)
356 |                                .expect("Address::new: index overflow"),
357 |             bit_offset: Block::mod_nbits(bit_index),
358 |         }
359 |     }
360 |
361 |     /// Converts an `Address` back into a raw bit index.
362 |     ///
363 |     /// This method and `new` should be inverses.
364 |     #[inline]
365 |     pub fn bit_index<Block: BlockType>(&self) -> u64 {
366 |         Block::mul_nbits(self.block_index) + self.bit_offset as u64
367 |     }
368 | }
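// Editor's illustrative sketch (not part of the original file), assuming
// the generic `<Block: BlockType>` parameters reconstructed above: a raw
// bit index splits into a block index and a bit offset, and `bit_index`
// inverts `new`.
#[allow(dead_code)]
fn example_address_round_trip() {
    let addr = Address::new::<u64>(200);
    assert_eq!(3, addr.block_index);          // 200 / 64
    assert_eq!(8, addr.bit_offset);           // 200 % 64
    assert_eq!(200, addr.bit_index::<u64>()); // 3 * 64 + 8
}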
369 |
370 | #[cfg(test)]
371 | mod test {
372 |     use super::*;
373 |     use quickcheck::{quickcheck, TestResult};
374 |
375 |     #[test]
376 |     fn low_mask() {
377 |         assert_eq!(0b00011111, u8::low_mask(5));
378 |         assert_eq!(0b0011111111111111, u16::low_mask(14));
379 |         assert_eq!(0b1111111111111111, u16::low_mask(16));
380 |     }
381 |
382 |     #[test]
383 |     fn nth_mask() {
384 |         assert_eq!(0b10000000, u8::nth_mask(7));
385 |         assert_eq!(0b01000000, u8::nth_mask(6));
386 |         assert_eq!(0b00100000, u8::nth_mask(5));
387 |         assert_eq!(0b00000010, u8::nth_mask(1));
388 |         assert_eq!(0b00000001, u8::nth_mask(0));
389 |     }
390 |
391 |     #[test]
392 |     fn get_bits() {
393 |         assert_eq!(0b0,
394 |                    0b0100110001110000u16.get_bits(0, 0));
395 |         assert_eq!(0b010,
396 |                    0b0100110001110000u16.get_bits(13, 3));
397 |         assert_eq!( 0b110001,
398 |                    0b0100110001110000u16.get_bits(6, 6));
399 |         assert_eq!( 0b10000,
400 |                    0b0100110001110000u16.get_bits(0, 5));
401 |         assert_eq!(0b0100110001110000,
402 |                    0b0100110001110000u16.get_bits(0, 16));
403 |     }
404 |
405 |     #[test]
406 |     fn with_bits() {
407 |         assert_eq!(0b0111111111000001,
408 |                    0b0110001111000001u16.with_bits(10, 3, 0b111));
409 |         assert_eq!(0b0101110111000001,
410 |                    0b0110001111000001u16.with_bits(9, 5, 0b01110));
411 |         assert_eq!(0b0110001111000001,
412 |                    0b0110001111000001u16.with_bits(14, 0, 0b01110));
413 |         assert_eq!(0b0110001110101010,
414 |                    0b0110001111000001u16.with_bits(0, 8, 0b10101010));
415 |         assert_eq!(0b0000000000000010,
416 |                    0b0110001111000001u16.with_bits(0, 16, 0b10));
417 |     }
418 |
419 |     #[test]
420 |     fn get_bit() {
421 |         assert!(! 0b00000000u8.get_bit(0));
422 |         assert!(! 0b00000000u8.get_bit(1));
423 |         assert!(! 0b00000000u8.get_bit(2));
424 |         assert!(! 0b00000000u8.get_bit(3));
425 |         assert!(! 0b00000000u8.get_bit(7));
426 |         assert!(! 0b10101010u8.get_bit(0));
427 |         assert!(  0b10101010u8.get_bit(1));
428 |         assert!(! 0b10101010u8.get_bit(2));
429 |         assert!(  0b10101010u8.get_bit(3));
430 |         assert!(  0b10101010u8.get_bit(7));
431 |     }
432 |
433 |     #[test]
434 |     fn with_bit() {
435 |         assert_eq!(0b00100000, 0b00000000u8.with_bit(5, true));
436 |         assert_eq!(0b00000000, 0b00000000u8.with_bit(5, false));
437 |         assert_eq!(0b10101010, 0b10101010u8.with_bit(7, true));
438 |         assert_eq!(0b00101010, 0b10101010u8.with_bit(7, false));
439 |         assert_eq!(0b10101011, 0b10101010u8.with_bit(0, true));
440 |         assert_eq!(0b10101010, 0b10101010u8.with_bit(0, false));
441 |     }
442 |
443 |     #[test]
444 |     fn floor_lg() {
445 |         assert_eq!(0, 1u32.floor_lg());
446 |         assert_eq!(1, 2u32.floor_lg());
447 |         assert_eq!(1, 3u32.floor_lg());
448 |         assert_eq!(2, 4u32.floor_lg());
449 |         assert_eq!(2, 5u32.floor_lg());
450 |         assert_eq!(2, 7u32.floor_lg());
451 |         assert_eq!(3, 8u32.floor_lg());
452 |
453 |         fn prop(n: u64) -> TestResult {
454 |             if n == 0 { return TestResult::discard(); }
455 |
456 |             TestResult::from_bool(
457 |                 2u64.pow(n.floor_lg() as u32) <= n
458 |                 && 2u64.pow(n.floor_lg() as u32 + 1) > n)
459 |         }
460 |
461 |         quickcheck(prop as fn(u64) -> TestResult);
462 |     }
463 |
464 |     #[test]
465 |     fn ceil_lg() {
466 |         assert_eq!(0, 1u32.ceil_lg());
467 |         assert_eq!(1, 2u32.ceil_lg());
468 |         assert_eq!(2, 3u32.ceil_lg());
469 |         assert_eq!(2, 4u32.ceil_lg());
470 |         assert_eq!(3, 5u32.ceil_lg());
471 |         assert_eq!(3, 7u32.ceil_lg());
472 |         assert_eq!(3, 8u32.ceil_lg());
473 |         assert_eq!(4, 9u32.ceil_lg());
474 |
475 |         fn prop(n: u64) -> TestResult {
476 |             if n <= 1 { return TestResult::discard(); }
477 |
478 |             TestResult::from_bool(
479 |                 2u64.pow(n.ceil_lg() as u32) >= n
480 |                 && 2u64.pow(n.ceil_lg() as u32 - 1) < n)
481 |         }
482 |
483 |         quickcheck(prop as fn(u64) -> TestResult);
484 |     }
485 |
486 |     #[test]
487 |     fn ceil_div() {
488 |         assert_eq!(6, 12u32.ceil_div(2));
489 |         assert_eq!(4, 12u32.ceil_div(3));
490 |         assert_eq!(3, 12u32.ceil_div(4));
491 |         assert_eq!(3, 12u32.ceil_div(5));
492 |         assert_eq!(2, 12u32.ceil_div(6));
493 |         assert_eq!(2, 12u32.ceil_div(7));
494 |         assert_eq!(2, 12u32.ceil_div(11));
495 |         assert_eq!(1, 12u32.ceil_div(12));
496 |
497 |         fn prop(n: u64, m: u64) -> TestResult {
498 |             if n * m == 0 { return TestResult::discard(); }
499 |
500 |             TestResult::from_bool(
501 |                 m * n.ceil_div(m) >= n
502 |                 && m * (n.ceil_div(m) - 1) < n)
503 |         }
504 |
505 |         quickcheck(prop as fn(u64, u64) -> TestResult);
506 |     }
507 | }
508 |
509 |
--------------------------------------------------------------------------------
/src/stream/bit_buffer.rs:
--------------------------------------------------------------------------------
1 | use std::io::{Error, ErrorKind, Result};
2 |
3 | use storage::{BlockType};
4 | use stream::{BitRead, BitWrite};
5 | use bit_vec::*;
6 |
7 | /// A bit buffer can be used to read bits from or write bits to an
8 | /// underlying bit vector.
9 | #[derive(Clone, Debug)]
10 | pub struct BitBuffer<Inner = BitVector<usize>> {
11 |     data: Inner,
12 |     pos: u64,
13 | }
14 |
15 | impl<Block: BlockType> BitBuffer<BitVector<Block>> {
16 |     /// Creates a new, empty bit buffer.
17 |     #[inline]
18 |     pub fn new() -> Self {
19 |         Self::with_capacity(0)
20 |     }
21 |
22 |     /// Creates a new, empty bit buffer with the given capacity (in
23 |     /// bits) preallocated.
24 |     pub fn with_capacity(capacity: u64) -> Self {
25 |         BitBuffer {
26 |             data: BitVector::with_capacity(capacity),
27 |             pos: 0,
28 |         }
29 |     }
30 | }
31 |
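// Editor's illustrative sketch (not part of the original file): a
// write-then-read round trip through a default `BitBuffer`, using the
// `BitRead`/`BitWrite` impls defined later in this file. The function
// name is made up for the example.
#[allow(dead_code)]
fn example_round_trip() {
    let mut buffer: BitBuffer = BitBuffer::new();

    // Append three bits: 1, 0, 1.
    buffer.write_bit(true).unwrap();
    buffer.write_bit(false).unwrap();
    buffer.write_bit(true).unwrap();

    // Rewind and read them back in order; `None` signals end of data.
    buffer.seek(0).unwrap();
    assert_eq!(Some(true), buffer.read_bit().unwrap());
    assert_eq!(Some(false), buffer.read_bit().unwrap());
    assert_eq!(Some(true), buffer.read_bit().unwrap());
    assert_eq!(None, buffer.read_bit().unwrap());
}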
32 | impl<Inner: BitVec> BitBuffer<Inner> {
33 |     /// Creates a new bit buffer for reading from a bit vector.
34 |     pub fn from(input: Inner) -> Self {
35 |         BitBuffer {
36 |             data: input,
37 |             pos: 0,
38 |         }
39 |     }
40 |
41 |     /// Creates a new bit buffer for appending to a bit vector.
42 |     pub fn append(vec: Inner) -> Self {
43 |         let len = vec.bit_len();
44 |         BitBuffer {
45 |             data: vec,
46 |             pos: len,
47 |         }
48 |     }
49 |
50 |     /// Moves the position for the next read or write.
51 |     pub fn seek(&mut self, position: u64) -> Result<()> {
52 |         if position <= self.data.bit_len() {
53 |             self.pos = position;
54 |             Ok(())
55 |         } else {
56 |             Err(Error::new(ErrorKind::NotFound,
57 |                            "position out of bounds"))
58 |         }
59 |     }
60 | }
61 |
62 | impl<Inner> BitBuffer<Inner> {
63 |     /// Returns the bit vector underlying the bit buffer.
64 |     #[inline]
65 |     pub fn into_inner(self) -> Inner {
66 |         self.data
67 |     }
68 |
69 |     /// Gives access to the bit vector underlying the bit buffer.
70 |     #[inline]
71 |     pub fn inner(&self) -> &Inner {
72 |         &self.data
73 |     }
74 |
75 |     /// The position in the bit buffer where the next read or write will
76 |     /// occur.
77 |     #[inline]
78 |     pub fn position(&self) -> u64 {
79 |         self.pos
80 |     }
81 | }
82 |
83 | impl<Inner: BitVec> BitVec for BitBuffer<Inner> {
84 |     type Block = Inner::Block;
85 |
86 |     #[inline]
87 |     fn block_len(&self) -> usize {
88 |         self.data.block_len()
89 |     }
90 |
91 |     #[inline]
92 |     fn bit_len(&self) -> u64 {
93 |         self.data.bit_len()
94 |     }
95 |
96 |     #[inline]
97 |     fn get_block(&self, position: usize) -> Self::Block {
98 |         self.data.get_block(position)
99 |     }
100 | }
101 |
102 | impl<Inner: BitVecMut> BitVecMut for BitBuffer<Inner> {
103 |     #[inline]
104 |     fn set_block(&mut self, position: usize, value: Self::Block) {
105 |         self.data.set_block(position, value);
106 |     }
107 | }
108 |
109 | impl<Inner: BitVec> BitRead for BitBuffer<Inner> {
110 |     fn read_bit(&mut self) -> Result<Option<bool>> {
111 |         if self.pos < self.bit_len() {
112 |             let result = self.get_bit(self.pos);
113 |             self.pos += 1;
114 |             Ok(Some(result))
115 |         } else {
116 |             Ok(None)
117 |         }
118 |     }
119 | }
120 |
121 | impl<Inner: BitVecPush> BitWrite for BitBuffer<Inner> {
122 |     fn write_bit(&mut self, value: bool) -> Result<()> {
123 |         while self.pos >= self.bit_len() {
124 |             self.data.push_bit(false);
125 |         }
126 |
127 |         let pos = self.pos;
128 |         self.set_bit(pos, value);
129 |         self.pos = pos + 1;
130 |
131 |         Ok(())
132 |     }
133 | }
134 |
135 | #[cfg(test)]
136 | mod test {
137 |     use super::*;
138 |     use stream::{BitRead, BitWrite};
139 |
140 |     #[test]
141 |     fn reader() {
142 |         let mut vec = BitVector::<usize>::new();
143 |         vec.push_bit(false);
144 |         vec.push_bit(true);
145 |         vec.push_bit(false);
146 |         vec.push_bit(false);
147 |         vec.push_bit(true);
148 |
149 |         let mut reader = BitBuffer::from(vec);
150 |
151 |         assert_eq!(Some(false), reader.read_bit().unwrap());
152 |         assert_eq!(Some(true), reader.read_bit().unwrap());
153 |         assert_eq!(Some(false), reader.read_bit().unwrap());
154 |         assert_eq!(Some(false), reader.read_bit().unwrap());
155 |         assert_eq!(Some(true), reader.read_bit().unwrap());
156 |         assert_eq!(None, reader.read_bit().unwrap());
157 |     }
158 |
159 |     #[test]
160 |     fn writer() {
161 |         let mut writer: BitBuffer = BitBuffer::new();
162 |
163 |         writer.write_bit(true).unwrap();
164 |         writer.write_bit(false).unwrap();
165 |         writer.write_bit(false).unwrap();
166 |         writer.write_bit(true).unwrap();
167 |         writer.write_bit(true).unwrap();
168 |
169 |         let mut vec = writer.into_inner();
170 |
171 |         assert_eq!(Some(true), vec.pop_bit());
172 |         assert_eq!(Some(true), vec.pop_bit());
173 |         assert_eq!(Some(false), vec.pop_bit());
174 |         assert_eq!(Some(false), vec.pop_bit());
175 |         assert_eq!(Some(true), vec.pop_bit());
176 |         assert_eq!(None, vec.pop_bit());
177 |     }
178 | }
179 |
--------------------------------------------------------------------------------
/src/stream/mod.rs:
--------------------------------------------------------------------------------
1 | //! Bit-oriented streams for coding.
2 |
3 | mod traits;
4 | pub use self::traits::*;
5 |
6 | mod bit_buffer;
7 | pub use self::bit_buffer::*;
8 |
--------------------------------------------------------------------------------
/src/stream/traits.rs:
--------------------------------------------------------------------------------
1 | use std::io::Result;
2 |
3 | use internal::errors::*;
4 |
5 | use num_traits::PrimInt;
6 |
7 | /// Allows reading bits from a source.
8 | pub trait BitRead {
9 |     /// Reads a single bit from the source.
10 |     ///
11 |     /// End-of-file is indicated by `Ok(None)`.
12 |     fn read_bit(&mut self) -> Result<Option<bool>>;
13 |
14 |     /// Reads `nbits` bits as an integer, least-significant bit first.
15 |     fn read_int<N: PrimInt>(&mut self, nbits: usize) -> Result<Option<N>> {
16 |         let mut result = N::zero();
17 |         let mut mask = N::one();
18 |         let mut consumed = false;
19 |
20 |         for _ in 0 .. nbits {
21 |             if let Some(bit) = try!(self.read_bit()) {
22 |                 consumed = true;
23 |                 if bit {
24 |                     result = result | mask;
25 |                 }
26 |                 mask = mask << 1;
27 |             } else {
28 |                 if consumed {
29 |                     return out_of_bits("BitRead::read_int");
30 |                 } else {
31 |                     return Ok(None);
32 |                 }
33 |             }
34 |         }
35 |
36 |         Ok(Some(result))
37 |     }
38 |
39 |     /// Reads `nbits` bits as an integer, most-significant bit first.
40 |     fn read_int_be<N: PrimInt>(&mut self, nbits: usize) -> Result<Option<N>> {
41 |         let mut result = N::zero();
42 |         let mut consumed = false;
43 |
44 |         for _ in 0 .. nbits {
45 |             if let Some(bit) = try!(self.read_bit()) {
46 |                 consumed = true;
47 |                 result = result << 1;
48 |                 if bit {
49 |                     result = result | N::one()
50 |                 }
51 |             } else {
52 |                 if consumed {
53 |                     return out_of_bits("BitRead::read_int_be");
54 |                 } else {
55 |                     return Ok(None)
56 |                 }
57 |             }
58 |         }
59 |
60 |         Ok(Some(result))
61 |     }
62 | }
63 |
64 | /// Allows writing bits to a sink.
65 | pub trait BitWrite {
66 |     /// Writes a single bit to the sink.
67 |     fn write_bit(&mut self, value: bool) -> Result<()>;
68 |
69 |     /// Writes the lower `nbits` of `value`, least-significant first.
70 |     fn write_int<N: PrimInt>(&mut self, nbits: usize, mut value: N) -> Result<()> {
71 |         for _ in 0 .. nbits {
72 |             try!(self.write_bit(value & N::one() != N::zero()));
73 |             value = value >> 1;
74 |         }
75 |
76 |         Ok(())
77 |     }
78 |
79 |     /// Writes the lower `nbits` of `value`, most-significant first.
80 |     fn write_int_be<N: PrimInt>(&mut self, nbits: usize, value: N)
81 |                                 -> Result<()> {
82 |         let mut mask = N::one() << nbits.saturating_sub(1); // avoids underflow when `nbits` is 0
83 |
84 |         for _ in 0 .. nbits {
85 |             try!(self.write_bit(value & mask != N::zero()));
86 |             mask = mask >> 1;
87 |         }
88 |
89 |         Ok(())
90 |     }
91 | }
92 |
93 | // These instances aren't particularly efficient, but they might be good
94 | // for testing.
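// Editor's illustrative sketch (not part of the original file): the two
// default write methods differ only in bit order. Writing the value 6
// (binary 110) in three bits emits 0, 1, 1 with `write_int` (LSB first)
// but 1, 1, 0 with `write_int_be` (MSB first). It relies on the
// `VecDeque<bool>` instances defined just below.
#[allow(dead_code)]
fn example_bit_order() {
    use std::collections::VecDeque;

    let mut lsb: VecDeque<bool> = VecDeque::new();
    lsb.write_int(3, 6u8).unwrap();
    assert_eq!(vec![false, true, true], lsb.into_iter().collect::<Vec<_>>());

    let mut msb: VecDeque<bool> = VecDeque::new();
    msb.write_int_be(3, 6u8).unwrap();
    assert_eq!(vec![true, true, false], msb.into_iter().collect::<Vec<_>>());
}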
95 |
96 | use std::collections::VecDeque;
97 |
98 | impl BitRead for VecDeque<bool> {
99 |     fn read_bit(&mut self) -> Result<Option<bool>> {
100 |         Ok(self.pop_front())
101 |     }
102 | }
103 |
104 | impl BitWrite for VecDeque<bool> {
105 |     fn write_bit(&mut self, value: bool) -> Result<()> {
106 |         self.push_back(value);
107 |         Ok(())
108 |     }
109 | }
110 |
111 | #[cfg(test)]
112 | mod test {
113 |     use super::*;
114 |     use std::collections::VecDeque;
115 |
116 |     #[test]
117 |     fn read_bit() {
118 |         let mut vd = VecDeque::new();
119 |         vd.push_back(true);
120 |         vd.push_back(true);
121 |         vd.push_back(false);
122 |
123 |         assert_eq!(Some(true), vd.read_bit().unwrap());
124 |         assert_eq!(Some(true), vd.read_bit().unwrap());
125 |         assert_eq!(Some(false), vd.read_bit().unwrap());
126 |         assert_eq!(None, vd.read_bit().unwrap());
127 |     }
128 |
129 |     #[test]
130 |     fn write_bit() {
131 |         let mut vd = VecDeque::new();
132 |
133 |         vd.write_bit(false).unwrap();
134 |         vd.write_bit(true).unwrap();
135 |         vd.write_bit(true).unwrap();
136 |
137 |         assert_eq!(Some(false), vd.pop_front());
138 |         assert_eq!(Some(true), vd.pop_front());
139 |         assert_eq!(Some(true), vd.pop_front());
140 |         assert_eq!(None, vd.pop_front());
141 |     }
142 |
143 |     #[test]
144 |     fn read_int() {
145 |         let mut vd = VecDeque::new();
146 |
147 |         vd.write_bit(false).unwrap();
148 |         assert_eq!(Some(Some(0)), vd.read_int(1).ok());
149 |
150 |         vd.write_bit(true).unwrap();
151 |         assert_eq!(Some(Some(1)), vd.read_int(1).ok());
152 |
153 |         vd.write_bit(true).unwrap();
154 |         vd.write_bit(false).unwrap();
155 |         assert_eq!(Some(Some(1)), vd.read_int(2).ok());
156 |
157 |         vd.write_bit(false).unwrap();
158 |         vd.write_bit(true).unwrap();
159 |         assert_eq!(Some(Some(2)), vd.read_int(2).ok());
160 |
161 |         vd.write_bit(true).unwrap();
162 |         vd.write_bit(true).unwrap();
163 |         assert_eq!(Some(Some(3)), vd.read_int(2).ok());
164 |
165 |         vd.write_bit(true).unwrap();
166 |         vd.write_bit(true).unwrap();
167 |         vd.write_bit(false).unwrap();
168 |         vd.write_bit(false).unwrap();
169 |         assert_eq!(Some(Some(3)), vd.read_int(4).ok());
170 |     }
171 |
172 |     #[test]
173 |     fn read_int_be() {
174 |         let mut vd = VecDeque::new();
175 |
176 |         vd.write_bit(false).unwrap();
177 |         assert_eq!(Some(Some(0)), vd.read_int_be(1).ok());
178 |
179 |         vd.write_bit(true).unwrap();
180 |         assert_eq!(Some(Some(1)), vd.read_int_be(1).ok());
181 |
182 |         vd.write_bit(true).unwrap();
183 |         vd.write_bit(false).unwrap();
184 |         assert_eq!(Some(Some(2)), vd.read_int_be(2).ok());
185 |
186 |         vd.write_bit(false).unwrap();
187 |         vd.write_bit(true).unwrap();
188 |         assert_eq!(Some(Some(1)), vd.read_int_be(2).ok());
189 |
190 |         vd.write_bit(true).unwrap();
191 |         vd.write_bit(true).unwrap();
192 |         assert_eq!(Some(Some(3)), vd.read_int_be(2).ok());
193 |
194 |         vd.write_bit(true).unwrap();
195 |         vd.write_bit(true).unwrap();
196 |         vd.write_bit(false).unwrap();
197 |         vd.write_bit(false).unwrap();
198 |         assert_eq!(Some(Some(12)), vd.read_int_be(4).ok());
199 |     }
200 |
201 |     #[test]
202 |     fn write_int() {
203 |         let mut vd = VecDeque::new();
204 |
205 |         vd.write_int(5, 6).unwrap();
206 |         vd.write_int(5, 7).unwrap();
207 |         vd.write_int(5, 2).unwrap();
208 |         vd.write_int(4, 3).unwrap();
209 |         vd.write_int(4, 1).unwrap();
210 |         vd.write_int(4, 0).unwrap();
211 |         vd.write_int(4, 6).unwrap();
212 |
213 |         assert_eq!(Some(Some(6)), vd.read_int(5).ok());
214 |         assert_eq!(Some(Some(7)), vd.read_int(5).ok());
215 |         assert_eq!(Some(Some(2)), vd.read_int(5).ok());
216 |         assert_eq!(Some(Some(3)), vd.read_int(4).ok());
217 |         assert_eq!(Some(Some(1)), vd.read_int(4).ok());
218 |         assert_eq!(Some(Some(0)), vd.read_int(4).ok());
219 |         assert_eq!(Some(Some(6)), vd.read_int(4).ok());
220 |     }
221 |
222 |     #[test]
223 |     fn write_int_be() {
224 |         let mut vd = VecDeque::new();
225 |
226 |         vd.write_int_be(5, 6).unwrap();
227 |         vd.write_int_be(5, 7).unwrap();
228 |         vd.write_int_be(5, 2).unwrap();
229 |         vd.write_int_be(4, 3).unwrap();
230 |         vd.write_int_be(4, 1).unwrap();
231 |         vd.write_int_be(4, 0).unwrap();
232 |         vd.write_int_be(4, 6).unwrap();
233 |
234 |         assert_eq!(Some(Some(6)), vd.read_int_be(5).ok());
235 |         assert_eq!(Some(Some(7)), vd.read_int_be(5).ok());
236 |         assert_eq!(Some(Some(2)), vd.read_int_be(5).ok());
237 |         assert_eq!(Some(Some(3)), vd.read_int_be(4).ok());
238 |         assert_eq!(Some(Some(1)), vd.read_int_be(4).ok());
239 |         assert_eq!(Some(Some(0)), vd.read_int_be(4).ok());
240 |         assert_eq!(Some(Some(6)), vd.read_int_be(4).ok());
241 |     }
242 | }
243 |
--------------------------------------------------------------------------------