├── src ├── tests │ ├── mod.rs │ ├── macros.rs │ ├── iter.rs │ └── byte_set.rs ├── byte_set │ ├── raw.rs │ ├── mod.rs │ ├── traits.rs │ ├── ascii.rs │ └── main_impl.rs ├── chunk.rs ├── iter.rs ├── macros.rs └── lib.rs ├── .github ├── FUNDING.yml └── workflows │ └── ci.yml ├── benches ├── benchmarks │ ├── mod.rs │ ├── drop.rs │ ├── len.rs │ ├── clear.rs │ ├── iter.rs │ ├── max.rs │ ├── extend_slice.rs │ ├── min.rs │ ├── contains_cached.rs │ ├── contains_random.rs │ ├── insert.rs │ └── remove_single.rs ├── main.rs └── util │ ├── hash.rs │ ├── mod.rs │ ├── rand.rs │ └── bool256.rs ├── rustfmt.toml ├── LICENSE-MIT ├── Cargo.toml ├── .gitignore ├── CHANGELOG.md ├── LICENSE-APACHE ├── README.md └── Cargo.lock /src/tests/mod.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | mod macros; 3 | 4 | mod byte_set; 5 | mod iter; 6 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: ['nvzqz'] 4 | patreon: nvzqz 5 | custom: ['https://www.paypal.me/nvzqz'] 6 | -------------------------------------------------------------------------------- /benches/benchmarks/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod clear; 2 | pub mod contains_cached; 3 | pub mod contains_random; 4 | pub mod drop; 5 | pub mod extend_slice; 6 | pub mod insert; 7 | pub mod iter; 8 | pub mod len; 9 | pub mod max; 10 | pub mod min; 11 | pub mod remove_single; 12 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 80 2 | hard_tabs = false 3 | tab_spaces = 4 4 | newline_style = "Unix" 5 | use_small_heuristics = "Default" 6 | reorder_imports = true 7 | reorder_modules = 
true 8 | remove_nested_parens = true 9 | fn_args_layout = "Tall" 10 | edition = "2018" 11 | merge_derives = true 12 | use_try_shorthand = false 13 | use_field_init_shorthand = true 14 | force_explicit_abi = true 15 | print_misformatted_file_names = true 16 | -------------------------------------------------------------------------------- /benches/main.rs: -------------------------------------------------------------------------------- 1 | mod benchmarks; 2 | mod util; 3 | 4 | fn main() { 5 | let mut criterion = criterion::Criterion::default().configure_from_args(); 6 | 7 | macro_rules! benchmarks { 8 | ($($module:ident,)+) => { 9 | $(benchmarks::$module::benches(&mut criterion);)+ 10 | }; 11 | } 12 | 13 | benchmarks! { 14 | clear, 15 | contains_cached, 16 | contains_random, 17 | drop, 18 | extend_slice, 19 | insert, 20 | iter, 21 | len, 22 | max, 23 | min, 24 | remove_single, 25 | } 26 | 27 | criterion.final_summary(); 28 | } 29 | -------------------------------------------------------------------------------- /benches/util/hash.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::HashSet, 3 | hash::{BuildHasher, Hasher}, 4 | }; 5 | 6 | pub use hashbrown::HashSet as HashbrownSet; 7 | 8 | pub type IdentityHashSet = HashSet; 9 | pub type IdentityHashbrownSet = HashbrownSet; 10 | 11 | /// A hasher that passes the input byte directly. 
12 | #[derive(Clone, Copy, Default)] 13 | pub struct IdentityHasher { 14 | byte: u8, 15 | } 16 | 17 | impl Hasher for IdentityHasher { 18 | fn finish(&self) -> u64 { 19 | self.byte as u64 20 | } 21 | 22 | fn write(&mut self, _: &[u8]) { 23 | panic!("Must use `write_u8` instead"); 24 | } 25 | 26 | fn write_u8(&mut self, i: u8) { 27 | self.byte = i; 28 | } 29 | } 30 | 31 | #[derive(Clone, Copy, Default)] 32 | pub struct IdentityHasherBuilder; 33 | 34 | impl BuildHasher for IdentityHasherBuilder { 35 | type Hasher = IdentityHasher; 36 | 37 | fn build_hasher(&self) -> Self::Hasher { 38 | Default::default() 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/tests/macros.rs: -------------------------------------------------------------------------------- 1 | // These assertions are macros in order to keep file/line/column info. 2 | 3 | macro_rules! assert_not_contains { 4 | ($set:expr, $byte:expr) => { 5 | assert!( 6 | !$set.contains($byte), 7 | "{} contains {}", 8 | $set.fmt_binary(), 9 | $byte 10 | ); 11 | }; 12 | } 13 | 14 | macro_rules! assert_contains { 15 | ($set:expr, $byte:expr) => { 16 | assert!( 17 | $set.contains($byte), 18 | "{} does not contain {}", 19 | $set.fmt_binary(), 20 | $byte 21 | ); 22 | }; 23 | } 24 | 25 | macro_rules! assert_len { 26 | ($set:expr, $len:expr) => { 27 | assert_eq!( 28 | $set.len(), 29 | $len, 30 | "{} does not have {} bytes", 31 | $set.fmt_binary(), 32 | $len 33 | ); 34 | }; 35 | } 36 | 37 | macro_rules! 
assert_empty { 38 | ($set:expr) => { 39 | assert!($set.is_empty(), "{} is not empty", $set.fmt_binary()); 40 | }; 41 | } 42 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Nikolai Vazquez 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "byte_set" 3 | version = "0.1.3" 4 | edition = "2018" 5 | authors = ["Nikolai Vazquez"] 6 | license = "MIT OR Apache-2.0" 7 | readme = "README.md" 8 | description = "Efficient sets of bytes." 
9 | repository = "https://github.com/nvzqz/byte-set-rs" 10 | homepage = "https://github.com/nvzqz/byte-set-rs" 11 | documentation = "https://docs.rs/byte_set" 12 | keywords = ["byte", "set", "search", "string", "fast"] 13 | categories = ["no-std", "data-structures", "rust-patterns", "embedded"] 14 | include = ["Cargo.toml", "src", "benches", "README*", "CHANGELOG*", "LICENSE*"] 15 | 16 | [package.metadata.docs.rs] 17 | all-features = true 18 | rustdoc-args = ["--cfg", "docsrs"] 19 | 20 | [dependencies] 21 | rand = { version = "0.7", default-features = false, optional = true } 22 | rand_core = { version = "0.5", default-features = false, optional = true } 23 | serde = { version = "1.0", default-features = false, optional = true } 24 | 25 | [dev-dependencies] 26 | criterion = "0.3.2" 27 | fixedbitset = "0.3.0" 28 | hashbrown = "0.7.2" 29 | rand = "0.7" 30 | 31 | [features] 32 | std = ["alloc"] 33 | alloc = [] 34 | 35 | [[bench]] 36 | name = "benches" 37 | path = "benches/main.rs" 38 | harness = false 39 | -------------------------------------------------------------------------------- /benches/util/mod.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryFrom; 2 | 3 | pub mod bool256; 4 | pub mod hash; 5 | pub mod rand; 6 | 7 | pub use self::{bool256::Bool256, rand::Rand}; 8 | 9 | /// Input sizes for benchmarks. 10 | pub const SIZES: &[usize] = &[0, 8, 16, 32, 64, 128, 192, 256]; 11 | 12 | /// A type that can be cast to `T`. 13 | pub trait Cast { 14 | /// Casts `self` to `T`, saturating at the max value. 15 | fn saturating_cast(self) -> T; 16 | } 17 | 18 | impl Cast for usize { 19 | fn saturating_cast(self) -> u8 { 20 | u8::try_from(self).unwrap_or(u8::max_value()) 21 | } 22 | } 23 | 24 | /// Casts `value` to `U`, saturating at the max value. 25 | pub fn saturating_cast, U>(value: T) -> U { 26 | value.saturating_cast() 27 | } 28 | 29 | /// Finds and removes `item` from `vec`. 
30 | /// 31 | /// This is a stable implementation of the nightly-only `Vec::remove_item`. 32 | pub fn vec_remove_item(vec: &mut Vec, item: &T) -> Option 33 | where 34 | T: PartialEq, 35 | { 36 | let pos = vec.iter().position(|x| *x == *item)?; 37 | Some(vec.remove(pos)) 38 | } 39 | 40 | /// Finds and removes `item` from `vec`, using binary search. 41 | pub fn vec_remove_item_binary_search(vec: &mut Vec, item: &T) -> Option 42 | where 43 | T: Ord, 44 | { 45 | let pos = vec.binary_search(item).ok()?; 46 | Some(vec.remove(pos)) 47 | } 48 | -------------------------------------------------------------------------------- /src/byte_set/raw.rs: -------------------------------------------------------------------------------- 1 | use super::ByteSet; 2 | use core::{mem, slice}; 3 | 4 | /// Operations over the internal memory representation. 5 | /// 6 | /// There are currently no stability guarantees over the internal bytes. This is 7 | /// being tracked in [#8](https://github.com/nvzqz/byte-set-rs/issues/8). 8 | impl ByteSet { 9 | const SIZE: usize = mem::size_of::(); 10 | 11 | /// Returns the underlying bytes of `self`. 12 | #[inline] 13 | pub fn into_raw_bytes(self) -> [u8; Self::SIZE] { 14 | unsafe { mem::transmute(self) } 15 | } 16 | 17 | /// Returns a shared reference to the underlying bytes of `self`. 18 | #[inline] 19 | pub fn as_raw_bytes(&self) -> &[u8; Self::SIZE] { 20 | unsafe { &*self.0.as_ptr().cast() } 21 | } 22 | 23 | /// Returns a mutable reference to the underlying bytes of `self`. 24 | #[inline] 25 | pub fn as_raw_bytes_mut(&mut self) -> &mut [u8; Self::SIZE] { 26 | unsafe { &mut *self.0.as_mut_ptr().cast() } 27 | } 28 | 29 | /// Returns a shared reference to the underlying bytes of `slice`. 
30 | #[inline] 31 | pub fn slice_as_raw_bytes(slice: &[Self]) -> &[u8] { 32 | let ptr = slice.as_ptr().cast::(); 33 | let len = slice.len() * Self::SIZE; 34 | unsafe { slice::from_raw_parts(ptr, len) } 35 | } 36 | 37 | /// Returns a mutable reference to the underlying bytes of `slice`. 38 | #[inline] 39 | pub fn slice_as_raw_bytes_mut(slice: &mut [Self]) -> &mut [u8] { 40 | let ptr = slice.as_mut_ptr().cast::(); 41 | let len = slice.len() * Self::SIZE; 42 | unsafe { slice::from_raw_parts_mut(ptr, len) } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/chunk.rs: -------------------------------------------------------------------------------- 1 | //! Operations over chunks (`u64` or `u32`, depending on the target). 2 | 3 | use core::mem; 4 | 5 | // TODO(#3): Use 64-bit chunk on 32-bit targets with 64-bit instructions. 6 | 7 | // Not using `usize` in order to work on platforms with other pointer sizes. 8 | #[cfg(target_pointer_width = "64")] 9 | pub type Chunk = u64; 10 | #[cfg(not(target_pointer_width = "64"))] 11 | pub type Chunk = u32; 12 | 13 | const SLOT_NUM_BITS: usize = mem::size_of::() * 8; 14 | 15 | /// Multiplied to get the byte offset for a given chunk index. 16 | pub const INDEX_OFFSET: usize = SLOT_NUM_BITS; 17 | 18 | /// Returns the index of the first (least significant) set bit of `chunk`, or 19 | /// `None` if `chunk` is 0. 20 | #[inline] 21 | pub fn lsb(chunk: Chunk) -> Option { 22 | if chunk == 0 { 23 | None 24 | } else { 25 | Some(chunk.trailing_zeros() as u8) 26 | } 27 | } 28 | 29 | /// Returns the index of the last (most significant) set bit of `chunk`, or 30 | /// `None` if `chunk` is 0. 31 | #[inline] 32 | pub fn msb(chunk: Chunk) -> Option { 33 | if chunk == 0 { 34 | None 35 | } else { 36 | let bits = SLOT_NUM_BITS - 1; 37 | Some((bits as u8) ^ chunk.leading_zeros() as u8) 38 | } 39 | } 40 | 41 | /// Removes the first (least significant) bit from `chunk` and returns it, or 42 | /// `None` if `chunk` is 0. 
43 | #[inline] 44 | pub fn pop_lsb(chunk: &mut Chunk) -> Option { 45 | let lsb = lsb(*chunk)?; 46 | *chunk ^= 1 << lsb; 47 | Some(lsb) 48 | } 49 | 50 | /// Removes the last (most significant) bit from `chunk` and returns it, or 51 | /// `None` if `chunk` is 0. 52 | #[inline] 53 | pub fn pop_msb(chunk: &mut Chunk) -> Option { 54 | let msb = msb(*chunk)?; 55 | *chunk ^= 1 << msb; 56 | Some(msb) 57 | } 58 | -------------------------------------------------------------------------------- /src/tests/iter.rs: -------------------------------------------------------------------------------- 1 | use crate::ByteSet; 2 | 3 | #[test] 4 | fn collect_full() { 5 | let set = ByteSet::full(); 6 | 7 | let bytes: Vec = set.into_iter().collect(); 8 | assert_eq!(bytes.len(), 256); 9 | 10 | for b in 0..=u8::max_value() { 11 | let i = b as usize; 12 | 13 | assert_eq!(bytes.get(i), Some(&b), "{:?} at {} is not {}", bytes, i, b); 14 | } 15 | } 16 | 17 | #[test] 18 | fn collect_full_rev() { 19 | let set = ByteSet::full(); 20 | 21 | let bytes: Vec = set.into_iter().rev().collect(); 22 | assert_eq!(bytes.len(), 256); 23 | 24 | for b in 0..=u8::max_value() { 25 | // Iterating in reverse, so flip the index. 
26 | let i = 255 - b as usize; 27 | 28 | assert_eq!(bytes.get(i), Some(&b), "{:?} at {} is not {}", bytes, i, b); 29 | } 30 | } 31 | 32 | #[test] 33 | fn contains_full() { 34 | let iter = &mut ByteSet::full().into_iter(); 35 | 36 | while let Some(byte) = iter.next() { 37 | assert_not_contains!(iter.into_byte_set(), byte); 38 | } 39 | } 40 | 41 | #[test] 42 | fn contains_full_rev() { 43 | let iter = &mut ByteSet::full().into_iter(); 44 | 45 | while let Some(byte) = iter.next_back() { 46 | assert_not_contains!(iter.into_byte_set(), byte); 47 | } 48 | } 49 | 50 | #[test] 51 | fn ord() { 52 | let full = ByteSet::full().into_iter(); 53 | assert_eq!( 54 | full.collect::>(), 55 | (0..=u8::max_value()).collect::>(), 56 | ); 57 | } 58 | 59 | #[test] 60 | fn ord_rev() { 61 | let full = ByteSet::full().into_iter(); 62 | assert_eq!( 63 | full.rev().collect::>(), 64 | (0..=u8::max_value()).rev().collect::>(), 65 | ); 66 | } 67 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/macos,linux,windows,rust 3 | # Edit at https://www.gitignore.io/?templates=macos,linux,windows,rust 4 | 5 | ### Linux ### 6 | *~ 7 | 8 | # temporary files which can be created if a process still has a handle open of a deleted file 9 | .fuse_hidden* 10 | 11 | # KDE directory preferences 12 | .directory 13 | 14 | # Linux trash folder which might appear on any partition or disk 15 | .Trash-* 16 | 17 | # .nfs files are created when an open file is removed but is still being accessed 18 | .nfs* 19 | 20 | ### macOS ### 21 | # General 22 | .DS_Store 23 | .AppleDouble 24 | .LSOverride 25 | 26 | # Icon must end with two \r 27 | Icon 28 | 29 | # Thumbnails 30 | ._* 31 | 32 | # Files that might appear in the root of a volume 33 | .DocumentRevisions-V100 34 | .fseventsd 35 | .Spotlight-V100 36 | .TemporaryItems 37 | .Trashes 38 | 
.VolumeIcon.icns 39 | .com.apple.timemachine.donotpresent 40 | 41 | # Directories potentially created on remote AFP share 42 | .AppleDB 43 | .AppleDesktop 44 | Network Trash Folder 45 | Temporary Items 46 | .apdisk 47 | 48 | ### Rust ### 49 | # Generated by Cargo 50 | # will have compiled files and executables 51 | /target/ 52 | 53 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 54 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 55 | Cargo.lock 56 | 57 | # These are backup files generated by rustfmt 58 | **/*.rs.bk 59 | 60 | ### Windows ### 61 | # Windows thumbnail cache files 62 | Thumbs.db 63 | Thumbs.db:encryptable 64 | ehthumbs.db 65 | ehthumbs_vista.db 66 | 67 | # Dump file 68 | *.stackdump 69 | 70 | # Folder config file 71 | [Dd]esktop.ini 72 | 73 | # Recycle Bin used on file shares 74 | $RECYCLE.BIN/ 75 | 76 | # Windows Installer files 77 | *.cab 78 | *.msi 79 | *.msix 80 | *.msm 81 | *.msp 82 | 83 | # Windows shortcuts 84 | *.lnk 85 | 86 | # End of https://www.gitignore.io/api/macos,linux,windows,rust 87 | -------------------------------------------------------------------------------- /benches/util/rand.rs: -------------------------------------------------------------------------------- 1 | use byte_set::ByteSet; 2 | use fixedbitset::FixedBitSet; 3 | use hashbrown::HashSet as HashbrownSet; 4 | use rand::{seq::SliceRandom, Rng}; 5 | use std::{ 6 | collections::{BTreeSet, BinaryHeap, HashSet}, 7 | hash::BuildHasher, 8 | }; 9 | 10 | /// Returns an array of bytes that has been shuffled. 11 | pub fn shuffled_bytes(rng: &mut R) -> [u8; 256] { 12 | let mut input = [0u8; 256]; 13 | for i in 0..=u8::max_value() { 14 | input[i as usize] = i; 15 | } 16 | input.shuffle(rng); 17 | input 18 | } 19 | 20 | /// A byte container that can be randomly generated. 21 | pub trait Rand { 22 | /// Generates an instance containing `len` random bytes from `rng`. 
23 | fn rand_len(len: usize, rng: &mut R) -> Self; 24 | } 25 | 26 | impl Rand for ByteSet { 27 | fn rand_len(len: usize, rng: &mut R) -> Self { 28 | let input = shuffled_bytes(rng); 29 | input[..len].iter().collect() 30 | } 31 | } 32 | 33 | // Standard library collections: 34 | 35 | impl Rand for BTreeSet { 36 | fn rand_len(len: usize, rng: &mut R) -> Self { 37 | let input = shuffled_bytes(rng); 38 | input[..len].iter().cloned().collect() 39 | } 40 | } 41 | 42 | impl Rand for BinaryHeap { 43 | fn rand_len(len: usize, rng: &mut R) -> Self { 44 | let input = shuffled_bytes(rng); 45 | input[..len].iter().cloned().collect() 46 | } 47 | } 48 | 49 | impl Rand for HashSet { 50 | fn rand_len(len: usize, rng: &mut R) -> Self { 51 | let input = shuffled_bytes(rng); 52 | input[..len].iter().cloned().collect() 53 | } 54 | } 55 | 56 | impl Rand for Vec { 57 | fn rand_len(len: usize, rng: &mut R) -> Self { 58 | let input = shuffled_bytes(rng); 59 | input[..len].into() 60 | } 61 | } 62 | 63 | // 3rd party collections: 64 | 65 | impl Rand for FixedBitSet { 66 | fn rand_len(len: usize, rng: &mut R) -> Self { 67 | let input = shuffled_bytes(rng); 68 | input[..len] 69 | .iter() 70 | .cloned() 71 | .map(|byte| byte as usize) 72 | .collect() 73 | } 74 | } 75 | 76 | impl Rand for HashbrownSet { 77 | fn rand_len(len: usize, rng: &mut R) -> Self { 78 | let input = shuffled_bytes(rng); 79 | input[..len].iter().cloned().collect() 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/byte_set/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::Chunk; 2 | use core::mem; 3 | 4 | // These modules must appear in this order to make documentation easier to read. 5 | // The space between ensures rustfmt does not reorder them. 6 | mod main_impl; 7 | 8 | mod ascii; 9 | 10 | mod raw; 11 | 12 | mod traits; 13 | 14 | /// An efficient, general-purpose set of [`u8`]s. 
15 | /// 16 | /// # Implementation 17 | /// 18 | /// This is a 256-bit mask where a byte is contained based on whether its bit is 19 | /// enabled. The first (least significant) bit in the mask represents the first 20 | /// byte in the set. Likewise, the last (most significant) bit represents 21 | /// the last byte. 22 | /// 23 | /// The mask is composed of a "chunk" array. Each chunk is either 64 or 32 bits 24 | /// wide, depending on the target architecture. As of right now, this is based 25 | /// on native register size. This may change in the future based on target 26 | /// features that enable better performance. 27 | /// 28 | /// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html 29 | #[derive(Clone, Copy, PartialEq, Eq)] 30 | #[repr(C)] 31 | pub struct ByteSet(pub(crate) [Chunk; Self::NUM_SLOTS]); 32 | 33 | /// Returns the chunk index for `byte` and the bit shift for that chunk. 34 | #[inline] 35 | const fn chunk_index_and_shift(byte: u8) -> (usize, usize) { 36 | let byte = byte as usize; 37 | 38 | #[cfg(target_pointer_width = "64")] 39 | let index = byte >> 6; 40 | #[cfg(target_pointer_width = "64")] 41 | let shift = byte & 0b0011_1111; 42 | 43 | #[cfg(not(target_pointer_width = "64"))] 44 | let index = byte >> 5; 45 | #[cfg(not(target_pointer_width = "64"))] 46 | let shift = byte & 0b0001_1111; 47 | 48 | (index, shift) 49 | } 50 | 51 | impl ByteSet { 52 | pub(crate) const SLOT_SIZE: usize = mem::size_of::(); 53 | 54 | pub(crate) const NUM_SLOTS: usize = 256 / 8 / Self::SLOT_SIZE; 55 | 56 | pub(crate) const LAST_SLOT_INDEX: usize = Self::NUM_SLOTS - 1; 57 | } 58 | 59 | #[cfg(test)] 60 | impl ByteSet { 61 | /// Returns a formatting proxy for the binary representation of `self`. 62 | /// 63 | /// `fmt::Binary` is not currently implemented for `ByteSet` because of the 64 | /// extra work to support formatting options. 
65 | pub(crate) fn fmt_binary<'a>(&'a self) -> impl core::fmt::Display + 'a { 66 | struct Formatted<'a>(&'a ByteSet); 67 | 68 | impl core::fmt::Display for Formatted<'_> { 69 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 70 | for chunk in &(self.0).0 { 71 | #[cfg(target_pointer_width = "64")] 72 | write!(f, "{:064b}", chunk)?; 73 | 74 | #[cfg(not(target_pointer_width = "64"))] 75 | write!(f, "{:032b}", chunk)?; 76 | } 77 | Ok(()) 78 | } 79 | } 80 | 81 | Formatted(self) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /benches/benchmarks/drop.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, BatchSize, BenchmarkId, Criterion, Throughput}; 2 | use std::collections::{BTreeSet, HashSet}; 3 | 4 | use crate::util::{self, hash::HashbrownSet, Bool256, Rand}; 5 | use byte_set::ByteSet; 6 | 7 | pub fn benches(criterion: &mut Criterion) { 8 | let mut group = criterion.benchmark_group("Drop"); 9 | 10 | let mut rng = rand::thread_rng(); 11 | 12 | for &size in util::SIZES { 13 | group.throughput(Throughput::Bytes(size as u64)); 14 | 15 | group.bench_function(BenchmarkId::new("ByteSet", size), |b| { 16 | b.iter_batched( 17 | || black_box(ByteSet::rand_len(size, &mut rng)), 18 | |byte_set| { 19 | drop(byte_set); 20 | }, 21 | BatchSize::SmallInput, 22 | ) 23 | }); 24 | 25 | group.bench_function(BenchmarkId::new("[bool; 256]", size), |b| { 26 | b.iter_batched( 27 | || black_box(Bool256::rand_len(size, &mut rng)), 28 | |bool256| { 29 | drop(bool256); 30 | }, 31 | BatchSize::SmallInput, 32 | ) 33 | }); 34 | 35 | group.bench_function(BenchmarkId::new("HashSet", size), |b| { 36 | b.iter_batched( 37 | || black_box(HashSet::::rand_len(size, &mut rng)), 38 | |hash_set| { 39 | drop(hash_set); 40 | }, 41 | BatchSize::SmallInput, 42 | ) 43 | }); 44 | 45 | group.bench_function( 46 | BenchmarkId::new("hashbrown::HashSet", size), 47 | |b| { 48 | 
b.iter_batched( 49 | || black_box(HashbrownSet::::rand_len(size, &mut rng)), 50 | |hash_set| { 51 | drop(hash_set); 52 | }, 53 | BatchSize::SmallInput, 54 | ) 55 | }, 56 | ); 57 | 58 | group.bench_function( 59 | BenchmarkId::new("fixedbitset::FixedBitSet", size), 60 | |b| { 61 | b.iter_batched( 62 | || { 63 | black_box(fixedbitset::FixedBitSet::rand_len( 64 | size, &mut rng, 65 | )) 66 | }, 67 | |fixed_bit_set| { 68 | drop(fixed_bit_set); 69 | }, 70 | BatchSize::SmallInput, 71 | ) 72 | }, 73 | ); 74 | 75 | group.bench_function(BenchmarkId::new("BTreeSet", size), |b| { 76 | b.iter_batched( 77 | || black_box(BTreeSet::::rand_len(size, &mut rng)), 78 | |btree_set| { 79 | drop(btree_set); 80 | }, 81 | BatchSize::SmallInput, 82 | ) 83 | }); 84 | 85 | group.bench_function(BenchmarkId::new("Vec", size), |b| { 86 | b.iter_batched( 87 | || black_box(Vec::::rand_len(size, &mut rng)), 88 | |vec| { 89 | drop(vec); 90 | }, 91 | BatchSize::SmallInput, 92 | ) 93 | }); 94 | } 95 | 96 | group.finish(); 97 | } 98 | -------------------------------------------------------------------------------- /benches/util/bool256.rs: -------------------------------------------------------------------------------- 1 | use super::rand::{shuffled_bytes, Rand}; 2 | use std::{iter, ops::Range}; 3 | 4 | /// A wrapper around `[bool; 256]` for comparing performance. 
5 | pub struct Bool256(pub [bool; 256]); 6 | 7 | impl Default for Bool256 { 8 | fn default() -> Self { 9 | Self([false; 256]) 10 | } 11 | } 12 | 13 | impl Rand for Bool256 { 14 | fn rand_len(len: usize, rng: &mut R) -> Self { 15 | let input = shuffled_bytes(rng); 16 | input[..len].iter().collect() 17 | } 18 | } 19 | 20 | impl Bool256 { 21 | pub fn new() -> Self { 22 | Self::default() 23 | } 24 | 25 | pub fn clear(&mut self) { 26 | *self = Self::default(); 27 | } 28 | 29 | pub fn len(&self) -> usize { 30 | self.0 31 | .iter() 32 | .fold(0, |n, &is_contained| n + is_contained as usize) 33 | } 34 | 35 | pub fn insert(&mut self, byte: u8) { 36 | self.0[byte as usize] = true; 37 | } 38 | 39 | pub fn remove(&mut self, byte: u8) { 40 | self.0[byte as usize] = false; 41 | } 42 | 43 | pub fn contains(&self, byte: u8) -> bool { 44 | self.0[byte as usize] 45 | } 46 | 47 | pub fn min(&self) -> Option { 48 | self.into_iter().next() 49 | } 50 | 51 | pub fn max(&self) -> Option { 52 | self.into_iter().next_back() 53 | } 54 | } 55 | 56 | impl Extend for Bool256 { 57 | fn extend>(&mut self, iter: T) { 58 | iter.into_iter().for_each(|byte| self.insert(byte)); 59 | } 60 | } 61 | 62 | impl<'a> Extend<&'a u8> for Bool256 { 63 | fn extend>(&mut self, iter: T) { 64 | self.extend(iter.into_iter().cloned()); 65 | } 66 | } 67 | 68 | impl iter::FromIterator for Bool256 { 69 | fn from_iter>(iter: T) -> Self { 70 | let mut value = Self::default(); 71 | value.extend(iter); 72 | value 73 | } 74 | } 75 | 76 | impl<'a> iter::FromIterator<&'a u8> for Bool256 { 77 | fn from_iter>(iter: T) -> Self { 78 | iter.into_iter().cloned().collect() 79 | } 80 | } 81 | 82 | impl<'a> IntoIterator for &'a Bool256 { 83 | type IntoIter = Iter<'a>; 84 | type Item = u8; 85 | 86 | fn into_iter(self) -> Self::IntoIter { 87 | Iter { 88 | bool256: self, 89 | indexes: 0..256, 90 | } 91 | } 92 | } 93 | 94 | pub struct Iter<'a> { 95 | bool256: &'a Bool256, 96 | indexes: Range, 97 | } 98 | 99 | impl Iterator for Iter<'_> { 
100 | type Item = u8; 101 | 102 | fn next(&mut self) -> Option { 103 | for i in self.indexes.by_ref() { 104 | let byte = i as u8; 105 | if self.bool256.contains(byte) { 106 | return Some(byte); 107 | } 108 | } 109 | None 110 | } 111 | } 112 | 113 | impl DoubleEndedIterator for Iter<'_> { 114 | fn next_back(&mut self) -> Option { 115 | for i in self.indexes.by_ref().rev() { 116 | let byte = i as u8; 117 | if self.bool256.contains(byte) { 118 | return Some(byte); 119 | } 120 | } 121 | None 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /benches/benchmarks/len.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, BatchSize, BenchmarkId, Criterion, Throughput}; 2 | use std::collections::{BTreeSet, HashSet}; 3 | 4 | use crate::util::{self, hash::HashbrownSet, Bool256, Rand}; 5 | use byte_set::ByteSet; 6 | 7 | pub fn benches(criterion: &mut Criterion) { 8 | let mut group = criterion.benchmark_group("Length"); 9 | 10 | let mut rng = rand::thread_rng(); 11 | 12 | for &size in util::SIZES { 13 | group.throughput(Throughput::Bytes(size as u64)); 14 | 15 | group.bench_function(BenchmarkId::new("ByteSet", size), |b| { 16 | b.iter_batched( 17 | || black_box(ByteSet::rand_len(size, &mut rng)), 18 | |byte_set| black_box(byte_set.len()), 19 | BatchSize::SmallInput, 20 | ) 21 | }); 22 | 23 | group.bench_function(BenchmarkId::new("[bool; 256]", size), |b| { 24 | b.iter_batched( 25 | || black_box(Bool256::rand_len(size, &mut rng)), 26 | |bool256| black_box(bool256.len()), 27 | BatchSize::SmallInput, 28 | ) 29 | }); 30 | 31 | let range_inclusive = black_box(0u8..=util::saturating_cast(size)); 32 | group.bench_with_input( 33 | BenchmarkId::new("RangeInclusive", size), 34 | &range_inclusive, 35 | |b, range_inclusive| b.iter(|| black_box(range_inclusive.len())), 36 | ); 37 | 38 | group.bench_function(BenchmarkId::new("HashSet", size), |b| { 39 | b.iter_batched_ref( 40 | 
|| black_box(HashSet::::rand_len(size, &mut rng)), 41 | |hash_set| black_box(hash_set.len()), 42 | BatchSize::SmallInput, 43 | ) 44 | }); 45 | 46 | group.bench_function( 47 | BenchmarkId::new("hashbrown::HashSet", size), 48 | |b| { 49 | b.iter_batched_ref( 50 | || black_box(HashbrownSet::::rand_len(size, &mut rng)), 51 | |hash_set| black_box(hash_set.len()), 52 | BatchSize::SmallInput, 53 | ) 54 | }, 55 | ); 56 | 57 | group.bench_function( 58 | BenchmarkId::new("fixedbitset::FixedBitSet", size), 59 | |b| { 60 | b.iter_batched_ref( 61 | || { 62 | black_box(fixedbitset::FixedBitSet::rand_len( 63 | size, &mut rng, 64 | )) 65 | }, 66 | |fixed_bit_set| black_box(fixed_bit_set.count_ones(..)), 67 | BatchSize::SmallInput, 68 | ) 69 | }, 70 | ); 71 | 72 | group.bench_function(BenchmarkId::new("BTreeSet", size), |b| { 73 | b.iter_batched_ref( 74 | || black_box(BTreeSet::::rand_len(size, &mut rng)), 75 | |btree_set| black_box(btree_set.len()), 76 | BatchSize::SmallInput, 77 | ) 78 | }); 79 | 80 | group.bench_function(BenchmarkId::new("Vec", size), |b| { 81 | b.iter_batched_ref( 82 | || black_box(Vec::::rand_len(size, &mut rng)), 83 | |vec| black_box(vec.len()), 84 | BatchSize::SmallInput, 85 | ) 86 | }); 87 | } 88 | 89 | group.finish(); 90 | } 91 | -------------------------------------------------------------------------------- /src/iter.rs: -------------------------------------------------------------------------------- 1 | use crate::{chunk, ByteSet}; 2 | use core::iter; 3 | 4 | /// An iterator over a [`ByteSet`]. 5 | /// 6 | /// [`ByteSet`]: struct.ByteSet.html 7 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 8 | pub struct Iter { 9 | /// The set being iterated over. It is mutated in-place as bits are popped 10 | /// from each chunk. 11 | byte_set: ByteSet, 12 | 13 | /// The current chunk index when iterating forwards. 14 | forward_index: usize, 15 | 16 | /// The current chunk index when iterating backwards. 
17 | backward_index: usize, 18 | } 19 | 20 | impl Iter { 21 | #[inline] 22 | pub(crate) const fn new(byte_set: ByteSet) -> Self { 23 | Self { 24 | byte_set, 25 | forward_index: 0, 26 | backward_index: ByteSet::LAST_SLOT_INDEX, 27 | } 28 | } 29 | 30 | /// Returns the underlying [`ByteSet`]. 31 | /// 32 | /// Note that iteration mutates the `ByteSet` in-place. 33 | #[inline] 34 | pub const fn into_byte_set(self) -> ByteSet { 35 | self.byte_set 36 | } 37 | } 38 | 39 | impl From for Iter { 40 | #[inline] 41 | fn from(byte_set: ByteSet) -> Self { 42 | Self::new(byte_set) 43 | } 44 | } 45 | 46 | impl Iterator for Iter { 47 | type Item = u8; 48 | 49 | fn next(&mut self) -> Option { 50 | let range = self.forward_index..ByteSet::NUM_SLOTS; 51 | 52 | for index in range { 53 | self.forward_index = index; 54 | 55 | let chunk = &mut self.byte_set.0[index]; 56 | 57 | if let Some(lsb) = chunk::pop_lsb(chunk) { 58 | return Some(lsb + (index * chunk::INDEX_OFFSET) as u8); 59 | } 60 | } 61 | 62 | None 63 | } 64 | 65 | fn for_each(mut self, mut f: F) 66 | where 67 | F: FnMut(u8), 68 | { 69 | (0..ByteSet::NUM_SLOTS).for_each(|index| { 70 | let chunk = &mut self.byte_set.0[index]; 71 | 72 | while let Some(lsb) = chunk::pop_lsb(chunk) { 73 | f(lsb + (index * chunk::INDEX_OFFSET) as u8); 74 | } 75 | }); 76 | } 77 | 78 | #[inline] 79 | fn size_hint(&self) -> (usize, Option) { 80 | let len = self.len(); 81 | (len, Some(len)) 82 | } 83 | 84 | #[inline] 85 | fn count(self) -> usize { 86 | self.len() 87 | } 88 | 89 | #[inline] 90 | fn last(mut self) -> Option { 91 | self.next_back() 92 | } 93 | 94 | #[inline] 95 | fn min(mut self) -> Option { 96 | self.next() 97 | } 98 | 99 | #[inline] 100 | fn max(self) -> Option { 101 | self.last() 102 | } 103 | } 104 | 105 | impl DoubleEndedIterator for Iter { 106 | fn next_back(&mut self) -> Option { 107 | // `Range` (`a..b`) is faster than `RangeInclusive` (`a..=b`). 
108 | let range = 0..(self.backward_index + 1); 109 | 110 | for index in range.rev() { 111 | self.backward_index = index; 112 | 113 | // SAFETY: This invariant is tested. 114 | let chunk = unsafe { self.byte_set.0.get_unchecked_mut(index) }; 115 | 116 | if let Some(msb) = chunk::pop_msb(chunk) { 117 | return Some(msb + (index * chunk::INDEX_OFFSET) as u8); 118 | } 119 | } 120 | 121 | None 122 | } 123 | } 124 | 125 | impl ExactSizeIterator for Iter { 126 | #[inline] 127 | fn len(&self) -> usize { 128 | self.byte_set.len() 129 | } 130 | } 131 | 132 | // `Iter` does not produce more values after `None` is reached. 133 | impl iter::FusedIterator for Iter {} 134 | -------------------------------------------------------------------------------- /benches/benchmarks/clear.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, BatchSize, BenchmarkId, Criterion, Throughput}; 2 | use std::collections::{BTreeSet, HashSet}; 3 | 4 | use crate::util::{self, hash::HashbrownSet, Bool256, Rand}; 5 | use byte_set::ByteSet; 6 | 7 | pub fn benches(criterion: &mut Criterion) { 8 | let mut group = criterion.benchmark_group("Clear"); 9 | 10 | let mut rng = rand::thread_rng(); 11 | 12 | for &size in util::SIZES { 13 | group.throughput(Throughput::Bytes(size as u64)); 14 | 15 | group.bench_function(BenchmarkId::new("ByteSet", size), |b| { 16 | b.iter_batched_ref( 17 | || black_box(ByteSet::rand_len(size, &mut rng)), 18 | |byte_set| { 19 | byte_set.clear(); 20 | black_box(byte_set); 21 | }, 22 | BatchSize::SmallInput, 23 | ) 24 | }); 25 | 26 | group.bench_function(BenchmarkId::new("[bool; 256]", size), |b| { 27 | b.iter_batched_ref( 28 | || black_box(Bool256::rand_len(size, &mut rng)), 29 | |bool256| { 30 | bool256.clear(); 31 | black_box(bool256); 32 | }, 33 | BatchSize::SmallInput, 34 | ) 35 | }); 36 | 37 | group.bench_function(BenchmarkId::new("HashSet", size), |b| { 38 | b.iter_batched_ref( 39 | || 
black_box(HashSet::::rand_len(size, &mut rng)), 40 | |hash_set| { 41 | hash_set.clear(); 42 | black_box(hash_set); 43 | }, 44 | BatchSize::SmallInput, 45 | ) 46 | }); 47 | 48 | group.bench_function( 49 | BenchmarkId::new("hashbrown::HashSet", size), 50 | |b| { 51 | b.iter_batched_ref( 52 | || black_box(HashbrownSet::::rand_len(size, &mut rng)), 53 | |hash_set| { 54 | hash_set.clear(); 55 | black_box(hash_set); 56 | }, 57 | BatchSize::SmallInput, 58 | ) 59 | }, 60 | ); 61 | 62 | group.bench_function( 63 | BenchmarkId::new("fixedbitset::FixedBitSet", size), 64 | |b| { 65 | b.iter_batched_ref( 66 | || { 67 | black_box(fixedbitset::FixedBitSet::rand_len( 68 | size, &mut rng, 69 | )) 70 | }, 71 | |fixed_bit_set| { 72 | fixed_bit_set.clear(); 73 | black_box(fixed_bit_set); 74 | }, 75 | BatchSize::SmallInput, 76 | ) 77 | }, 78 | ); 79 | 80 | group.bench_function(BenchmarkId::new("BTreeSet", size), |b| { 81 | b.iter_batched_ref( 82 | || black_box(BTreeSet::::rand_len(size, &mut rng)), 83 | |btree_set| { 84 | btree_set.clear(); 85 | black_box(btree_set); 86 | }, 87 | BatchSize::SmallInput, 88 | ) 89 | }); 90 | 91 | group.bench_function(BenchmarkId::new("Vec", size), |b| { 92 | b.iter_batched_ref( 93 | || black_box(Vec::::rand_len(size, &mut rng)), 94 | |vec| { 95 | vec.clear(); 96 | black_box(vec); 97 | }, 98 | BatchSize::SmallInput, 99 | ) 100 | }); 101 | } 102 | 103 | group.finish(); 104 | } 105 | -------------------------------------------------------------------------------- /src/macros.rs: -------------------------------------------------------------------------------- 1 | /// Creates a [`ByteSet`] from a sequence of [`u8`]s. 2 | /// 3 | /// `byte_set!` allows `ByteSet`s to be defined with the same syntax as [`vec!`] 4 | /// or array expressions. 
5 | /// 6 | /// # Examples 7 | /// 8 | /// This can be used within a `const` context: 9 | /// 10 | /// ``` 11 | /// # use byte_set::{byte_set, ByteSet}; 12 | /// const SET: ByteSet = byte_set!(1, 2, 3, b'a', b'b', b'c'); 13 | /// 14 | /// assert!(SET.contains(b'a')); 15 | /// ``` 16 | /// 17 | /// [`ByteSet`]: struct.ByteSet.html 18 | /// [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html 19 | /// [`vec!`]: https://doc.rust-lang.org/std/macro.vec.html 20 | #[macro_export] 21 | macro_rules! byte_set { 22 | ($($byte:expr,)*) => { 23 | $crate::ByteSet::new() $(.inserting($byte))* 24 | }; 25 | ($($byte:expr),*) => { 26 | $crate::byte_set!($($byte,)*) 27 | }; 28 | } 29 | 30 | /// Performs a `$map` operation over the `usize` chunks of `$this` and `$other`, 31 | /// combining the resulting `usize` chunks with `$reduce`. 32 | macro_rules! map_reduce_chunks { 33 | ($this:expr, $other:expr, $map:tt, $reduce:tt) => {{ 34 | // TODO: Might be worth wrapping a `const fn`? This is only being used 35 | // by binary ops, so this is fine for now. 36 | #[cfg(target_pointer_width = "64")] 37 | { 38 | ($this.0[0] $map $other.0[0]) $reduce 39 | ($this.0[1] $map $other.0[1]) $reduce 40 | ($this.0[2] $map $other.0[2]) $reduce 41 | ($this.0[3] $map $other.0[3]) 42 | } 43 | 44 | #[cfg(not(target_pointer_width = "64"))] 45 | { 46 | ($this.0[0] $map $other.0[0]) $reduce 47 | ($this.0[1] $map $other.0[1]) $reduce 48 | ($this.0[2] $map $other.0[2]) $reduce 49 | ($this.0[3] $map $other.0[3]) $reduce 50 | ($this.0[4] $map $other.0[4]) $reduce 51 | ($this.0[5] $map $other.0[5]) $reduce 52 | ($this.0[6] $map $other.0[6]) $reduce 53 | ($this.0[7] $map $other.0[7]) 54 | } 55 | }}; 56 | } 57 | 58 | /// Performs a `$map` operation over the `usize` chunks of `$this` and `$other`, 59 | /// returning the resulting `ByteSet`. 60 | macro_rules! map_chunks { 61 | ($this:expr, $map:tt) => {{ 62 | // TODO: Might be worth wrapping a `const fn`? 
This is only being used 63 | // by `!`, so being a prefix op is fine for now. 64 | #[cfg(target_pointer_width = "64")] 65 | { 66 | ByteSet([ 67 | $map $this.0[0], $map $this.0[1], 68 | $map $this.0[2], $map $this.0[3], 69 | ]) 70 | } 71 | 72 | #[cfg(not(target_pointer_width = "64"))] 73 | { 74 | ByteSet([ 75 | $map $this.0[0], $map $this.0[1], 76 | $map $this.0[2], $map $this.0[3], 77 | $map $this.0[4], $map $this.0[5], 78 | $map $this.0[6], $map $this.0[7], 79 | ]) 80 | } 81 | }}; 82 | ($this:expr, $map:tt, $other:expr) => {{ 83 | #[cfg(target_pointer_width = "64")] 84 | { 85 | ByteSet([ 86 | ($this.0[0] $map $other.0[0]), ($this.0[1] $map $other.0[1]), 87 | ($this.0[2] $map $other.0[2]), ($this.0[3] $map $other.0[3]), 88 | ]) 89 | } 90 | 91 | #[cfg(not(target_pointer_width = "64"))] 92 | { 93 | ByteSet([ 94 | ($this.0[0] $map $other.0[0]), ($this.0[1] $map $other.0[1]), 95 | ($this.0[2] $map $other.0[2]), ($this.0[3] $map $other.0[3]), 96 | ($this.0[4] $map $other.0[4]), ($this.0[5] $map $other.0[5]), 97 | ($this.0[6] $map $other.0[6]), ($this.0[7] $map $other.0[7]), 98 | ]) 99 | } 100 | }}; 101 | } 102 | -------------------------------------------------------------------------------- /src/tests/byte_set.rs: -------------------------------------------------------------------------------- 1 | use crate::ByteSet; 2 | 3 | #[test] 4 | fn len() { 5 | assert_len!(ByteSet::new(), 0); 6 | assert_len!(ByteSet::full(), 256); 7 | } 8 | 9 | #[test] 10 | fn insert() { 11 | let mut set = ByteSet::new(); 12 | 13 | for byte in 0..=u8::max_value() { 14 | assert_not_contains!(set, byte); 15 | 16 | let copy = set; 17 | assert_contains!(copy.inserting(byte), byte); 18 | 19 | set.insert(byte); 20 | assert_contains!(set, byte); 21 | } 22 | 23 | assert_eq!(set.len(), 256); 24 | } 25 | 26 | #[test] 27 | fn remove() { 28 | let mut set = ByteSet::full(); 29 | 30 | for byte in 0..=u8::max_value() { 31 | assert_contains!(set, byte); 32 | 33 | let copy = set; 34 | 
assert_not_contains!(copy.removing(byte), byte); 35 | 36 | set.remove(byte); 37 | assert_not_contains!(set, byte); 38 | } 39 | 40 | assert_empty!(set); 41 | } 42 | 43 | #[test] 44 | fn first() { 45 | macro_rules! assert_first_eq { 46 | ($set:expr, $first:expr) => {{ 47 | let set = &$set; 48 | let first: Option<u8> = $first.into(); 49 | assert_eq!( 50 | set.first(), 51 | first, 52 | "First byte in {} is not {:?}", 53 | set.fmt_binary(), 54 | first 55 | ); 56 | }}; 57 | } 58 | 59 | assert_first_eq!(ByteSet::new(), None); 60 | assert_eq!(ByteSet::new().pop_first(), None); 61 | 62 | let mut set = ByteSet::full(); 63 | 64 | for byte in set.into_iter() { 65 | assert_first_eq!(set, byte); 66 | assert_eq!(set.pop_first(), Some(byte)); 67 | } 68 | 69 | assert_first_eq!(set, None); 70 | assert_eq!(set.pop_first(), None); 71 | } 72 | 73 | #[test] 74 | fn last() { 75 | macro_rules! assert_last_eq { 76 | ($set:expr, $last:expr) => {{ 77 | let set = &$set; 78 | let last: Option<u8> = $last.into(); 79 | assert_eq!( 80 | set.last(), 81 | last, 82 | "Last byte in {} is not {:?}", 83 | set.fmt_binary(), 84 | last 85 | ); 86 | }}; 87 | } 88 | 89 | assert_last_eq!(ByteSet::new(), None); 90 | assert_eq!(ByteSet::new().pop_last(), None); 91 | 92 | let mut set = ByteSet::full(); 93 | 94 | for byte in set.into_iter().rev() { 95 | assert_last_eq!(set, byte); 96 | assert_eq!(set.pop_last(), Some(byte)); 97 | } 98 | 99 | assert_last_eq!(set, None); 100 | assert_eq!(set.pop_last(), None); 101 | } 102 | 103 | #[test] 104 | fn from_open_ranges() { 105 | for byte in 0..=u8::max_value() { 106 | let range_to = ..byte; 107 | let range_from = byte..; 108 | let range_to_i = ..=byte; 109 | let set_to = ByteSet::from_range_to(range_to.clone()); 110 | let set_from = ByteSet::from_range_from(range_from.clone()); 111 | let set_to_i = ByteSet::from_range_to_inclusive(range_to_i.clone()); 112 | for b in 0..=u8::max_value() { 113 | assert_eq!(range_to.contains(&b), set_to.contains(b)); 114 |
assert_eq!(range_from.contains(&b), set_from.contains(b)); 115 | assert_eq!(range_to_i.contains(&b), set_to_i.contains(b)); 116 | } 117 | } 118 | } 119 | 120 | #[test] 121 | fn from_closed_ranges() { 122 | for start in 0..=u8::max_value() { 123 | for end in start..=u8::max_value() { 124 | let range = start..end; 125 | let range_i = start..=end; 126 | let set = ByteSet::from_range(range.clone()); 127 | let set_i = ByteSet::from_range_inclusive(range_i.clone()); 128 | for b in 0..=u8::max_value() { 129 | assert_eq!(range.contains(&b), set.contains(b)); 130 | assert_eq!(range_i.contains(&b), set_i.contains(b)); 131 | } 132 | } 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /benches/benchmarks/iter.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, BatchSize, BenchmarkId, Criterion, Throughput}; 2 | use std::collections::{BTreeSet, HashSet}; 3 | 4 | use crate::util::{self, hash::HashbrownSet, Bool256, Rand}; 5 | use byte_set::ByteSet; 6 | 7 | pub fn benches(criterion: &mut Criterion) { 8 | let mut group = criterion.benchmark_group("Iter"); 9 | 10 | let mut rng = rand::thread_rng(); 11 | 12 | for &size in util::SIZES { 13 | group.throughput(Throughput::Bytes(size as u64)); 14 | 15 | group.bench_function(BenchmarkId::new("ByteSet", size), |b| { 16 | b.iter_batched( 17 | || black_box(ByteSet::rand_len(size, &mut rng)), 18 | |byte_set| { 19 | for byte in byte_set { 20 | black_box(byte); 21 | } 22 | }, 23 | BatchSize::SmallInput, 24 | ) 25 | }); 26 | 27 | group.bench_function(BenchmarkId::new("[bool; 256]", size), |b| { 28 | b.iter_batched_ref( 29 | || black_box(Bool256::rand_len(size, &mut rng)), 30 | |bool256| { 31 | for byte in &*bool256 { 32 | black_box(byte); 33 | } 34 | }, 35 | BatchSize::SmallInput, 36 | ) 37 | }); 38 | 39 | let range_inclusive = black_box(0u8..=util::saturating_cast(size)); 40 | group.bench_with_input( 41 | 
BenchmarkId::new("RangeInclusive", size), 42 | &range_inclusive, 43 | |b, range_inclusive| { 44 | b.iter(|| { 45 | for byte in range_inclusive.clone() { 46 | black_box(byte); 47 | } 48 | }) 49 | }, 50 | ); 51 | 52 | group.bench_function(BenchmarkId::new("HashSet", size), |b| { 53 | b.iter_batched_ref( 54 | || black_box(HashSet::::rand_len(size, &mut rng)), 55 | |hash_set| { 56 | for &byte in hash_set.iter() { 57 | black_box(byte); 58 | } 59 | }, 60 | BatchSize::SmallInput, 61 | ) 62 | }); 63 | 64 | group.bench_function( 65 | BenchmarkId::new("hashbrown::HashSet", size), 66 | |b| { 67 | b.iter_batched_ref( 68 | || black_box(HashbrownSet::::rand_len(size, &mut rng)), 69 | |hash_set| { 70 | for &byte in hash_set.iter() { 71 | black_box(byte); 72 | } 73 | }, 74 | BatchSize::SmallInput, 75 | ) 76 | }, 77 | ); 78 | 79 | group.bench_function( 80 | BenchmarkId::new("fixedbitset::FixedBitSet", size), 81 | |b| { 82 | b.iter_batched_ref( 83 | || { 84 | black_box(fixedbitset::FixedBitSet::rand_len( 85 | size, &mut rng, 86 | )) 87 | }, 88 | |fixed_bit_set| { 89 | // The `.ones()` iterator goes over all set bits, making 90 | // it equivalent to the others. 
91 | for one in fixed_bit_set.ones() { 92 | black_box(one); 93 | } 94 | }, 95 | BatchSize::SmallInput, 96 | ) 97 | }, 98 | ); 99 | 100 | group.bench_function(BenchmarkId::new("BTreeSet", size), |b| { 101 | b.iter_batched_ref( 102 | || black_box(BTreeSet::::rand_len(size, &mut rng)), 103 | |btree_set| { 104 | for &byte in btree_set.iter() { 105 | black_box(byte); 106 | } 107 | }, 108 | BatchSize::SmallInput, 109 | ) 110 | }); 111 | 112 | group.bench_function(BenchmarkId::new("Vec", size), |b| { 113 | b.iter_batched_ref( 114 | || black_box(Vec::::rand_len(size, &mut rng)), 115 | |vec| { 116 | for &byte in vec.iter() { 117 | black_box(byte); 118 | } 119 | }, 120 | BatchSize::SmallInput, 121 | ) 122 | }); 123 | } 124 | 125 | group.finish(); 126 | } 127 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog [![crates.io][crate-badge]][crate] [![docs.rs][docs-badge]][docs] 2 | 3 | All notable changes to this project will be documented in this file. Please 4 | update it with your changes when submitting a pull request. 5 | 6 | This format is based on [Keep a Changelog] and this project adheres to 7 | [Semantic Versioning]. 8 | 9 | ## [Unreleased] 10 | 11 | ### Added 12 | 13 | - `PartialEq>` implementation for `ByteSet`. 14 | - `PartialEq>` implementation for `ByteSet`. 15 | - `PartialOrd>` implementation for `ByteSet`. 16 | 17 | ### Changed 18 | 19 | - **\[Breaking\]** Comparison functions in [`PartialOrd`] and [`Ord`] are based 20 | on lexicographical order of the contained bytes. Previously they were just a 21 | `memcmp`, regardless of architecture. 22 | 23 | ## [0.1.3] - 2020-06-12 24 | 25 | ### Added 26 | 27 | - `ByteSet::from_byte`: creates a set from a single byte. 28 | - `serde::Serialize` into a `u8` sequence. 29 | - `serde::Deserialize` from `&[u8]` or a `u8` sequence. 
30 | - Conversions from range types that can be used in `const` ([#5] by [@Manishearth]). 31 | - Manual implementation of `Iter::for_each` that's slightly more optimized than 32 | the default. 33 | 34 | ## [0.1.2] - 2020-06-03 35 | 36 | ### Removed 37 | 38 | - `build.rs`, which improves compile time and allows this to be used in external 39 | build systems like Bazel and Buck. 40 | 41 | ## [0.1.1] - 2020-06-03 42 | 43 | ### Added 44 | 45 | - `ByteSet::is_ascii_alphabetic`: returns `true` if [`u8::is_ascii_alphabetic`] returns `true` for all bytes. 46 | - `ByteSet::is_ascii_uppercase`: returns `true` if [`u8::is_ascii_uppercase`] returns `true` for all bytes. 47 | - `ByteSet::is_ascii_lowercase`: returns `true` if [`u8::is_ascii_lowercase`] returns `true` for all bytes. 48 | - `ByteSet::is_ascii_alphanumeric`: returns `true` if [`u8::is_ascii_alphanumeric`] returns `true` for all bytes. 49 | - `ByteSet::is_ascii_digit`: returns `true` if [`u8::is_ascii_digit`] returns `true` for all bytes. 50 | - `ByteSet::is_ascii_hexdigit`: returns `true` if [`u8::is_ascii_hexdigit`] returns `true` for all bytes. 51 | - `ByteSet::is_ascii_punctuation`: returns `true` if [`u8::is_ascii_punctuation`] returns `true` for all bytes. 52 | - `ByteSet::is_ascii_graphic`: returns `true` if [`u8::is_ascii_graphic`] returns `true` for all bytes. 53 | - `ByteSet::is_ascii_whitespace`: returns `true` if [`u8::is_ascii_whitespace`] returns `true` for all bytes. 54 | - `ByteSet::is_ascii_control`: returns `true` if [`u8::is_ascii_control`] returns `true` for all bytes. 55 | 56 | ## 0.1.0 - 2020-06-01 57 | 58 | Initial release! 
59 | 60 | [crate]: https://crates.io/crates/byte_set 61 | [crate-badge]: https://img.shields.io/crates/v/byte_set.svg 62 | [docs]: https://docs.rs/byte_set 63 | [docs-badge]: https://docs.rs/byte_set/badge.svg 64 | 65 | [Keep a Changelog]: http://keepachangelog.com/en/1.0.0/ 66 | [Semantic Versioning]: http://semver.org/spec/v2.0.0.html 67 | 68 | [`PartialOrd`]: https://doc.rust-lang.org/std/cmp/trait.PartialOrd.html 69 | [`Ord`]: https://doc.rust-lang.org/std/cmp/trait.Ord.html 70 | [`u8::is_ascii_alphabetic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphabetic 71 | [`u8::is_ascii_uppercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_uppercase 72 | [`u8::is_ascii_lowercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_lowercase 73 | [`u8::is_ascii_alphanumeric`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphanumeric 74 | [`u8::is_ascii_digit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_digit 75 | [`u8::is_ascii_hexdigit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_hexdigit 76 | [`u8::is_ascii_punctuation`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_punctuation 77 | [`u8::is_ascii_graphic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_graphic 78 | [`u8::is_ascii_whitespace`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_whitespace 79 | [`u8::is_ascii_control`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_control 80 | 81 | [`serde::Serialize`]: https://docs.rs/serde/1.*/serde/trait.Serialize.html 82 | [`serde::Deserialize`]: https://docs.rs/serde/1.*/serde/trait.Deserialize.html 83 | 84 | [Unreleased]: https://github.com/nvzqz/byte-set-rs/compare/v0.1.3...HEAD 85 | [0.1.3]: https://github.com/nvzqz/byte-set-rs/compare/v0.1.2...v0.1.3 86 | [0.1.2]: https://github.com/nvzqz/byte-set-rs/compare/v0.1.1...v0.1.2 87 | [0.1.1]: 
https://github.com/nvzqz/byte-set-rs/compare/v0.1.0...v0.1.1 88 | 89 | [#5]: https://github.com/nvzqz/byte-set-rs/pull/5 90 | 91 | [@Manishearth]: https://github.com/Manishearth 92 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | pull_request: 6 | branches: 7 | - master 8 | 9 | name: CI 10 | 11 | jobs: 12 | rust-check: 13 | name: Check 14 | runs-on: ubuntu-latest 15 | steps: 16 | # Checkout 17 | - uses: actions/checkout@v1 18 | 19 | # Install 20 | - name: Install Rust 21 | uses: actions-rs/toolchain@v1 22 | with: 23 | profile: minimal 24 | toolchain: stable 25 | override: true 26 | 27 | # Cache 28 | - name: Cache cargo registry 29 | uses: actions/cache@v1 30 | with: 31 | path: ~/.cargo/registry 32 | key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} 33 | - name: Cache cargo index 34 | uses: actions/cache@v1 35 | with: 36 | path: ~/.cargo/git 37 | key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }} 38 | - name: Cache target dir 39 | uses: actions/cache@v1 40 | with: 41 | path: target 42 | key: ${{ runner.os }}-cargo-target-check-${{ hashFiles('**/Cargo.lock') }} 43 | 44 | # Run 45 | - name: cargo check 46 | uses: actions-rs/cargo@v1 47 | with: 48 | command: check 49 | 50 | # Run 51 | - name: cargo check benches 52 | uses: actions-rs/cargo@v1 53 | with: 54 | command: check 55 | args: --bench benches 56 | 57 | rust-test: 58 | name: Test 59 | runs-on: ubuntu-latest 60 | steps: 61 | # Checkout 62 | - uses: actions/checkout@v1 63 | 64 | # Install 65 | - name: Install Rust 66 | uses: actions-rs/toolchain@v1 67 | with: 68 | profile: minimal 69 | toolchain: stable 70 | override: true 71 | 72 | # Cache 73 | - name: Cache cargo registry 74 | uses: actions/cache@v1 75 | with: 76 | path: ~/.cargo/registry 77 | key: ${{ runner.os }}-cargo-registry-${{ 
hashFiles('**/Cargo.lock') }} 78 | - name: Cache cargo index 79 | uses: actions/cache@v1 80 | with: 81 | path: ~/.cargo/git 82 | key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }} 83 | - name: Cache target dir 84 | uses: actions/cache@v1 85 | with: 86 | path: target 87 | key: ${{ runner.os }}-cargo-target-test-${{ hashFiles('**/Cargo.lock') }} 88 | 89 | # Run 90 | - name: Cargo test 91 | uses: actions-rs/cargo@v1 92 | with: 93 | command: test 94 | 95 | rust-fmt: 96 | name: Format 97 | runs-on: ubuntu-latest 98 | steps: 99 | # Install 100 | - uses: actions/checkout@v1 101 | - name: Install Rust 102 | uses: actions-rs/toolchain@v1 103 | with: 104 | profile: minimal 105 | toolchain: stable 106 | override: true 107 | 108 | # Run 109 | - run: rustup component add rustfmt 110 | - name: Cargo fmt 111 | uses: actions-rs/cargo@v1 112 | with: 113 | command: fmt 114 | args: --all -- --check 115 | 116 | rust-clippy: 117 | name: Clippy 118 | runs-on: ubuntu-latest 119 | steps: 120 | # Checkout 121 | - uses: actions/checkout@v1 122 | 123 | # Install 124 | - name: Install Rust 125 | uses: actions-rs/toolchain@v1 126 | with: 127 | profile: minimal 128 | toolchain: stable 129 | override: true 130 | 131 | # Cache 132 | - name: Cache cargo registry 133 | uses: actions/cache@v1 134 | with: 135 | path: ~/.cargo/registry 136 | key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} 137 | - name: Cache cargo index 138 | uses: actions/cache@v1 139 | with: 140 | path: ~/.cargo/git 141 | key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }} 142 | - name: Cache target dir 143 | uses: actions/cache@v1 144 | with: 145 | path: target 146 | key: ${{ runner.os }}-cargo-target-clippy-${{ hashFiles('**/Cargo.lock') }} 147 | 148 | # Run 149 | - run: rustup component add clippy 150 | - name: Cargo clippy 151 | uses: actions-rs/cargo@v1 152 | with: 153 | command: clippy 154 | args: -- -D warnings 155 | 156 | rust-doc: 157 | name: Doc 158 | runs-on: 
ubuntu-latest 159 | steps: 160 | # Checkout 161 | - uses: actions/checkout@v1 162 | 163 | # Install 164 | - name: Install Rust 165 | uses: actions-rs/toolchain@v1 166 | with: 167 | profile: minimal 168 | toolchain: nightly 169 | override: true 170 | 171 | # Cache 172 | - name: Cache cargo registry 173 | uses: actions/cache@v1 174 | with: 175 | path: ~/.cargo/registry 176 | key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} 177 | - name: Cache cargo index 178 | uses: actions/cache@v1 179 | with: 180 | path: ~/.cargo/git 181 | key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }} 182 | - name: Cache target dir 183 | uses: actions/cache@v1 184 | with: 185 | path: target 186 | key: ${{ runner.os }}-cargo-target-doc-${{ hashFiles('**/Cargo.lock') }} 187 | 188 | # Run 189 | - name: Cargo doc 190 | uses: actions-rs/cargo@v1 191 | env: 192 | RUSTFLAGS: -D warnings 193 | with: 194 | command: doc 195 | args: --no-deps 196 | -------------------------------------------------------------------------------- /benches/benchmarks/max.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, BatchSize, BenchmarkId, Criterion, Throughput}; 2 | use std::collections::{BTreeSet, BinaryHeap, HashSet}; 3 | 4 | use crate::util::{ 5 | self, 6 | hash::{HashbrownSet, IdentityHashSet, IdentityHashbrownSet}, 7 | Bool256, Rand, 8 | }; 9 | use byte_set::ByteSet; 10 | 11 | pub fn benches(criterion: &mut Criterion) { 12 | let mut group = criterion.benchmark_group("Max"); 13 | 14 | let mut rng = rand::thread_rng(); 15 | 16 | for &size in util::SIZES { 17 | group.throughput(Throughput::Bytes(size as u64)); 18 | 19 | group.bench_function(BenchmarkId::new("ByteSet", size), |b| { 20 | b.iter_batched_ref( 21 | || black_box(ByteSet::rand_len(size, &mut rng)), 22 | |byte_set| { 23 | black_box(byte_set.last()); 24 | }, 25 | BatchSize::SmallInput, 26 | ) 27 | }); 28 | 29 | group.bench_function(BenchmarkId::new("[bool; 
256]", size), |b| { 30 | b.iter_batched_ref( 31 | || black_box(Bool256::rand_len(size, &mut rng)), 32 | |bool256| { 33 | black_box(bool256.max()); 34 | }, 35 | BatchSize::SmallInput, 36 | ) 37 | }); 38 | 39 | let range_inclusive = black_box(0u8..=util::saturating_cast(size)); 40 | group.bench_with_input( 41 | BenchmarkId::new("RangeInclusive", size), 42 | &range_inclusive, 43 | |b, range_inclusive| { 44 | b.iter(|| { 45 | black_box(range_inclusive.end()); 46 | }) 47 | }, 48 | ); 49 | 50 | group.bench_function(BenchmarkId::new("HashSet", size), |b| { 51 | b.iter_batched_ref( 52 | || black_box(HashSet::::rand_len(size, &mut rng)), 53 | |hash_set| { 54 | black_box(hash_set.iter().max()); 55 | }, 56 | BatchSize::SmallInput, 57 | ) 58 | }); 59 | 60 | group.bench_function( 61 | BenchmarkId::new("HashSet (Identity Hash)", size), 62 | |b| { 63 | b.iter_batched_ref( 64 | || { 65 | black_box(IdentityHashSet::::rand_len( 66 | size, &mut rng, 67 | )) 68 | }, 69 | |hash_set| { 70 | black_box(hash_set.iter().max()); 71 | }, 72 | BatchSize::SmallInput, 73 | ) 74 | }, 75 | ); 76 | 77 | group.bench_function( 78 | BenchmarkId::new("hashbrown::HashSet", size), 79 | |b| { 80 | b.iter_batched_ref( 81 | || black_box(HashbrownSet::::rand_len(size, &mut rng)), 82 | |hash_set| { 83 | black_box(hash_set.iter().max()); 84 | }, 85 | BatchSize::SmallInput, 86 | ) 87 | }, 88 | ); 89 | 90 | group.bench_function( 91 | BenchmarkId::new("hashbrown::HashSet (Identity Hash)", size), 92 | |b| { 93 | b.iter_batched_ref( 94 | || { 95 | black_box(IdentityHashbrownSet::::rand_len( 96 | size, &mut rng, 97 | )) 98 | }, 99 | |hash_set| { 100 | black_box(hash_set.iter().max()); 101 | }, 102 | BatchSize::SmallInput, 103 | ) 104 | }, 105 | ); 106 | 107 | group.bench_function( 108 | BenchmarkId::new("fixedbitset::FixedBitSet", size), 109 | |b| { 110 | b.iter_batched_ref( 111 | || { 112 | black_box(fixedbitset::FixedBitSet::rand_len( 113 | size, &mut rng, 114 | )) 115 | }, 116 | |fixed_bit_set| { 117 | // The 
`.ones()` iterator does not have `.next_back()`. 118 | black_box(fixed_bit_set.ones().last()); 119 | }, 120 | BatchSize::SmallInput, 121 | ) 122 | }, 123 | ); 124 | 125 | group.bench_function(BenchmarkId::new("BTreeSet", size), |b| { 126 | b.iter_batched_ref( 127 | || black_box(BTreeSet::::rand_len(size, &mut rng)), 128 | |btree_set| { 129 | // The `last` method is nightly-only: 130 | // https://github.com/rust-lang/rust/issues/62924 131 | black_box(btree_set.iter().next_back()); 132 | }, 133 | BatchSize::SmallInput, 134 | ) 135 | }); 136 | 137 | group.bench_function(BenchmarkId::new("Vec", size), |b| { 138 | b.iter_batched_ref( 139 | || black_box(Vec::::rand_len(size, &mut rng)), 140 | |vec| { 141 | black_box(vec.iter().max()); 142 | }, 143 | BatchSize::SmallInput, 144 | ) 145 | }); 146 | 147 | group.bench_function(BenchmarkId::new("BinaryHeap", size), |b| { 148 | b.iter_batched_ref( 149 | // `Reverse` is required here because `peek` returns the max value. 150 | || black_box(BinaryHeap::::rand_len(size, &mut rng)), 151 | |binary_heap| { 152 | black_box(binary_heap.peek()); 153 | }, 154 | BatchSize::SmallInput, 155 | ) 156 | }); 157 | } 158 | 159 | group.finish(); 160 | } 161 | -------------------------------------------------------------------------------- /benches/benchmarks/extend_slice.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, BatchSize, BenchmarkId, Criterion, Throughput}; 2 | use std::collections::{BTreeSet, BinaryHeap, HashSet}; 3 | 4 | use crate::util::{ 5 | self, 6 | hash::{HashbrownSet, IdentityHashSet, IdentityHashbrownSet}, 7 | rand::shuffled_bytes, 8 | Bool256, 9 | }; 10 | use byte_set::ByteSet; 11 | 12 | pub fn benches(criterion: &mut Criterion) { 13 | let mut group = criterion.benchmark_group("Extend (Slice)"); 14 | 15 | let mut rng = rand::thread_rng(); 16 | 17 | for &size in util::SIZES { 18 | group.throughput(Throughput::Bytes(size as u64)); 19 | 20 | 
group.bench_function(BenchmarkId::new("ByteSet", size), |b| { 21 | b.iter_batched_ref( 22 | || { 23 | let bytes = shuffled_bytes(&mut rng); 24 | black_box((bytes, ByteSet::new())) 25 | }, 26 | |(bytes, byte_set)| { 27 | byte_set.extend(&bytes[..size]); 28 | black_box(byte_set); 29 | }, 30 | BatchSize::SmallInput, 31 | ) 32 | }); 33 | 34 | group.bench_function(BenchmarkId::new("[bool; 256]", size), |b| { 35 | b.iter_batched_ref( 36 | || { 37 | let bytes = shuffled_bytes(&mut rng); 38 | black_box((bytes, Bool256::new())) 39 | }, 40 | |(bytes, bool256)| { 41 | bool256.extend(&bytes[..size]); 42 | black_box(bool256); 43 | }, 44 | BatchSize::SmallInput, 45 | ) 46 | }); 47 | 48 | group.bench_function(BenchmarkId::new("HashSet", size), |b| { 49 | b.iter_batched_ref( 50 | || { 51 | let bytes = shuffled_bytes(&mut rng); 52 | black_box((bytes, HashSet::::new())) 53 | }, 54 | |(bytes, hash_set)| { 55 | hash_set.extend(&bytes[..size]); 56 | black_box(hash_set); 57 | }, 58 | BatchSize::SmallInput, 59 | ) 60 | }); 61 | 62 | group.bench_function( 63 | BenchmarkId::new("HashSet (Identity Hash)", size), 64 | |b| { 65 | b.iter_batched_ref( 66 | || { 67 | let bytes = shuffled_bytes(&mut rng); 68 | black_box((bytes, IdentityHashSet::::default())) 69 | }, 70 | |(bytes, hash_set)| { 71 | hash_set.extend(&bytes[..size]); 72 | black_box(hash_set); 73 | }, 74 | BatchSize::SmallInput, 75 | ) 76 | }, 77 | ); 78 | 79 | group.bench_function( 80 | BenchmarkId::new("hashbrown::HashSet", size), 81 | |b| { 82 | b.iter_batched_ref( 83 | || { 84 | let bytes = shuffled_bytes(&mut rng); 85 | black_box((bytes, HashbrownSet::::new())) 86 | }, 87 | |(bytes, hash_set)| { 88 | hash_set.extend(&bytes[..size]); 89 | black_box(hash_set); 90 | }, 91 | BatchSize::SmallInput, 92 | ) 93 | }, 94 | ); 95 | 96 | group.bench_function( 97 | BenchmarkId::new("IdentityHashbrownSet (Identity Hash)", size), 98 | |b| { 99 | b.iter_batched_ref( 100 | || { 101 | let bytes = shuffled_bytes(&mut rng); 102 | black_box(( 103 | 
bytes, 104 | IdentityHashbrownSet::::default(), 105 | )) 106 | }, 107 | |(bytes, hash_set)| { 108 | hash_set.extend(&bytes[..size]); 109 | black_box(hash_set); 110 | }, 111 | BatchSize::SmallInput, 112 | ) 113 | }, 114 | ); 115 | 116 | group.bench_function(BenchmarkId::new("BTreeSet", size), |b| { 117 | b.iter_batched_ref( 118 | || { 119 | let bytes = shuffled_bytes(&mut rng); 120 | black_box((bytes, BTreeSet::::new())) 121 | }, 122 | |(bytes, btree_set)| { 123 | btree_set.extend(&bytes[..size]); 124 | black_box(btree_set); 125 | }, 126 | BatchSize::SmallInput, 127 | ) 128 | }); 129 | 130 | group.bench_function(BenchmarkId::new("Vec", size), |b| { 131 | b.iter_batched_ref( 132 | || { 133 | let bytes = shuffled_bytes(&mut rng); 134 | black_box((bytes, Vec::::new())) 135 | }, 136 | |(bytes, vec)| { 137 | vec.extend(&bytes[..size]); 138 | black_box(vec); 139 | }, 140 | BatchSize::SmallInput, 141 | ) 142 | }); 143 | 144 | group.bench_function(BenchmarkId::new("BinaryHeap", size), |b| { 145 | b.iter_batched_ref( 146 | || { 147 | let bytes = shuffled_bytes(&mut rng); 148 | black_box((bytes, BinaryHeap::::new())) 149 | }, 150 | |(bytes, binary_heap)| { 151 | binary_heap.extend(&bytes[..size]); 152 | black_box(binary_heap); 153 | }, 154 | BatchSize::SmallInput, 155 | ) 156 | }); 157 | } 158 | 159 | group.finish(); 160 | } 161 | -------------------------------------------------------------------------------- /benches/benchmarks/min.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, BatchSize, BenchmarkId, Criterion, Throughput}; 2 | use std::collections::{BTreeSet, BinaryHeap, HashSet}; 3 | 4 | use crate::util::{ 5 | self, 6 | hash::{HashbrownSet, IdentityHashSet, IdentityHashbrownSet}, 7 | Bool256, Rand, 8 | }; 9 | use byte_set::ByteSet; 10 | 11 | pub fn benches(criterion: &mut Criterion) { 12 | let mut group = criterion.benchmark_group("Min"); 13 | 14 | let mut rng = rand::thread_rng(); 15 | 16 | for &size 
in util::SIZES { 17 | group.throughput(Throughput::Bytes(size as u64)); 18 | 19 | group.bench_function(BenchmarkId::new("ByteSet", size), |b| { 20 | b.iter_batched_ref( 21 | || black_box(ByteSet::rand_len(size, &mut rng)), 22 | |byte_set| { 23 | black_box(byte_set.first()); 24 | }, 25 | BatchSize::SmallInput, 26 | ) 27 | }); 28 | 29 | group.bench_function(BenchmarkId::new("[bool; 256]", size), |b| { 30 | b.iter_batched_ref( 31 | || black_box(Bool256::rand_len(size, &mut rng)), 32 | |bool256| { 33 | black_box(bool256.min()); 34 | }, 35 | BatchSize::SmallInput, 36 | ) 37 | }); 38 | 39 | let range_inclusive = black_box(0u8..=util::saturating_cast(size)); 40 | group.bench_with_input( 41 | BenchmarkId::new("RangeInclusive", size), 42 | &range_inclusive, 43 | |b, range_inclusive| { 44 | b.iter(|| { 45 | black_box(range_inclusive.start()); 46 | }) 47 | }, 48 | ); 49 | 50 | group.bench_function(BenchmarkId::new("HashSet", size), |b| { 51 | b.iter_batched_ref( 52 | || black_box(HashSet::::rand_len(size, &mut rng)), 53 | |hash_set| { 54 | black_box(hash_set.iter().min()); 55 | }, 56 | BatchSize::SmallInput, 57 | ) 58 | }); 59 | 60 | group.bench_function( 61 | BenchmarkId::new("HashSet (Identity Hash)", size), 62 | |b| { 63 | b.iter_batched_ref( 64 | || { 65 | black_box(IdentityHashSet::::rand_len( 66 | size, &mut rng, 67 | )) 68 | }, 69 | |hash_set| { 70 | black_box(hash_set.iter().min()); 71 | }, 72 | BatchSize::SmallInput, 73 | ) 74 | }, 75 | ); 76 | 77 | group.bench_function( 78 | BenchmarkId::new("hashbrown::HashSet", size), 79 | |b| { 80 | b.iter_batched_ref( 81 | || black_box(HashbrownSet::::rand_len(size, &mut rng)), 82 | |hash_set| { 83 | black_box(hash_set.iter().min()); 84 | }, 85 | BatchSize::SmallInput, 86 | ) 87 | }, 88 | ); 89 | 90 | group.bench_function( 91 | BenchmarkId::new("hashbrown::HashSet (Identity Hash)", size), 92 | |b| { 93 | b.iter_batched_ref( 94 | || { 95 | black_box(IdentityHashbrownSet::::rand_len( 96 | size, &mut rng, 97 | )) 98 | }, 99 | 
|hash_set| { 100 | black_box(hash_set.iter().min()); 101 | }, 102 | BatchSize::SmallInput, 103 | ) 104 | }, 105 | ); 106 | 107 | group.bench_function( 108 | BenchmarkId::new("fixedbitset::FixedBitSet", size), 109 | |b| { 110 | b.iter_batched_ref( 111 | || { 112 | black_box(fixedbitset::FixedBitSet::rand_len( 113 | size, &mut rng, 114 | )) 115 | }, 116 | |fixed_bit_set| { 117 | black_box(fixed_bit_set.ones().next()); 118 | }, 119 | BatchSize::SmallInput, 120 | ) 121 | }, 122 | ); 123 | 141 | group.bench_function(BenchmarkId::new("BTreeSet", size), |b| { 142 | b.iter_batched_ref( 143 | || black_box(BTreeSet::::rand_len(size, &mut rng)), 144 | |btree_set| { 145 | // The `first` method is nightly-only: 146 | // https://github.com/rust-lang/rust/issues/62924 147 | black_box(btree_set.iter().next()); 148 | }, 149 | BatchSize::SmallInput, 150 | ) 151 | }); 152 | 153 | group.bench_function(BenchmarkId::new("Vec", size), |b| { 154 | b.iter_batched_ref( 155 | || black_box(Vec::::rand_len(size, &mut rng)), 156 | |vec| { 157 | black_box(vec.iter().min()); 158 | }, 159 | BatchSize::SmallInput, 160 | ) 161 | }); 162 | 163 | group.bench_function(BenchmarkId::new("BinaryHeap", size), |b| { 164 | b.iter_batched_ref( 165 | // `peek` returns the max value, so the min is found by scanning the heap.
166 | || black_box(BinaryHeap::::rand_len(size, &mut rng)), 167 | |binary_heap| { 168 | black_box(binary_heap.iter().min()); 169 | }, 170 | BatchSize::SmallInput, 171 | ) 172 | }); 173 | } 174 | 175 | group.finish(); 176 | } 177 | -------------------------------------------------------------------------------- /benches/benchmarks/contains_cached.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, BatchSize, BenchmarkId, Criterion, Throughput}; 2 | use rand::Rng; 3 | use std::collections::{BTreeSet, HashSet}; 4 | 5 | use crate::util::{ 6 | self, 7 | hash::{HashbrownSet, IdentityHashSet, IdentityHashbrownSet}, 8 | Bool256, Rand, 9 | }; 10 | use byte_set::ByteSet; 11 | 12 | pub fn benches(criterion: &mut Criterion) { 13 | let mut group = criterion.benchmark_group("Contains (Cached)"); 14 | 15 | let mut rng = rand::thread_rng(); 16 | 22 | for &size in util::SIZES { 23 | group.throughput(Throughput::Bytes(size as u64)); 24 | 25 | let byte_set = ByteSet::rand_len(size, &mut rng); 26 | group.bench_with_input( 27 | BenchmarkId::new("ByteSet", size), 28 | &byte_set, 29 | |b, byte_set| { 30 | b.iter_batched( 31 | || rng.gen::(), 32 | |byte| { 33 | black_box(byte_set.contains(byte)); 34 | }, 35 | BatchSize::SmallInput, 36 | ) 37 | }, 38 | ); 39 | 40 | let bool256 = Bool256::rand_len(size, &mut rng); 41 | group.bench_with_input( 42 | BenchmarkId::new("[bool; 256]", size), 43 | &bool256, 44 | |b, bool256| { 45 | b.iter_batched( 46 | || rng.gen::(), 47 | |byte| { 48 | black_box(bool256.contains(byte)); 49 | }, 50 | BatchSize::SmallInput, 51 | ) 52 | }, 53 | ); 54 | 55 | let range_inclusive = black_box(0u8..=util::saturating_cast(size)); 56 | group.bench_with_input( 57 | BenchmarkId::new("RangeInclusive", size), 58 | &range_inclusive, 59 | |b, range_inclusive| { 60 | b.iter_batched( 61 | || rng.gen::(), 62 |
|byte| { 63 | black_box(range_inclusive.contains(&byte)); 64 | }, 65 | BatchSize::SmallInput, 66 | ) 67 | }, 68 | ); 69 | 70 | let hash_set = HashSet::::rand_len(size, &mut rng); 71 | group.bench_with_input( 72 | BenchmarkId::new("HashSet", size), 73 | &hash_set, 74 | |b, hash_set| { 75 | b.iter_batched( 76 | || rng.gen::(), 77 | |byte| { 78 | black_box(hash_set.contains(&byte)); 79 | }, 80 | BatchSize::SmallInput, 81 | ) 82 | }, 83 | ); 84 | 85 | let hash_set = IdentityHashSet::::rand_len(size, &mut rng); 86 | group.bench_with_input( 87 | BenchmarkId::new("HashSet (Identity Hash)", size), 88 | &hash_set, 89 | |b, hash_set| { 90 | b.iter_batched( 91 | || rng.gen::(), 92 | |byte| { 93 | black_box(hash_set.contains(&byte)); 94 | }, 95 | BatchSize::SmallInput, 96 | ) 97 | }, 98 | ); 99 | 100 | let hash_set = HashbrownSet::::rand_len(size, &mut rng); 101 | group.bench_with_input( 102 | BenchmarkId::new("hashbrown::HashSet", size), 103 | &hash_set, 104 | |b, hash_set| { 105 | b.iter_batched( 106 | || rng.gen::(), 107 | |byte| { 108 | black_box(hash_set.contains(&byte)); 109 | }, 110 | BatchSize::SmallInput, 111 | ) 112 | }, 113 | ); 114 | 115 | let hash_set = IdentityHashbrownSet::::rand_len(size, &mut rng); 116 | group.bench_with_input( 117 | BenchmarkId::new("hashbrown::HashSet (Identity Hash)", size), 118 | &hash_set, 119 | |b, hash_set| { 120 | b.iter_batched( 121 | || rng.gen::(), 122 | |byte| { 123 | black_box(hash_set.contains(&byte)); 124 | }, 125 | BatchSize::SmallInput, 126 | ) 127 | }, 128 | ); 129 | 130 | let fixed_bit_set = fixedbitset::FixedBitSet::rand_len(size, &mut rng); 131 | group.bench_with_input( 132 | BenchmarkId::new("fixedbitset::FixedBitSet", size), 133 | &fixed_bit_set, 134 | |b, fixed_bit_set| { 135 | b.iter_batched( 136 | || rng.gen::(), 137 | |byte| { 138 | black_box(fixed_bit_set.contains(byte as usize)); 139 | }, 140 | BatchSize::SmallInput, 141 | ) 142 | }, 143 | ); 144 | 145 | let btree_set = BTreeSet::::rand_len(size, &mut rng); 146 | 
group.bench_with_input( 147 | BenchmarkId::new("BTreeSet", size), 148 | &btree_set, 149 | |b, btree_set| { 150 | b.iter_batched( 151 | || rng.gen::(), 152 | |byte| { 153 | black_box(btree_set.contains(&byte)); 154 | }, 155 | BatchSize::SmallInput, 156 | ) 157 | }, 158 | ); 159 | 160 | let vec = Vec::::rand_len(size, &mut rng); 161 | group.bench_with_input( 162 | BenchmarkId::new("Vec", size), 163 | &vec, 164 | |b, vec| { 165 | b.iter_batched( 166 | || rng.gen::(), 167 | |byte| { 168 | black_box(vec.contains(&byte)); 169 | }, 170 | BatchSize::SmallInput, 171 | ) 172 | }, 173 | ); 174 | 175 | let vec = { 176 | let mut vec = Vec::::rand_len(size, &mut rng); 177 | vec.sort_unstable(); 178 | vec 179 | }; 180 | group.bench_with_input( 181 | BenchmarkId::new("Vec (Binary Search)", size), 182 | &vec, 183 | |b, vec| { 184 | b.iter_batched( 185 | || rng.gen::(), 186 | |byte| { 187 | black_box(vec.binary_search(&byte).is_ok()); 188 | }, 189 | BatchSize::SmallInput, 190 | ) 191 | }, 192 | ); 193 | } 194 | 195 | group.finish(); 196 | } 197 | -------------------------------------------------------------------------------- /benches/benchmarks/contains_random.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, BatchSize, BenchmarkId, Criterion, Throughput}; 2 | use rand::Rng; 3 | use std::collections::{BTreeSet, HashSet}; 4 | 5 | use crate::util::{ 6 | self, 7 | hash::{HashbrownSet, IdentityHashSet, IdentityHashbrownSet}, 8 | Bool256, Rand, 9 | }; 10 | use byte_set::ByteSet; 11 | 12 | pub fn benches(criterion: &mut Criterion) { 13 | let mut group = criterion.benchmark_group("Contains (Random)"); 14 | 15 | let mut rng = rand::thread_rng(); 16 | 17 | for &size in util::SIZES { 18 | group.throughput(Throughput::Bytes(size as u64)); 19 | 20 | group.bench_function(BenchmarkId::new("ByteSet", size), |b| { 21 | b.iter_batched( 22 | || { 23 | black_box(( 24 | rng.gen::(), 25 | ByteSet::rand_len(size, &mut rng), 26 | )) 27 | 
}, 28 | |(byte, byte_set)| { 29 | black_box(byte_set.contains(byte)); 30 | }, 31 | BatchSize::SmallInput, 32 | ) 33 | }); 34 | 35 | group.bench_function(BenchmarkId::new("[bool; 256]", size), |b| { 36 | b.iter_batched( 37 | || { 38 | black_box(( 39 | rng.gen::(), 40 | Bool256::rand_len(size, &mut rng), 41 | )) 42 | }, 43 | |(byte, bool256)| { 44 | black_box(bool256.contains(byte)); 45 | }, 46 | BatchSize::SmallInput, 47 | ) 48 | }); 49 | 50 | let range_inclusive = black_box(0u8..=util::saturating_cast(size)); 51 | group.bench_with_input( 52 | BenchmarkId::new("RangeInclusive", size), 53 | &range_inclusive, 54 | |b, range_inclusive| { 55 | b.iter_batched( 56 | || rng.gen::(), 57 | |byte| { 58 | black_box(range_inclusive.contains(&byte)); 59 | }, 60 | BatchSize::SmallInput, 61 | ) 62 | }, 63 | ); 64 | 65 | group.bench_function(BenchmarkId::new("HashSet", size), |b| { 66 | b.iter_batched_ref( 67 | || { 68 | black_box(( 69 | rng.gen::(), 70 | HashSet::::rand_len(size, &mut rng), 71 | )) 72 | }, 73 | |(byte, hash_set)| { 74 | black_box(hash_set.contains(byte)); 75 | }, 76 | BatchSize::SmallInput, 77 | ) 78 | }); 79 | 80 | group.bench_function( 81 | BenchmarkId::new("HashSet (Identity Hash)", size), 82 | |b| { 83 | b.iter_batched_ref( 84 | || { 85 | black_box(( 86 | rng.gen::(), 87 | IdentityHashSet::::rand_len(size, &mut rng), 88 | )) 89 | }, 90 | |(byte, hash_set)| { 91 | black_box(hash_set.contains(byte)); 92 | }, 93 | BatchSize::SmallInput, 94 | ) 95 | }, 96 | ); 97 | 98 | group.bench_function( 99 | BenchmarkId::new("hashbrown::HashSet", size), 100 | |b| { 101 | b.iter_batched_ref( 102 | || { 103 | black_box(( 104 | rng.gen::(), 105 | HashbrownSet::::rand_len(size, &mut rng), 106 | )) 107 | }, 108 | |(byte, hash_set)| { 109 | black_box(hash_set.contains(byte)); 110 | }, 111 | BatchSize::SmallInput, 112 | ) 113 | }, 114 | ); 115 | 116 | group.bench_function( 117 | BenchmarkId::new("hashbrown::HashSet (Identity Hash)", size), 118 | |b| { 119 | b.iter_batched_ref( 120 | 
|| { 121 | black_box(( 122 | rng.gen::(), 123 | IdentityHashbrownSet::::rand_len( 124 | size, &mut rng, 125 | ), 126 | )) 127 | }, 128 | |(byte, hash_set)| { 129 | black_box(hash_set.contains(byte)); 130 | }, 131 | BatchSize::SmallInput, 132 | ) 133 | }, 134 | ); 135 | 136 | group.bench_function( 137 | BenchmarkId::new("fixedbitset::FixedBitSet", size), 138 | |b| { 139 | b.iter_batched_ref( 140 | || { 141 | black_box(( 142 | rng.gen::(), 143 | fixedbitset::FixedBitSet::rand_len(size, &mut rng), 144 | )) 145 | }, 146 | |(byte, fixed_bit_set)| { 147 | black_box(fixed_bit_set.contains(*byte as usize)); 148 | }, 149 | BatchSize::SmallInput, 150 | ) 151 | }, 152 | ); 153 | 154 | group.bench_function(BenchmarkId::new("BTreeSet", size), |b| { 155 | b.iter_batched_ref( 156 | || { 157 | black_box(( 158 | rng.gen::(), 159 | BTreeSet::::rand_len(size, &mut rng), 160 | )) 161 | }, 162 | |(byte, btree_set)| { 163 | black_box(btree_set.contains(byte)); 164 | }, 165 | BatchSize::SmallInput, 166 | ) 167 | }); 168 | 169 | group.bench_function(BenchmarkId::new("Vec", size), |b| { 170 | b.iter_batched_ref( 171 | || { 172 | black_box(( 173 | rng.gen::(), 174 | Vec::::rand_len(size, &mut rng), 175 | )) 176 | }, 177 | |(byte, vec)| { 178 | black_box(vec.contains(byte)); 179 | }, 180 | BatchSize::SmallInput, 181 | ) 182 | }); 183 | 184 | group.bench_function( 185 | BenchmarkId::new("Vec (Binary Search)", size), 186 | |b| { 187 | b.iter_batched_ref( 188 | || { 189 | let mut vec = Vec::::rand_len(size, &mut rng); 190 | vec.sort_unstable(); 191 | 192 | black_box((rng.gen::(), vec)) 193 | }, 194 | |(byte, vec)| { 195 | black_box(vec.binary_search(&byte).is_ok()); 196 | }, 197 | BatchSize::SmallInput, 198 | ) 199 | }, 200 | ); 201 | } 202 | 203 | group.finish(); 204 | } 205 | -------------------------------------------------------------------------------- /benches/benchmarks/insert.rs: -------------------------------------------------------------------------------- 1 | use 
criterion::{black_box, BatchSize, BenchmarkId, Criterion, Throughput}; 2 | use std::collections::{BTreeSet, BinaryHeap, HashSet}; 3 | 4 | use crate::util::{ 5 | self, 6 | hash::{HashbrownSet, IdentityHashSet, IdentityHashbrownSet}, 7 | rand::shuffled_bytes, 8 | Bool256, 9 | }; 10 | use byte_set::ByteSet; 11 | 12 | pub fn benches(criterion: &mut Criterion) { 13 | let mut group = criterion.benchmark_group("Insert"); 14 | 15 | let mut rng = rand::thread_rng(); 16 | 17 | for &size in util::SIZES { 18 | group.throughput(Throughput::Bytes(size as u64)); 19 | 20 | group.bench_function(BenchmarkId::new("ByteSet", size), |b| { 21 | b.iter_batched_ref( 22 | || { 23 | let bytes = shuffled_bytes(&mut rng); 24 | black_box((bytes, ByteSet::new())) 25 | }, 26 | |(bytes, byte_set)| { 27 | for &byte in &bytes[..size] { 28 | byte_set.insert(byte); 29 | } 30 | black_box(byte_set); 31 | }, 32 | BatchSize::SmallInput, 33 | ) 34 | }); 35 | 36 | group.bench_function(BenchmarkId::new("[bool; 256]", size), |b| { 37 | b.iter_batched_ref( 38 | || { 39 | let bytes = shuffled_bytes(&mut rng); 40 | black_box((bytes, Bool256::new())) 41 | }, 42 | |(bytes, bool256)| { 43 | for &byte in &bytes[..size] { 44 | bool256.insert(byte); 45 | } 46 | black_box(bool256); 47 | }, 48 | BatchSize::SmallInput, 49 | ) 50 | }); 51 | 52 | group.bench_function(BenchmarkId::new("HashSet", size), |b| { 53 | b.iter_batched_ref( 54 | || { 55 | let bytes = shuffled_bytes(&mut rng); 56 | black_box((bytes, HashSet::::new())) 57 | }, 58 | |(bytes, hash_set)| { 59 | for &byte in &bytes[..size] { 60 | hash_set.insert(byte); 61 | } 62 | black_box(hash_set); 63 | }, 64 | BatchSize::SmallInput, 65 | ) 66 | }); 67 | 68 | group.bench_function( 69 | BenchmarkId::new("HashSet (Identity Hash)", size), 70 | |b| { 71 | b.iter_batched_ref( 72 | || { 73 | let bytes = shuffled_bytes(&mut rng); 74 | black_box((bytes, IdentityHashSet::::default())) 75 | }, 76 | |(bytes, hash_set)| { 77 | for &byte in &bytes[..size] { 78 | 
hash_set.insert(byte); 79 | } 80 | black_box(hash_set); 81 | }, 82 | BatchSize::SmallInput, 83 | ) 84 | }, 85 | ); 86 | 87 | group.bench_function( 88 | BenchmarkId::new("hashbrown::HashSet", size), 89 | |b| { 90 | b.iter_batched_ref( 91 | || { 92 | let bytes = shuffled_bytes(&mut rng); 93 | black_box((bytes, HashbrownSet::::new())) 94 | }, 95 | |(bytes, hash_set)| { 96 | for &byte in &bytes[..size] { 97 | hash_set.insert(byte); 98 | } 99 | black_box(hash_set); 100 | }, 101 | BatchSize::SmallInput, 102 | ) 103 | }, 104 | ); 105 | 106 | group.bench_function( 107 | BenchmarkId::new("hashbrown::HashSet (Identity Hash)", size), 108 | |b| { 109 | b.iter_batched_ref( 110 | || { 111 | let bytes = shuffled_bytes(&mut rng); 112 | black_box(( 113 | bytes, 114 | IdentityHashbrownSet::::default(), 115 | )) 116 | }, 117 | |(bytes, hash_set)| { 118 | for &byte in &bytes[..size] { 119 | hash_set.insert(byte); 120 | } 121 | black_box(hash_set); 122 | }, 123 | BatchSize::SmallInput, 124 | ) 125 | }, 126 | ); 127 | 128 | group.bench_function( 129 | BenchmarkId::new("fixedbitset::FixedBitSet", size), 130 | |b| { 131 | b.iter_batched_ref( 132 | || { 133 | let bytes = shuffled_bytes(&mut rng); 134 | black_box((bytes, fixedbitset::FixedBitSet::default())) 135 | }, 136 | |(bytes, fixed_bit_set)| { 137 | for &byte in &bytes[..size] { 138 | let byte = byte as usize; 139 | fixed_bit_set.grow(byte + 1); // `insert` panics unless bit < len 140 | fixed_bit_set.insert(byte); 141 | } 142 | black_box(fixed_bit_set); 143 | }, 144 | BatchSize::SmallInput, 145 | ) 146 | }, 147 | ); 148 | 149 | group.bench_function(BenchmarkId::new("BTreeSet", size), |b| { 150 | b.iter_batched_ref( 151 | || { 152 | let bytes = shuffled_bytes(&mut rng); 153 | black_box((bytes, BTreeSet::::new())) 154 | }, 155 | |(bytes, btree_set)| { 156 | for &byte in &bytes[..size] { 157 | btree_set.insert(byte); 158 | } 159 | black_box(btree_set); 160 | }, 161 | BatchSize::SmallInput, 162 | ) 163 | }); 164 | 165 | group.bench_function(BenchmarkId::new("Vec", size), |b| {
166 | b.iter_batched_ref( 167 | || { 168 | let bytes = shuffled_bytes(&mut rng); 169 | black_box((bytes, Vec::::new())) 170 | }, 171 | |(bytes, vec)| { 172 | for &byte in &bytes[..size] { 173 | vec.push(byte); 174 | } 175 | black_box(vec); 176 | }, 177 | BatchSize::SmallInput, 178 | ) 179 | }); 180 | 181 | group.bench_function( 182 | BenchmarkId::new("Vec (Binary Search)", size), 183 | |b| { 184 | b.iter_batched_ref( 185 | || { 186 | let bytes = shuffled_bytes(&mut rng); 187 | black_box((bytes, Vec::::new())) 188 | }, 189 | |(bytes, vec)| { 190 | for byte in &bytes[..size] { 191 | if let Err(index) = vec.binary_search(byte) { 192 | vec.insert(index, *byte); 193 | } 194 | } 195 | black_box(vec); 196 | }, 197 | BatchSize::SmallInput, 198 | ) 199 | }, 200 | ); 201 | 202 | group.bench_function(BenchmarkId::new("BinaryHeap", size), |b| { 203 | b.iter_batched_ref( 204 | || { 205 | let bytes = shuffled_bytes(&mut rng); 206 | black_box((bytes, BinaryHeap::::new())) 207 | }, 208 | |(bytes, binary_heap)| { 209 | for &byte in &bytes[..size] { 210 | binary_heap.push(byte); 211 | } 212 | black_box(binary_heap); 213 | }, 214 | BatchSize::SmallInput, 215 | ) 216 | }); 217 | } 218 | 219 | group.finish(); 220 | } 221 | -------------------------------------------------------------------------------- /src/byte_set/traits.rs: -------------------------------------------------------------------------------- 1 | use super::ByteSet; 2 | use crate::Iter; 3 | use core::{cmp, fmt, hash, iter::FromIterator, ops}; 4 | 5 | #[cfg(any(test, feature = "std"))] 6 | use std::collections::HashSet; 7 | 8 | #[cfg(any(test, feature = "alloc"))] 9 | extern crate alloc; 10 | #[cfg(any(test, feature = "alloc"))] 11 | use alloc::collections::BTreeSet; 12 | 13 | impl Default for ByteSet { 14 | #[inline] 15 | fn default() -> Self { 16 | Self::new() 17 | } 18 | } 19 | 20 | impl From for ByteSet { 21 | #[inline] 22 | fn from(byte: u8) -> ByteSet { 23 | ByteSet::from_byte(byte) 24 | } 25 | } 26 | 27 | impl 
From<&[u8]> for ByteSet { 28 | #[inline] 29 | fn from(bytes: &[u8]) -> Self { 30 | let mut set = ByteSet::new(); 31 | set.extend(bytes); 32 | set 33 | } 34 | } 35 | 36 | impl From<&mut [u8]> for ByteSet { 37 | #[inline] 38 | fn from(bytes: &mut [u8]) -> Self { 39 | (bytes as &[u8]).into() 40 | } 41 | } 42 | 43 | impl From<&str> for ByteSet { 44 | #[inline] 45 | fn from(s: &str) -> Self { 46 | s.as_bytes().into() 47 | } 48 | } 49 | 50 | impl From<&mut str> for ByteSet { 51 | #[inline] 52 | fn from(s: &mut str) -> Self { 53 | (s as &str).into() 54 | } 55 | } 56 | 57 | impl From> for ByteSet { 58 | #[inline] 59 | fn from(range: ops::Range) -> Self { 60 | Self::from_range(range) 61 | } 62 | } 63 | 64 | impl From> for ByteSet { 65 | #[inline] 66 | fn from(range: ops::RangeTo) -> Self { 67 | Self::from_range_to(range) 68 | } 69 | } 70 | 71 | impl From> for ByteSet { 72 | #[inline] 73 | fn from(range: ops::RangeFrom) -> Self { 74 | Self::from_range_from(range) 75 | } 76 | } 77 | 78 | impl From> for ByteSet { 79 | #[inline] 80 | fn from(range: ops::RangeInclusive) -> Self { 81 | Self::from_range_inclusive(range) 82 | } 83 | } 84 | 85 | impl From> for ByteSet { 86 | #[inline] 87 | fn from(range: ops::RangeToInclusive) -> Self { 88 | Self::from_range_to_inclusive(range) 89 | } 90 | } 91 | 92 | impl From for ByteSet { 93 | #[inline] 94 | fn from(_: ops::RangeFull) -> Self { 95 | Self::full() 96 | } 97 | } 98 | 99 | impl Extend for ByteSet { 100 | fn extend>(&mut self, iter: T) { 101 | iter.into_iter().for_each(|byte| self.insert(byte)); 102 | } 103 | } 104 | 105 | impl<'a> Extend<&'a u8> for ByteSet { 106 | fn extend>(&mut self, iter: T) { 107 | self.extend(iter.into_iter().cloned()); 108 | } 109 | } 110 | 111 | impl FromIterator for ByteSet { 112 | fn from_iter>(iter: T) -> Self { 113 | // Make sure to use `insert` over `inserting` to not copy so many bytes 114 | // on each iteration. 
115 | let mut set = ByteSet::new(); 116 | set.extend(iter); 117 | set 118 | } 119 | } 120 | 121 | impl<'a> FromIterator<&'a u8> for ByteSet { 122 | fn from_iter>(iter: T) -> Self { 123 | iter.into_iter().cloned().collect() 124 | } 125 | } 126 | 127 | impl IntoIterator for ByteSet { 128 | type Item = u8; 129 | type IntoIter = Iter; 130 | 131 | #[inline] 132 | fn into_iter(self) -> Self::IntoIter { 133 | self.into() 134 | } 135 | } 136 | 137 | impl fmt::Debug for ByteSet { 138 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 139 | f.debug_set().entries(*self).finish() 140 | } 141 | } 142 | 143 | impl PartialOrd for ByteSet { 144 | #[inline] 145 | fn partial_cmp(&self, other: &Self) -> Option { 146 | Some(self.cmp(other)) 147 | } 148 | } 149 | 150 | impl Ord for ByteSet { 151 | #[inline] 152 | fn cmp(&self, other: &Self) -> cmp::Ordering { 153 | // TODO: Optimize using internal representation while keeping semantics. 154 | // See https://github.com/nvzqz/byte-set-rs/issues/9. 155 | self.into_iter().cmp(other.into_iter()) 156 | } 157 | } 158 | 159 | #[allow(clippy::derive_hash_xor_eq)] 160 | impl hash::Hash for ByteSet { 161 | #[inline] 162 | fn hash(&self, state: &mut H) { 163 | self.as_raw_bytes().hash(state) 164 | } 165 | 166 | #[inline] 167 | fn hash_slice(data: &[Self], state: &mut H) { 168 | Self::slice_as_raw_bytes(data).hash(state) 169 | } 170 | } 171 | 172 | impl ops::Sub for ByteSet { 173 | type Output = Self; 174 | 175 | #[inline] 176 | fn sub(self, rhs: Self) -> Self::Output { 177 | self.removing_all(rhs) 178 | } 179 | } 180 | 181 | impl ops::SubAssign for ByteSet { 182 | #[inline] 183 | fn sub_assign(&mut self, rhs: Self) { 184 | self.remove_all(rhs); 185 | } 186 | } 187 | 188 | impl ops::BitAnd for ByteSet { 189 | type Output = Self; 190 | 191 | #[inline] 192 | fn bitand(self, rhs: Self) -> Self::Output { 193 | self.intersection(rhs) 194 | } 195 | } 196 | 197 | impl ops::BitAndAssign for ByteSet { 198 | #[inline] 199 | fn bitand_assign(&mut self, 
rhs: Self) { 200 | *self = *self & rhs; 201 | } 202 | } 203 | 204 | impl ops::BitOr for ByteSet { 205 | type Output = Self; 206 | 207 | #[inline] 208 | fn bitor(self, rhs: Self) -> Self::Output { 209 | self.inserting_all(rhs) 210 | } 211 | } 212 | 213 | impl ops::BitOrAssign for ByteSet { 214 | #[inline] 215 | fn bitor_assign(&mut self, rhs: Self) { 216 | self.insert_all(rhs); 217 | } 218 | } 219 | 220 | impl ops::BitXor for ByteSet { 221 | type Output = Self; 222 | 223 | #[inline] 224 | fn bitxor(self, rhs: Self) -> Self::Output { 225 | self.symmetric_difference(rhs) 226 | } 227 | } 228 | 229 | impl ops::BitXorAssign for ByteSet { 230 | #[inline] 231 | fn bitxor_assign(&mut self, rhs: Self) { 232 | *self = *self ^ rhs; 233 | } 234 | } 235 | 236 | impl ops::Not for ByteSet { 237 | type Output = Self; 238 | 239 | #[inline] 240 | fn not(self) -> Self::Output { 241 | ByteSet::not(self) 242 | } 243 | } 244 | 245 | #[cfg(any(test, feature = "std"))] 246 | impl PartialEq> for ByteSet { 247 | fn eq(&self, other: &HashSet) -> bool { 248 | if self.len() != other.len() { 249 | return false; 250 | } 251 | 252 | // Using `ByteSet::contains` instead of zipping the iterators because 253 | // it's much cheaper than iterating `ByteSet`. 254 | // 255 | // Although iterating over `HashSet` is slightly slower than `ByteSet`, 256 | // `HashSet::contains` is significantly slower. 257 | other.iter().all(|&byte| self.contains(byte)) 258 | } 259 | } 260 | 261 | #[cfg(any(test, feature = "alloc"))] 262 | impl PartialEq> for ByteSet { 263 | fn eq(&self, other: &BTreeSet) -> bool { 264 | if self.len() != other.len() { 265 | return false; 266 | } 267 | 268 | // Using `ByteSet::contains` instead of zipping the iterators because 269 | // it's much cheaper than iterating `ByteSet`. 270 | // 271 | // Although iterating over `BTreeSet` is slightly slower than `ByteSet`, 272 | // `BTreeSet::contains` is significantly slower. 
273 | other.iter().all(|&byte| self.contains(byte)) 274 | } 275 | } 276 | 277 | #[cfg(any(test, feature = "alloc"))] 278 | impl PartialOrd> for ByteSet { 279 | fn partial_cmp(&self, other: &BTreeSet) -> Option { 280 | Some(self.into_iter().cmp(other.iter().cloned())) 281 | } 282 | } 283 | 284 | // Enables `rand::random::()`. 285 | #[cfg(feature = "rand")] 286 | #[cfg_attr(docsrs, doc(cfg(feature = "rand")))] 287 | impl rand::distributions::Distribution 288 | for rand::distributions::Standard 289 | { 290 | #[inline] 291 | fn sample(&self, rng: &mut R) -> ByteSet { 292 | ByteSet::rand(rng) 293 | } 294 | } 295 | 296 | #[cfg(feature = "serde")] 297 | #[cfg_attr(docsrs, doc(cfg(feature = "serde")))] 298 | impl serde::Serialize for ByteSet { 299 | fn serialize(&self, serializer: S) -> Result 300 | where 301 | S: serde::Serializer, 302 | { 303 | use serde::ser::SerializeSeq; 304 | 305 | let mut seq = serializer.serialize_seq(Some(self.len()))?; 306 | for byte in *self { 307 | seq.serialize_element(&byte)?; 308 | } 309 | seq.end() 310 | } 311 | } 312 | 313 | #[cfg(feature = "serde")] 314 | #[cfg_attr(docsrs, doc(cfg(feature = "serde")))] 315 | impl<'de> serde::Deserialize<'de> for ByteSet { 316 | fn deserialize(deserializer: D) -> Result 317 | where 318 | D: serde::Deserializer<'de>, 319 | { 320 | struct ByteSetVisitor; 321 | 322 | impl<'de> serde::de::Visitor<'de> for ByteSetVisitor { 323 | type Value = ByteSet; 324 | 325 | fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { 326 | f.write_str("a set of bytes") 327 | } 328 | 329 | fn visit_bytes(self, v: &[u8]) -> Result 330 | where 331 | E: serde::de::Error, 332 | { 333 | Ok(v.into()) 334 | } 335 | 336 | fn visit_seq(self, mut seq: A) -> Result 337 | where 338 | A: serde::de::SeqAccess<'de>, 339 | { 340 | let mut set = ByteSet::new(); 341 | while let Some(byte) = seq.next_element::()? 
{ 342 | set.insert(byte); 343 | } 344 | Ok(set) 345 | } 346 | } 347 | 348 | deserializer.deserialize_seq(ByteSetVisitor) 349 | } 350 | } 351 | -------------------------------------------------------------------------------- /benches/benchmarks/remove_single.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, BatchSize, BenchmarkId, Criterion, Throughput}; 2 | use rand::{seq::SliceRandom, Rng}; 3 | use std::collections::{BTreeSet, HashSet}; 4 | 5 | use crate::util::{ 6 | self, 7 | hash::{HashbrownSet, IdentityHashSet, IdentityHashbrownSet}, 8 | rand::shuffled_bytes, 9 | Bool256, 10 | }; 11 | use byte_set::ByteSet; 12 | 13 | pub fn benches(criterion: &mut Criterion) { 14 | let mut group = criterion.benchmark_group("Remove (Single)"); 15 | 16 | let mut rng = rand::thread_rng(); 17 | 18 | for &size in util::SIZES { 19 | group.throughput(Throughput::Bytes(size as u64)); 20 | 21 | group.bench_function(BenchmarkId::new("ByteSet", size), |b| { 22 | b.iter_batched_ref( 23 | || { 24 | let bytes = shuffled_bytes(&mut rng); 25 | let bytes = &bytes[..size]; 26 | 27 | if let Some(&byte) = bytes.choose(&mut rng) { 28 | black_box((byte, bytes.iter().collect())) 29 | } else { 30 | black_box((rng.gen::(), ByteSet::new())) 31 | } 32 | }, 33 | |(byte, byte_set)| { 34 | byte_set.remove(*byte); 35 | black_box(byte_set); 36 | }, 37 | BatchSize::SmallInput, 38 | ) 39 | }); 40 | 41 | group.bench_function(BenchmarkId::new("[bool; 256]", size), |b| { 42 | b.iter_batched_ref( 43 | || { 44 | let bytes = shuffled_bytes(&mut rng); 45 | let bytes = &bytes[..size]; 46 | 47 | if let Some(&byte) = bytes.choose(&mut rng) { 48 | black_box((byte, bytes.iter().collect())) 49 | } else { 50 | black_box((rng.gen::(), Bool256::new())) 51 | } 52 | }, 53 | |(byte, bool256)| { 54 | bool256.remove(*byte); 55 | black_box(bool256); 56 | }, 57 | BatchSize::SmallInput, 58 | ) 59 | }); 60 | 61 | group.bench_function(BenchmarkId::new("HashSet", size), 
|b| { 62 | b.iter_batched_ref( 63 | || { 64 | let bytes = shuffled_bytes(&mut rng); 65 | let bytes = &bytes[..size]; 66 | 67 | if let Some(&byte) = bytes.choose(&mut rng) { 68 | black_box((byte, bytes.iter().cloned().collect())) 69 | } else { 70 | black_box((rng.gen::(), HashSet::::new())) 71 | } 72 | }, 73 | |(byte, hash_set)| { 74 | hash_set.remove(byte); 75 | black_box(hash_set); 76 | }, 77 | BatchSize::SmallInput, 78 | ) 79 | }); 80 | 81 | group.bench_function( 82 | BenchmarkId::new("HashSet (Identity Hash)", size), 83 | |b| { 84 | b.iter_batched_ref( 85 | || { 86 | let bytes = shuffled_bytes(&mut rng); 87 | let bytes = &bytes[..size]; 88 | 89 | if let Some(&byte) = bytes.choose(&mut rng) { 90 | black_box((byte, bytes.iter().cloned().collect())) 91 | } else { 92 | black_box(( 93 | rng.gen::(), 94 | IdentityHashSet::::default(), 95 | )) 96 | } 97 | }, 98 | |(byte, hash_set)| { 99 | hash_set.remove(byte); 100 | black_box(hash_set); 101 | }, 102 | BatchSize::SmallInput, 103 | ) 104 | }, 105 | ); 106 | 107 | group.bench_function( 108 | BenchmarkId::new("hashbrown::HashSet", size), 109 | |b| { 110 | b.iter_batched_ref( 111 | || { 112 | let bytes = shuffled_bytes(&mut rng); 113 | let bytes = &bytes[..size]; 114 | 115 | if let Some(&byte) = bytes.choose(&mut rng) { 116 | black_box((byte, bytes.iter().cloned().collect())) 117 | } else { 118 | black_box(( 119 | rng.gen::(), 120 | HashbrownSet::::new(), 121 | )) 122 | } 123 | }, 124 | |(byte, hash_set)| { 125 | hash_set.remove(byte); 126 | black_box(hash_set); 127 | }, 128 | BatchSize::SmallInput, 129 | ) 130 | }, 131 | ); 132 | 133 | group.bench_function( 134 | BenchmarkId::new("hashbrown::HashSet (Identity Hash)", size), 135 | |b| { 136 | b.iter_batched_ref( 137 | || { 138 | let bytes = shuffled_bytes(&mut rng); 139 | let bytes = &bytes[..size]; 140 | 141 | if let Some(&byte) = bytes.choose(&mut rng) { 142 | black_box((byte, bytes.iter().cloned().collect())) 143 | } else { 144 | black_box(( 145 | rng.gen::(), 146 | 
IdentityHashbrownSet::::default(), 147 | )) 148 | } 149 | }, 150 | |(byte, hash_set)| { 151 | hash_set.remove(byte); 152 | black_box(hash_set); 153 | }, 154 | BatchSize::SmallInput, 155 | ) 156 | }, 157 | ); 158 | 159 | group.bench_function( 160 | BenchmarkId::new("fixedbitset::FixedBitSet", size), 161 | |b| { 162 | b.iter_batched_ref( 163 | || { 164 | let bytes = shuffled_bytes(&mut rng); 165 | let bytes = &bytes[..size]; 166 | 167 | if let Some(&byte) = bytes.choose(&mut rng) { 168 | black_box(( 169 | byte as usize, 170 | bytes 171 | .iter() 172 | .cloned() 173 | .map(|byte| byte as usize) 174 | .collect(), 175 | )) 176 | } else { 177 | black_box(( 178 | rng.gen::() as usize, 179 | fixedbitset::FixedBitSet::default(), 180 | )) 181 | } 182 | }, 183 | |(byte, fixed_bit_set)| { 184 | if fixed_bit_set.len() != 0 { 185 | fixed_bit_set.set(*byte as usize, false); 186 | } 187 | black_box(fixed_bit_set); 188 | }, 189 | BatchSize::SmallInput, 190 | ) 191 | }, 192 | ); 193 | 194 | group.bench_function(BenchmarkId::new("BTreeSet", size), |b| { 195 | b.iter_batched_ref( 196 | || { 197 | let bytes = shuffled_bytes(&mut rng); 198 | let bytes = &bytes[..size]; 199 | 200 | if let Some(&byte) = bytes.choose(&mut rng) { 201 | black_box((byte, bytes.iter().cloned().collect())) 202 | } else { 203 | black_box((rng.gen::(), BTreeSet::::new())) 204 | } 205 | }, 206 | |(byte, btree_set)| { 207 | btree_set.remove(byte); 208 | black_box(btree_set); 209 | }, 210 | BatchSize::SmallInput, 211 | ) 212 | }); 213 | 214 | group.bench_function(BenchmarkId::new("Vec", size), |b| { 215 | b.iter_batched_ref( 216 | || { 217 | let bytes = shuffled_bytes(&mut rng); 218 | let bytes = &bytes[..size]; 219 | 220 | if let Some(&byte) = bytes.choose(&mut rng) { 221 | black_box((byte, bytes.into())) 222 | } else { 223 | black_box((rng.gen::(), Vec::::new())) 224 | } 225 | }, 226 | |(byte, vec)| { 227 | util::vec_remove_item(vec, byte); 228 | black_box(vec); 229 | }, 230 | BatchSize::SmallInput, 231 | ) 232 | 
}); 233 | 234 | group.bench_function( 235 | BenchmarkId::new("Vec (Binary Search)", size), 236 | |b| { 237 | b.iter_batched_ref( 238 | || { 239 | let bytes = shuffled_bytes(&mut rng); 240 | let bytes = &bytes[..size]; 241 | 242 | if let Some(&byte) = bytes.choose(&mut rng) { 243 | let mut vec = Vec::::from(bytes); 244 | vec.sort_unstable(); 245 | black_box((byte, vec)) 246 | } else { 247 | black_box((rng.gen::(), Vec::::new())) 248 | } 249 | }, 250 | |(byte, vec)| { 251 | util::vec_remove_item_binary_search(vec, byte); 252 | black_box(vec); 253 | }, 254 | BatchSize::SmallInput, 255 | ) 256 | }, 257 | ); 258 | } 259 | 260 | group.finish(); 261 | } 262 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | Efficient sets of bytes for Rust, brought to you by [@NikolaiVazquez]! 21 | 22 | The star of the show is [`ByteSet`]: an allocation-free sorted set. It is a 23 | *much faster* alternative to [`HashSet`], [`BTreeSet`], and other types 24 | for a variety of scenarios. See ["Implementation"](#implementation) for a peek 25 | under the hood. 26 | 27 | If you found this library useful, please consider 28 | [sponsoring me on GitHub](https://github.com/sponsors/nvzqz)! 29 | 30 | ## Table of Contents 31 | 32 | 1. [Usage](#usage) 33 | 2. [Examples](#examples) 34 | 1. [`ByteSet` Type](#byteset-type) 35 | 1. [Insert](#insert) 36 | 2. [Extend](#extend) 37 | 3. [Remove](#remove) 38 | 4. [Iterate](#iterate) 39 | 5. [Contains](#contains) 40 | 6. [Subset](#subset) 41 | 7. [Min and Max](#min-and-max) 42 | 2. 
[`byte_set!` Macro](#byte_set-macro) 43 | 3. [Implementation](#implementation) 44 | 4. [Benchmarks](#benchmarks) 45 | 5. [Ecosystem Integrations](#ecosystem-integrations) 46 | 1. [`rand`](#rand) 47 | 2. [`serde`](#serde) 48 | 6. [License](#license) 49 | 50 | ## Usage 51 | 52 | This library is available [on crates.io][crate] and can be used by adding the 53 | following to your project's [`Cargo.toml`]: 54 | 55 | ```toml 56 | [dependencies] 57 | byte_set = "0.1.3" 58 | ``` 59 | 60 | To import the [`byte_set!`] macro, add this to your crate root (`main.rs` or 61 | `lib.rs`): 62 | 63 | ```rust 64 | use byte_set::byte_set; 65 | ``` 66 | 67 | If you're not using [Rust 2018 edition][2018], it must be imported differently: 68 | 69 | ```rust 70 | #[macro_use] 71 | extern crate byte_set; 72 | ``` 73 | 74 | ## Examples 75 | 76 | ### `ByteSet` Type 77 | 78 | First, let's import [`ByteSet`]: 79 | 80 | ```rust 81 | use byte_set::ByteSet; 82 | ``` 83 | 84 | Here's how you create an empty set: 85 | 86 | ```rust 87 | let bytes = ByteSet::new(); 88 | ``` 89 | 90 | You can create a set filled with all bytes (0 through 255) just as easily: 91 | 92 | ```rust 93 | let bytes = ByteSet::full(); 94 | ``` 95 | 96 | Ok, let's see what we can do with this. Note that this isn't the only available 97 | functionality. See [`ByteSet`] for a complete list. 
98 | 99 | #### Insert 100 | 101 | Use [`insert`] to include a single byte, by mutating the [`ByteSet`] in-place: 102 | 103 | ```rust 104 | let mut bytes = ByteSet::new(); 105 | bytes.insert(255); 106 | ``` 107 | 108 | Use [`inserting`] as an immutable alternative, by passing the calling 109 | [`ByteSet`] by value: 110 | 111 | ```rust 112 | let bytes = ByteSet::new().inserting(255); 113 | ``` 114 | 115 | Use [`insert_all`] to include all bytes of another [`ByteSet`], by mutating the 116 | [`ByteSet`] in-place: 117 | 118 | ```rust 119 | let mut alphabet = ByteSet::ASCII_UPPERCASE; 120 | alphabet.insert_all(ByteSet::ASCII_LOWERCASE); 121 | 122 | assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC); 123 | ``` 124 | 125 | Use [`inserting_all`] as an immutable alternative, by passing the calling 126 | [`ByteSet`] by value: 127 | 128 | ```rust 129 | let alphabet = ByteSet::ASCII_UPPERCASE.inserting_all(ByteSet::ASCII_LOWERCASE); 130 | 131 | assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC); 132 | ``` 133 | 134 | #### Extend 135 | 136 | Rather than call [`insert`] in a loop, [`extend`] simplifies inserting from an 137 | iterator: 138 | 139 | ```rust 140 | fn take_string(bytes: &mut ByteSet, s: &str) { 141 | bytes.extend(s.as_bytes()); 142 | } 143 | ``` 144 | 145 | Because this iterates over the entire input, it is *much* more efficient to use 146 | [`insert_all`] instead of [`extend`] when inserting another [`ByteSet`]. 
147 | 148 | #### Remove 149 | 150 | Use [`remove`] to exclude a single byte by mutating the set in-place: 151 | 152 | ```rust 153 | let mut bytes = ByteSet::full(); 154 | bytes.remove(255); 155 | ``` 156 | 157 | Use [`removing`] as an immutable alternative, by passing the calling [`ByteSet`] 158 | by value: 159 | 160 | ```rust 161 | let bytes = ByteSet::full().removing(255); 162 | ``` 163 | 164 | Use [`remove_all`] to exclude all bytes of another [`ByteSet`], by mutating the 165 | [`ByteSet`] in-place: 166 | 167 | ```rust 168 | let mut alphabet = ByteSet::ASCII_ALPHANUMERIC; 169 | alphabet.remove_all(ByteSet::ASCII_DIGIT); 170 | 171 | assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC); 172 | ``` 173 | 174 | Use [`removing_all`] as an immutable alternative, by passing the calling 175 | [`ByteSet`] by value: 176 | 177 | ```rust 178 | let alphabet = ByteSet::ASCII_ALPHANUMERIC.removing_all(ByteSet::ASCII_DIGIT); 179 | 180 | assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC); 181 | ``` 182 | 183 | #### Iterate 184 | 185 | Iterating can be done with just a `for` loop, and goes in order from least to 186 | greatest: 187 | 188 | ```rust 189 | fn small_to_big(bytes: ByteSet) { 190 | for byte in bytes { 191 | do_work(byte); 192 | } 193 | } 194 | ``` 195 | 196 | Iterating in reverse is slightly more verbose, and goes in order from greatest 197 | to least: 198 | 199 | ```rust 200 | fn big_to_small(bytes: ByteSet) { 201 | for byte in bytes.into_iter().rev() { 202 | do_work(byte); 203 | } 204 | } 205 | ``` 206 | 207 | #### Contains 208 | 209 | It wouldn't really be a set if you couldn't check if it has specific items. 
210 | 211 | Use [`contains`] to check a single byte: 212 | 213 | ```rust 214 | fn has_null(bytes: &ByteSet) -> bool { 215 | bytes.contains(0) 216 | } 217 | ``` 218 | 219 | Use [`contains_any`] to check for any matches in another [`ByteSet`]: 220 | 221 | ```rust 222 | fn intersects(a: &ByteSet, b: &ByteSet) -> bool { 223 | a.contains_any(b) 224 | } 225 | ``` 226 | 227 | #### Subset 228 | 229 | Use [`is_subset`] to check that all of the bytes in a [`ByteSet`] are contained 230 | in another: 231 | 232 | ```rust 233 | fn test(a: &ByteSet, b: &ByteSet) { 234 | assert!(a.is_subset(b)); 235 | 236 | // Always passes because every set is a subset of itself. 237 | assert!(a.is_subset(a)); 238 | } 239 | ``` 240 | 241 | Use [`is_strict_subset`] to check [`is_subset`] *and* that the sets are not the 242 | same: 243 | 244 | ```rust 245 | fn test(a: &ByteSet, b: &ByteSet) { 246 | assert!(a.is_strict_subset(b)); 247 | 248 | // `a` is equal to itself. 249 | assert!(!a.is_strict_subset(a)); 250 | } 251 | ``` 252 | 253 | For the sake of completeness, there are also [`is_superset`] and 254 | [`is_strict_superset`], which call these functions with `a` and `b` switched. 255 | 256 | #### Min and Max 257 | 258 | Use [`first`] to get the smallest byte and [`last`] to get the biggest byte: 259 | 260 | ```rust 261 | fn sanity_check(bytes: &ByteSet) { 262 | if let (Some(first), Some(last)) = (bytes.first(), bytes.last()) { 263 | assert!(first <= last); 264 | } else { 265 | // `bytes` is empty. 266 | } 267 | } 268 | ``` 269 | 270 | These are the first and last bytes returned when iterating. 
271 | 272 | ### `byte_set!` Macro 273 | 274 | [`byte_set!`] enables you to create a [`ByteSet`] with the same syntax as [`vec!`] 275 | or array expressions: 276 | 277 | ```rust 278 | let bytes = byte_set![1, 2, 3, b'x', b'y', b'z']; 279 | ``` 280 | 281 | It even works at compile-time in a `const` expression: 282 | 283 | ```rust 284 | const WHOA: ByteSet = byte_set![b'w', b'h', b'o', b'a']; 285 | 286 | static ABC: ByteSet = byte_set![b'a', b'b', b'c']; 287 | ``` 288 | 289 | ## Implementation 290 | 291 | [`ByteSet`] is implemented as a 256-bit mask where each bit corresponds to a 292 | byte value. The first (least significant) bit in the mask represents the first 293 | byte (0) in the set. Likewise, the last (most significant) bit represents 294 | the last byte (255). 295 | 296 | Given the following [`ByteSet`]: 297 | 298 | ```rust 299 | let bytes = byte_set![0, 1, 4, 5, 254]; 300 | ``` 301 | 302 | The in-memory representation of `bytes` would look like: 303 | 304 | ```text 305 | Byte: 0 1 2 3 4 5 6 7 ... 253 254 255 306 | Value: 1 1 0 0 1 1 0 0 ... 0 1 0 307 | ``` 308 | 309 | This bit mask is composed of either `[u64; 4]` or `[u32; 8]` depending on the 310 | target CPU (see [#3]). Because this comes out to only 32 bytes, [`ByteSet`] 311 | implements [`Copy`]. 312 | 313 | ## Benchmarks 314 | 315 | I will upload benchmarks run from my machine soon. 316 | 317 | In the meantime, you can benchmark this library by running: 318 | 319 | ```sh 320 | cargo bench 321 | ``` 322 | 323 | By default, this will benchmark [`ByteSet`] along with various other types to 324 | compare performance. Note that this will take **a long time** (about 1 hour and 325 | 30 minutes). 326 | 327 | Benchmark only [`ByteSet`] by running: 328 | 329 | ```sh 330 | cargo bench ByteSet 331 | ``` 332 | 333 | This takes about 15 minutes, so maybe grab a coffee in the meantime. 
334 | 335 | Benchmark a specific [`ByteSet`] operation by running: 336 | 337 | ```sh 338 | cargo bench $operation/ByteSet 339 | ``` 340 | 341 | See `/benches/benchmarks` for strings that can be used for `$operation`. 342 | 343 | Note that `cargo bench` takes a regular expression, so `Contains (Random)` will 344 | not work because the parentheses are treated as a capture group. To match 345 | parentheses, escape them: `Contains \(Random\)`. 346 | 347 | ## Ecosystem Integrations 348 | 349 | This library has extended functionality for some popular crates. 350 | 351 | ### `rand` 352 | 353 | Use the `rand` (or `rand_core`) feature in your [`Cargo.toml`] to enable random 354 | [`ByteSet`] generation: 355 | 356 | ```toml 357 | [dependencies.byte_set] 358 | version = "0.1.3" 359 | features = ["rand"] 360 | ``` 361 | 362 | This makes the following possible: 363 | 364 | ```rust 365 | let bytes = rand::random::<ByteSet>(); 366 | 367 | // Same as above. 368 | let bytes = ByteSet::rand(rand::thread_rng()); 369 | 370 | // Handle failure instead of panicking. 371 | match ByteSet::try_rand(rand::rngs::OsRng) { 372 | Ok(bytes) => // ... 373 | Err(error) => // ... 374 | } 375 | ``` 376 | 377 | ### `serde` 378 | 379 | Use the `serde` feature in your [`Cargo.toml`] to enable [`Serialize`] and 380 | [`Deserialize`] for [`ByteSet`]: 381 | 382 | ```toml 383 | [dependencies.byte_set] 384 | version = "0.1.3" 385 | features = ["serde"] 386 | ``` 387 | 388 | This makes the following possible: 389 | 390 | ```rust 391 | use serde::{Serialize, Deserialize}; 392 | 393 | #[derive(Serialize, Deserialize)] 394 | struct MyValue { 395 | bytes: ByteSet 396 | } 397 | ``` 398 | 399 | [`ByteSet`] can be serialized into a `u8` sequence, and deserialized from 400 | `&[u8]` or a `u8` sequence. 401 | 402 | Read more about using `serde` at [serde.rs](https://serde.rs/). 
403 | 404 | ## License 405 | 406 | This project is released under either: 407 | 408 | - [MIT License](https://github.com/nvzqz/byte-set-rs/blob/master/LICENSE-MIT) 409 | 410 | - [Apache License (Version 2.0)](https://github.com/nvzqz/byte-set-rs/blob/master/LICENSE-APACHE) 411 | 412 | at your choosing. 413 | 414 | [@NikolaiVazquez]: https://twitter.com/NikolaiVazquez 415 | 416 | [`Cargo.toml`]: https://doc.rust-lang.org/cargo/reference/manifest.html 417 | [2018]: https://blog.rust-lang.org/2018/12/06/Rust-1.31-and-rust-2018.html#rust-2018 418 | [crate]: https://crates.io/crates/byte_set 419 | 420 | [`BTreeSet`]: https://doc.rust-lang.org/std/collections/struct.BTreeSet.html 421 | [`Copy`]: https://doc.rust-lang.org/std/marker/trait.Copy.html 422 | [`HashSet`]: https://doc.rust-lang.org/std/collections/struct.HashSet.html 423 | [`u8`]: https://doc.rust-lang.org/std/primitive.u8.html 424 | [`vec!`]: https://doc.rust-lang.org/std/macro.vec.html 425 | 426 | [`Serialize`]: https://docs.rs/serde/1.*/serde/trait.Serialize.html 427 | [`Deserialize`]: https://docs.rs/serde/1.*/serde/trait.Deserialize.html 428 | 429 | [#3]: https://github.com/nvzqz/byte-set-rs/issues/3 430 | 431 | 432 | [`byte_set!`]: https://docs.rs/byte_set/0.1.3/byte_set/macro.byte_set.html 433 | [`ByteSet`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html 434 | [`contains_any`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.contains_any 435 | [`contains`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.contains 436 | [`extend`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#impl-Extend%3Cu8%3E 437 | [`first`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.first 438 | [`insert_all`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.insert_all 439 | [`insert`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.insert 440 | [`inserting_all`]: 
https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.inserting_all 441 | [`inserting`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.inserting 442 | [`last`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.last 443 | [`remove_all`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.remove_all 444 | [`remove`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.remove 445 | [`removing_all`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.removing_all 446 | [`removing`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.removing 447 | [`is_strict_subset`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.is_strict_subset 448 | [`is_subset`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.is_subset 449 | [`is_strict_superset`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.is_strict_superset 450 | [`is_superset`]: https://docs.rs/byte_set/0.1.3/byte_set/struct.ByteSet.html#method.is_superset 451 | -------------------------------------------------------------------------------- /src/byte_set/ascii.rs: -------------------------------------------------------------------------------- 1 | use super::ByteSet; 2 | 3 | /// Operations related to the ASCII character set. 4 | impl ByteSet { 5 | /// The set of all ASCII characters: U+0000 NULL ..= U+007F DELETE. 
6 | /// 7 | /// # Examples 8 | /// 9 | /// This contains all bytes for which [`u8::is_ascii`] returns `true`: 10 | /// 11 | /// ``` 12 | /// # use byte_set::ByteSet; 13 | /// for byte in ByteSet::ASCII { 14 | /// assert!(byte.is_ascii()); 15 | /// } 16 | /// 17 | /// for byte in !ByteSet::ASCII { 18 | /// assert!(!byte.is_ascii()); 19 | /// } 20 | /// ``` 21 | /// 22 | /// [`u8::is_ascii`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii 23 | pub const ASCII: Self = { 24 | #[cfg(target_pointer_width = "64")] 25 | { 26 | Self([!0, !0, 0, 0]) 27 | } 28 | 29 | #[cfg(not(target_pointer_width = "64"))] 30 | { 31 | Self([!0, !0, !0, !0, 0, 0, 0, 0]) 32 | } 33 | }; 34 | 35 | /// The set of all ASCII alphabetic characters: 36 | /// 37 | /// - U+0041 'A' ..= U+005A 'Z' 38 | /// - U+0061 'a' ..= U+007A 'z' 39 | /// 40 | /// # Examples 41 | /// 42 | /// This contains all bytes for which [`u8::is_ascii_alphabetic`] returns 43 | /// `true`: 44 | /// 45 | /// ``` 46 | /// # use byte_set::ByteSet; 47 | /// for byte in ByteSet::ASCII_ALPHABETIC { 48 | /// assert!(byte.is_ascii_alphabetic()); 49 | /// } 50 | /// 51 | /// for byte in !ByteSet::ASCII_ALPHABETIC { 52 | /// assert!(!byte.is_ascii_alphabetic()); 53 | /// } 54 | /// ``` 55 | /// 56 | /// [`u8::is_ascii_alphabetic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphabetic 57 | pub const ASCII_ALPHABETIC: Self = 58 | Self::ASCII_LOWERCASE.inserting_all(Self::ASCII_UPPERCASE); 59 | 60 | /// The set of all ASCII uppercase characters: U+0041 'A' ..= U+005A 'Z'. 
61 | /// 62 | /// # Examples 63 | /// 64 | /// This contains all bytes for which [`u8::is_ascii_uppercase`] returns 65 | /// `true`: 66 | /// 67 | /// ``` 68 | /// # use byte_set::ByteSet; 69 | /// for byte in ByteSet::ASCII_UPPERCASE { 70 | /// assert!(byte.is_ascii_uppercase()); 71 | /// } 72 | /// 73 | /// for byte in !ByteSet::ASCII_UPPERCASE { 74 | /// assert!(!byte.is_ascii_uppercase()); 75 | /// } 76 | /// ``` 77 | /// 78 | /// [`u8::is_ascii_uppercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_uppercase 79 | pub const ASCII_UPPERCASE: Self = Self::from_range_inclusive(b'A'..=b'Z'); 80 | 81 | /// The set of all ASCII lowercase characters: U+0061 'a' ..= U+007A 'z'. 82 | /// 83 | /// # Examples 84 | /// 85 | /// This contains all bytes for which [`u8::is_ascii_lowercase`] returns 86 | /// `true`: 87 | /// 88 | /// ``` 89 | /// # use byte_set::ByteSet; 90 | /// for byte in ByteSet::ASCII_LOWERCASE { 91 | /// assert!(byte.is_ascii_lowercase()); 92 | /// } 93 | /// 94 | /// for byte in !ByteSet::ASCII_LOWERCASE { 95 | /// assert!(!byte.is_ascii_lowercase()); 96 | /// } 97 | /// ``` 98 | /// 99 | /// [`u8::is_ascii_lowercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_lowercase 100 | pub const ASCII_LOWERCASE: Self = Self::from_range_inclusive(b'a'..=b'z'); 101 | 102 | /// The set of all ASCII alphanumeric characters: 103 | /// 104 | /// - U+0041 'A' ..= U+005A 'Z' 105 | /// - U+0061 'a' ..= U+007A 'z' 106 | /// - U+0030 '0' ..= U+0039 '9' 107 | /// 108 | /// # Examples 109 | /// 110 | /// This contains all bytes for which [`u8::is_ascii_alphanumeric`] returns 111 | /// `true`: 112 | /// 113 | /// ``` 114 | /// # use byte_set::ByteSet; 115 | /// for byte in ByteSet::ASCII_ALPHANUMERIC { 116 | /// assert!(byte.is_ascii_alphanumeric()); 117 | /// } 118 | /// 119 | /// for byte in !ByteSet::ASCII_ALPHANUMERIC { 120 | /// assert!(!byte.is_ascii_alphanumeric()); 121 | /// } 122 | /// ``` 123 | /// 124 | /// 
[`u8::is_ascii_alphanumeric`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphanumeric 125 | pub const ASCII_ALPHANUMERIC: Self = 126 | Self::ASCII_ALPHABETIC.inserting_all(Self::ASCII_DIGIT); 127 | 128 | /// The set of all ASCII decimal digits: U+0030 '0' ..= U+0039 '9'. 129 | /// 130 | /// # Examples 131 | /// 132 | /// This contains all bytes for which [`u8::is_ascii_digit`] returns `true`: 133 | /// 134 | /// ``` 135 | /// # use byte_set::ByteSet; 136 | /// for byte in ByteSet::ASCII_DIGIT { 137 | /// assert!(byte.is_ascii_digit()); 138 | /// } 139 | /// 140 | /// for byte in !ByteSet::ASCII_DIGIT { 141 | /// assert!(!byte.is_ascii_digit()); 142 | /// } 143 | /// ``` 144 | /// 145 | /// [`u8::is_ascii_digit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_digit 146 | pub const ASCII_DIGIT: Self = Self::from_range_inclusive(b'0'..=b'9'); 147 | 148 | /// The set of all ASCII hexadecimal digits: 149 | /// 150 | /// - U+0030 '0' ..= U+0039 '9' 151 | /// - U+0041 'A' ..= U+0046 'F' 152 | /// - U+0061 'a' ..= U+0066 'f' 153 | /// 154 | /// # Examples 155 | /// 156 | /// This contains all bytes for which [`u8::is_ascii_hexdigit`] returns 157 | /// `true`: 158 | /// 159 | /// ``` 160 | /// # use byte_set::ByteSet; 161 | /// for byte in ByteSet::ASCII_HEXDIGIT { 162 | /// assert!(byte.is_ascii_hexdigit()); 163 | /// } 164 | /// 165 | /// for byte in !ByteSet::ASCII_HEXDIGIT { 166 | /// assert!(!byte.is_ascii_hexdigit()); 167 | /// } 168 | /// ``` 169 | /// 170 | /// [`u8::is_ascii_hexdigit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_hexdigit 171 | pub const ASCII_HEXDIGIT: Self = Self::ASCII_DIGIT 172 | .inserting_all(Self::from_range_inclusive(b'A'..=b'F')) 173 | .inserting_all(Self::from_range_inclusive(b'a'..=b'f')); 174 | 175 | /// The set of all ASCII punctuation characters: 176 | /// 177 | /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /` 178 | /// - U+003A ..= U+0040 `: ; < = > ? 
@` 179 | /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` `` 180 | /// - U+007B ..= U+007E `{ | } ~` 181 | /// 182 | /// # Examples 183 | /// 184 | /// This contains all bytes for which [`u8::is_ascii_punctuation`] returns 185 | /// `true`: 186 | /// 187 | /// ``` 188 | /// # use byte_set::ByteSet; 189 | /// for byte in ByteSet::ASCII_PUNCTUATION { 190 | /// assert!(byte.is_ascii_punctuation()); 191 | /// } 192 | /// 193 | /// for byte in !ByteSet::ASCII_PUNCTUATION { 194 | /// assert!(!byte.is_ascii_punctuation()); 195 | /// } 196 | /// ``` 197 | /// 198 | /// [`u8::is_ascii_punctuation`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_punctuation 199 | pub const ASCII_PUNCTUATION: Self = byte_set![ 200 | b'!', b'"', b'#', b'$', b'%', b'&', b'\'', b'(', b')', b'*', b'+', 201 | b',', b'-', b'.', b'/', b':', b';', b'<', b'=', b'>', b'?', b'@', b'[', 202 | b'\\', b']', b'^', b'_', b'`', b'{', b'|', b'}', b'~', 203 | ]; 204 | 205 | /// The set of all ASCII graphic characters: U+0021 '!' ..= U+007E '~'. 
206 | /// 207 | /// # Examples 208 | /// 209 | /// This contains all bytes for which [`u8::is_ascii_graphic`] returns 210 | /// `true`: 211 | /// 212 | /// ``` 213 | /// # use byte_set::ByteSet; 214 | /// for byte in ByteSet::ASCII_GRAPHIC { 215 | /// assert!(byte.is_ascii_graphic()); 216 | /// } 217 | /// 218 | /// for byte in !ByteSet::ASCII_GRAPHIC { 219 | /// assert!(!byte.is_ascii_graphic()); 220 | /// } 221 | /// ``` 222 | /// 223 | /// [`u8::is_ascii_graphic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_graphic 224 | pub const ASCII_GRAPHIC: Self = 225 | Self::ASCII_ALPHANUMERIC.inserting_all(Self::ASCII_PUNCTUATION); 226 | 227 | /// The set of all ASCII whitespace characters: 228 | /// 229 | /// - U+0020 SPACE 230 | /// - U+0009 HORIZONTAL TAB 231 | /// - U+000A LINE FEED 232 | /// - U+000C FORM FEED 233 | /// - U+000D CARRIAGE RETURN 234 | /// 235 | /// # Examples 236 | /// 237 | /// This contains all bytes for which [`u8::is_ascii_whitespace`] returns 238 | /// `true`: 239 | /// 240 | /// ``` 241 | /// # use byte_set::ByteSet; 242 | /// for byte in ByteSet::ASCII_WHITESPACE { 243 | /// assert!(byte.is_ascii_whitespace()); 244 | /// } 245 | /// 246 | /// for byte in !ByteSet::ASCII_WHITESPACE { 247 | /// assert!(!byte.is_ascii_whitespace()); 248 | /// } 249 | /// ``` 250 | /// 251 | /// [`u8::is_ascii_whitespace`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_whitespace 252 | pub const ASCII_WHITESPACE: Self = 253 | byte_set![b'\t', b'\n', 0x0C, b'\r', b' ']; 254 | 255 | /// The set of all ASCII control characters: 256 | /// 257 | /// - U+0000 NUL ..= U+001F UNIT SEPARATOR 258 | /// - U+007F DELETE. 259 | /// 260 | /// Note that most ASCII whitespace characters are control characters, but 261 | /// SPACE is not. 
262 | /// 263 | /// # Examples 264 | /// 265 | /// This contains all bytes for which [`u8::is_ascii_control`] returns 266 | /// `true`: 267 | /// 268 | /// ``` 269 | /// # use byte_set::ByteSet; 270 | /// for byte in ByteSet::ASCII_CONTROL { 271 | /// assert!(byte.is_ascii_control()); 272 | /// } 273 | /// 274 | /// for byte in !ByteSet::ASCII_CONTROL { 275 | /// assert!(!byte.is_ascii_control()); 276 | /// } 277 | /// ``` 278 | /// 279 | /// [`u8::is_ascii_control`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_control 280 | pub const ASCII_CONTROL: Self = 281 | Self::from_range_inclusive(0..=0x1F).inserting(0x7F); 282 | 283 | /// Returns `true` if [`u8::is_ascii`] returns `true` for all bytes in 284 | /// `self`. 285 | /// 286 | /// This is significantly more efficient than checking each byte in `self` 287 | /// individually. 288 | /// 289 | /// [`u8::is_ascii`]: 290 | /// https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii 291 | #[inline] 292 | #[must_use] 293 | pub const fn is_ascii(&self) -> bool { 294 | self._is_subset(&Self::ASCII) 295 | } 296 | 297 | /// Returns `true` if [`u8::is_ascii_alphabetic`] returns `true` for all 298 | /// bytes in `self`. 299 | /// 300 | /// This is significantly more efficient than checking each byte in `self` 301 | /// individually. 302 | /// 303 | /// [`u8::is_ascii_alphabetic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphabetic 304 | #[inline] 305 | #[must_use] 306 | pub const fn is_ascii_alphabetic(&self) -> bool { 307 | self._is_subset(&Self::ASCII_ALPHABETIC) 308 | } 309 | 310 | /// Returns `true` if [`u8::is_ascii_uppercase`] returns `true` for all 311 | /// bytes in `self`. 312 | /// 313 | /// This is significantly more efficient than checking each byte in `self` 314 | /// individually.
315 | /// 316 | /// [`u8::is_ascii_uppercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_uppercase 317 | #[inline] 318 | #[must_use] 319 | pub const fn is_ascii_uppercase(&self) -> bool { 320 | self._is_subset(&Self::ASCII_UPPERCASE) 321 | } 322 | 323 | /// Returns `true` if [`u8::is_ascii_lowercase`] returns `true` for all 324 | /// bytes in `self`. 325 | /// 326 | /// This is significantly more efficient than checking each byte in `self` 327 | /// individually. 328 | /// 329 | /// [`u8::is_ascii_lowercase`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_lowercase 330 | #[inline] 331 | #[must_use] 332 | pub const fn is_ascii_lowercase(&self) -> bool { 333 | self._is_subset(&Self::ASCII_LOWERCASE) 334 | } 335 | 336 | /// Returns `true` if [`u8::is_ascii_alphanumeric`] returns `true` for all 337 | /// bytes in `self`. 338 | /// 339 | /// This is significantly more efficient than checking each byte in `self` 340 | /// individually. 341 | /// 342 | /// [`u8::is_ascii_alphanumeric`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_alphanumeric 343 | #[inline] 344 | #[must_use] 345 | pub const fn is_ascii_alphanumeric(&self) -> bool { 346 | self._is_subset(&Self::ASCII_ALPHANUMERIC) 347 | } 348 | 349 | /// Returns `true` if [`u8::is_ascii_digit`] returns `true` for all bytes in 350 | /// `self`. 351 | /// 352 | /// This is significantly more efficient than checking each byte in `self` 353 | /// individually. 354 | /// 355 | /// [`u8::is_ascii_digit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_digit 356 | #[inline] 357 | #[must_use] 358 | pub const fn is_ascii_digit(&self) -> bool { 359 | self._is_subset(&Self::ASCII_DIGIT) 360 | } 361 | 362 | /// Returns `true` if [`u8::is_ascii_hexdigit`] returns `true` for all bytes 363 | /// in `self`. 364 | /// 365 | /// This is significantly more efficient than checking each byte in `self` 366 | /// individually. 
367 | /// 368 | /// [`u8::is_ascii_hexdigit`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_hexdigit 369 | #[inline] 370 | #[must_use] 371 | pub const fn is_ascii_hexdigit(&self) -> bool { 372 | self._is_subset(&Self::ASCII_HEXDIGIT) 373 | } 374 | 375 | /// Returns `true` if [`u8::is_ascii_punctuation`] returns `true` for all 376 | /// bytes in `self`. 377 | /// 378 | /// This is significantly more efficient than checking each byte in `self` 379 | /// individually. 380 | /// 381 | /// [`u8::is_ascii_punctuation`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_punctuation 382 | #[inline] 383 | #[must_use] 384 | pub const fn is_ascii_punctuation(&self) -> bool { 385 | self._is_subset(&Self::ASCII_PUNCTUATION) 386 | } 387 | 388 | /// Returns `true` if [`u8::is_ascii_graphic`] returns `true` for all bytes 389 | /// in `self`. 390 | /// 391 | /// This is significantly more efficient than checking each byte in `self` 392 | /// individually. 393 | /// 394 | /// [`u8::is_ascii_graphic`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_graphic 395 | #[inline] 396 | #[must_use] 397 | pub const fn is_ascii_graphic(&self) -> bool { 398 | self._is_subset(&Self::ASCII_GRAPHIC) 399 | } 400 | 401 | /// Returns `true` if [`u8::is_ascii_whitespace`] returns `true` for all 402 | /// bytes in `self`. 403 | /// 404 | /// This is significantly more efficient than checking each byte in `self` 405 | /// individually. 406 | /// 407 | /// [`u8::is_ascii_whitespace`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_whitespace 408 | #[inline] 409 | #[must_use] 410 | pub const fn is_ascii_whitespace(&self) -> bool { 411 | self._is_subset(&Self::ASCII_WHITESPACE) 412 | } 413 | 414 | /// Returns `true` if [`u8::is_ascii_control`] returns `true` for all bytes 415 | /// in `self`. 416 | /// 417 | /// This is significantly more efficient than checking each byte in `self` 418 | /// individually. 
419 | /// 420 | /// [`u8::is_ascii_control`]: https://doc.rust-lang.org/std/primitive.u8.html#method.is_ascii_control 421 | #[inline] 422 | #[must_use] 423 | pub const fn is_ascii_control(&self) -> bool { 424 | self._is_subset(&Self::ASCII_CONTROL) 425 | } 426 | } 427 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //!
2 | //!

3 | //! 4 | //! ByteSet 5 | //! 6 | //!

7 | //! 8 | //! Crates.io 9 | //! Downloads 10 | //! 11 | //! 12 | //! docs.rs 13 | //! 14 | //! 15 | //! Build Status 16 | //! 17 | //!

18 | //!
19 | //! 20 | //! Efficient sets of bytes for Rust, brought to you by [@NikolaiVazquez]! 21 | //! 22 | //! The star of the show is [`ByteSet`]: an allocation-free sorted set. It is a 23 | //! *much faster* alternative to [`HashSet`], [`BTreeSet`], and other 24 | //! types for a variety of scenarios. See ["Implementation"](#implementation) 25 | //! for a peek under the hood. 26 | //! 27 | //! If you found this library useful, please consider [sponsoring me on 28 | //! GitHub](https://github.com/sponsors/nvzqz)! 29 | //! 30 | //! ## Table of Contents 31 | //! 32 | //! 1. [Usage](#usage) 33 | //! 2. [Examples](#examples) 34 | //! 1. [`ByteSet` Type](#byteset-type) 35 | //! 1. [Insert](#insert) 36 | //! 2. [Extend](#extend) 37 | //! 3. [Remove](#remove) 38 | //! 4. [Iterate](#iterate) 39 | //! 5. [Contains](#contains) 40 | //! 6. [Subset](#subset) 41 | //! 7. [Min and Max](#min-and-max) 42 | //! 2. [`byte_set!` Macro](#byte_set-macro) 43 | //! 3. [Implementation](#implementation) 44 | //! 4. [Benchmarks](#benchmarks) 45 | //! 5. [Ecosystem Integrations](#ecosystem-integrations) 46 | //! 1. [`rand`](#rand) 47 | //! 2. [`serde`](#serde) 48 | //! 6. [License](#license) 49 | //! 50 | //! ## Usage 51 | //! 52 | //! This library is available [on crates.io][crate] and can be used by adding 53 | //! the following to your project's [`Cargo.toml`]: 54 | //! 55 | //! ```toml 56 | //! [dependencies] 57 | //! byte_set = "0.1.3" 58 | //! ``` 59 | //! 60 | //! To import the [`byte_set!`] macro, add this to your crate root (`main.rs` or 61 | //! `lib.rs`): 62 | //! 63 | //! ```rust 64 | //! use byte_set::byte_set; 65 | //! ``` 66 | //! 67 | //! If you're not using [Rust 2018 edition][2018], it must be imported 68 | //! differently: 69 | //! 70 | //! ```rust 71 | //! #[macro_use] 72 | //! extern crate byte_set; 73 | //! # fn main() {} 74 | //! ``` 75 | //! 76 | //! ## Examples 77 | //! 78 | //! ### `ByteSet` Type 79 | //! 80 | //! First, let's import [`ByteSet`]: 81 | //! 82 | //! 
```rust 83 | //! use byte_set::ByteSet; 84 | //! ``` 85 | //! 86 | //! Here's how you create an empty set: 87 | //! 88 | //! ```rust 89 | //! # use byte_set::ByteSet; 90 | //! let bytes = ByteSet::new(); 91 | //! ``` 92 | //! 93 | //! You can create a set filled with all bytes (0 through 255) just as easily: 94 | //! 95 | //! ```rust 96 | //! # use byte_set::ByteSet; 97 | //! let bytes = ByteSet::full(); 98 | //! ``` 99 | //! 100 | //! Ok, let's see what we can do with this. Note that this isn't the only 101 | //! available functionality. See [`ByteSet`] for a complete list. 102 | //! 103 | //! #### Insert 104 | //! 105 | //! Use [`insert`] to include a single byte, by mutating the [`ByteSet`] 106 | //! in-place: 107 | //! 108 | //! ```rust 109 | //! # use byte_set::ByteSet; 110 | //! let mut bytes = ByteSet::new(); 111 | //! bytes.insert(255); 112 | //! ``` 113 | //! 114 | //! Use [`inserting`] as an immutable alternative, by passing the calling 115 | //! [`ByteSet`] by value: 116 | //! 117 | //! ```rust 118 | //! # use byte_set::ByteSet; 119 | //! let bytes = ByteSet::new().inserting(255); 120 | //! ``` 121 | //! 122 | //! Use [`insert_all`] to include all bytes of another [`ByteSet`], by mutating 123 | //! the [`ByteSet`] in-place: 124 | //! 125 | //! ```rust 126 | //! # use byte_set::ByteSet; 127 | //! let mut alphabet = ByteSet::ASCII_UPPERCASE; 128 | //! alphabet.insert_all(ByteSet::ASCII_LOWERCASE); 129 | //! 130 | //! assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC); 131 | //! ``` 132 | //! 133 | //! Use [`inserting_all`] as an immutable alternative, by passing the calling 134 | //! [`ByteSet`] by value: 135 | //! 136 | //! ```rust 137 | //! # use byte_set::ByteSet; 138 | //! let alphabet = ByteSet::ASCII_UPPERCASE.inserting_all(ByteSet::ASCII_LOWERCASE); 139 | //! 140 | //! assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC); 141 | //! ``` 142 | //! 143 | //! #### Extend 144 | //! 145 | //! 
Rather than call [`insert`] in a loop, [`extend`] simplifies inserting from 146 | //! an iterator: 147 | //! 148 | //! ```rust 149 | //! # use byte_set::ByteSet; 150 | //! fn take_string(bytes: &mut ByteSet, s: &str) { 151 | //! bytes.extend(s.as_bytes()); 152 | //! } 153 | //! ``` 154 | //! 155 | //! Because this iterates over the entire input, it is *much* more efficient to 156 | //! use [`insert_all`] instead of [`extend`] when inserting another [`ByteSet`]. 157 | //! 158 | //! #### Remove 159 | //! 160 | //! Use [`remove`] to exclude a single byte by mutating the set in-place: 161 | //! 162 | //! ```rust 163 | //! # use byte_set::ByteSet; 164 | //! let mut bytes = ByteSet::full(); 165 | //! bytes.remove(255); 166 | //! ``` 167 | //! 168 | //! Use [`removing`] as an immutable alternative, by passing the calling 169 | //! [`ByteSet`] by value: 170 | //! 171 | //! ```rust 172 | //! # use byte_set::ByteSet; 173 | //! let bytes = ByteSet::full().removing(255); 174 | //! ``` 175 | //! 176 | //! Use [`remove_all`] to exclude all bytes of another [`ByteSet`], by mutating 177 | //! the [`ByteSet`] in-place: 178 | //! 179 | //! ```rust 180 | //! # use byte_set::ByteSet; 181 | //! let mut alphabet = ByteSet::ASCII_ALPHANUMERIC; 182 | //! alphabet.remove_all(ByteSet::ASCII_DIGIT); 183 | //! 184 | //! assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC); 185 | //! ``` 186 | //! 187 | //! Use [`removing_all`] as an immutable alternative, by passing the calling 188 | //! [`ByteSet`] by value: 189 | //! 190 | //! ```rust 191 | //! # use byte_set::ByteSet; 192 | //! let alphabet = ByteSet::ASCII_ALPHANUMERIC.removing_all(ByteSet::ASCII_DIGIT); 193 | //! 194 | //! assert_eq!(alphabet, ByteSet::ASCII_ALPHABETIC); 195 | //! ``` 196 | //! 197 | //! #### Iterate 198 | //! 199 | //! Iterating can be done with just a `for` loop, and goes in order from least 200 | //! to greatest: 201 | //! 202 | //! ```rust 203 | //! # use byte_set::ByteSet; 204 | //! # fn do_work(_: u8) {} 205 | //! 
fn small_to_big(bytes: ByteSet) { 206 | //! for byte in bytes { 207 | //! do_work(byte); 208 | //! } 209 | //! } 210 | //! ``` 211 | //! 212 | //! Iterating in reverse is slightly more verbose, and goes in order from 213 | //! greatest to least: 214 | //! 215 | //! ```rust 216 | //! # use byte_set::ByteSet; 217 | //! # fn do_work(_: u8) {} 218 | //! fn big_to_small(bytes: ByteSet) { 219 | //! for byte in bytes.into_iter().rev() { 220 | //! do_work(byte); 221 | //! } 222 | //! } 223 | //! ``` 224 | //! 225 | //! #### Contains 226 | //! 227 | //! It wouldn't really be a set if you couldn't check if it has specific items. 228 | //! 229 | //! Use [`contains`] to check a single byte: 230 | //! 231 | //! ```rust 232 | //! # use byte_set::ByteSet; 233 | //! fn has_null(bytes: &ByteSet) -> bool { 234 | //! bytes.contains(0) 235 | //! } 236 | //! ``` 237 | //! 238 | //! Use [`contains_any`] to check for any matches in another [`ByteSet`]: 239 | //! 240 | //! ```rust 241 | //! # use byte_set::ByteSet; 242 | //! fn intersects(a: &ByteSet, b: &ByteSet) -> bool { 243 | //! a.contains_any(b) 244 | //! } 245 | //! ``` 246 | //! 247 | //! #### Subset 248 | //! 249 | //! Use [`is_subset`] to check that all of the bytes in a [`ByteSet`] are 250 | //! contained in another: 251 | //! 252 | //! ```rust 253 | //! # use byte_set::ByteSet; 254 | //! fn test(a: &ByteSet, b: &ByteSet) { 255 | //! assert!(a.is_subset(b)); 256 | //! 257 | //! // Always passes because every set is a subset of itself. 258 | //! assert!(a.is_subset(a)); 259 | //! } 260 | //! ``` 261 | //! 262 | //! Use [`is_strict_subset`] to check [`is_subset`] *and* that the sets are not 263 | //! the same: 264 | //! 265 | //! ```rust 266 | //! # use byte_set::ByteSet; 267 | //! fn test(a: &ByteSet, b: &ByteSet) { 268 | //! assert!(a.is_strict_subset(b)); 269 | //! 270 | //! // `a` is equal to itself. 271 | //! assert!(!a.is_strict_subset(a)); 272 | //! } 273 | //! ``` 274 | //! 275 | //! 
For the sake of completeness, there are also [`is_superset`] and 276 | //! [`is_strict_superset`], which call these functions with `a` and `b` 277 | //! switched. 278 | //! 279 | //! #### Min and Max 280 | //! 281 | //! Use [`first`] to get the smallest byte and [`last`] to get the biggest byte: 282 | //! 283 | //! ```rust 284 | //! # use byte_set::ByteSet; 285 | //! fn sanity_check(bytes: &ByteSet) { 286 | //! if let (Some(first), Some(last)) = (bytes.first(), bytes.last()) { 287 | //! assert!(first <= last); 288 | //! } else { 289 | //! // `bytes` is empty. 290 | //! } 291 | //! } 292 | //! ``` 293 | //! 294 | //! These are the first and last bytes returned when iterating. 295 | //! 296 | //! ### `byte_set!` Macro 297 | //! 298 | //! [`byte_set!`] enables you to create a [`ByteSet`] with the same syntax as 299 | //! [`vec!`] or array expressions: 300 | //! 301 | //! ```rust 302 | //! # use byte_set::byte_set; 303 | //! let bytes = byte_set![1, 2, 3, b'x', b'y', b'z']; 304 | //! ``` 305 | //! 306 | //! It even works at compile-time in a `const` expression: 307 | //! 308 | //! ```rust 309 | //! # use byte_set::{ByteSet, byte_set}; 310 | //! const WHOA: ByteSet = byte_set![b'w', b'h', b'o', b'a']; 311 | //! 312 | //! static ABC: ByteSet = byte_set![b'a', b'b', b'c']; 313 | //! ``` 314 | //! 315 | //! ## Implementation 316 | //! 317 | //! [`ByteSet`] is implemented as a 256-bit mask where each bit corresponds to a 318 | //! byte value. The first (least significant) bit in the mask represents the 319 | //! first byte (0) in the set. Likewise, the last (most significant) bit 320 | //! represents the last byte (255). 321 | //! 322 | //! Given the following [`ByteSet`]: 323 | //! 324 | //! ```rust 325 | //! # use byte_set::byte_set; 326 | //! let bytes = byte_set![0, 1, 4, 5, 254]; 327 | //! ``` 328 | //! 329 | //! The in-memory representation of `bytes` would look like: 330 | //! 331 | //! ```text 332 | //! Byte: 0 1 2 3 4 5 6 7 ... 253 254 255 333 | //!
Value: 1 1 0 0 1 1 0 0 ... 0 1 0 334 | //! ``` 335 | //! 336 | //! This bit mask is composed of either `[u64; 4]` or `[u32; 8]` depending on 337 | //! the target CPU (see [#3]). Because this comes out to only 32 bytes, 338 | //! [`ByteSet`] implements [`Copy`]. 339 | //! 340 | //! ## Benchmarks 341 | //! 342 | //! I will upload benchmarks run from my machine soon. 343 | //! 344 | //! In the meantime, you can benchmark this library by running: 345 | //! 346 | //! ```sh 347 | //! cargo bench 348 | //! ``` 349 | //! 350 | //! By default, this will benchmark [`ByteSet`] along with various other types 351 | //! to compare performance. Note that this will take **a long time** (about 1 352 | //! hour and 30 minutes). 353 | //! 354 | //! Benchmark only [`ByteSet`] by running: 355 | //! 356 | //! ```sh 357 | //! cargo bench ByteSet 358 | //! ``` 359 | //! 360 | //! This takes about 15 minutes, so maybe grab a coffee in the meantime. 361 | //! 362 | //! Benchmark a specific [`ByteSet`] operation by running: 363 | //! 364 | //! ```sh 365 | //! cargo bench $operation/ByteSet 366 | //! ``` 367 | //! 368 | //! See `/benches/benchmarks` for strings that can be used for `$operation`. 369 | //! 370 | //! Note that `cargo bench` takes a regular expression, so `Contains (Random)` 371 | //! will not work because the parentheses are treated as a capture group. To 372 | //! match parentheses, escape them: `Contains \(Random\)`. 373 | //! 374 | //! ## Ecosystem Integrations 375 | //! 376 | //! This library has extended functionality for some popular crates. 377 | //! 378 | //! ### `rand` 379 | //! 380 | //! Use the `rand` (or `rand_core`) feature in your [`Cargo.toml`] to enable 381 | //! random [`ByteSet`] generation: 382 | //! 383 | //! ```toml 384 | //! [dependencies.byte_set] 385 | //! version = "0.1.3" 386 | //! features = ["rand"] 387 | //! ``` 388 | //! 389 | //! This makes the following possible: 390 | //! 391 | //! ```rust,ignore 392 | //! let bytes = rand::random::<ByteSet>(); 393 | //!
394 | //! // Same as above. 395 | //! let bytes = ByteSet::rand(rand::thread_rng()); 396 | //! 397 | //! // Handle failure instead of panicking. 398 | //! match ByteSet::try_rand(rand::rngs::OsRng) { 399 | //! Ok(bytes) => // ... 400 | //! Err(error) => // ... 401 | //! } 402 | //! ``` 403 | //! 404 | //! ### `serde` 405 | //! 406 | //! Use the `serde` feature in your [`Cargo.toml`] to enable [`Serialize`] and 407 | //! [`Deserialize`] for [`ByteSet`]: 408 | //! 409 | //! ```toml 410 | //! [dependencies.byte_set] 411 | //! version = "0.1.3" 412 | //! features = ["serde"] 413 | //! ``` 414 | //! 415 | //! This makes the following possible: 416 | //! 417 | //! ```rust,ignore 418 | //! use serde::{Serialize, Deserialize}; 419 | //! 420 | //! #[derive(Serialize, Deserialize)] 421 | //! struct MyValue { 422 | //! bytes: ByteSet 423 | //! } 424 | //! ``` 425 | //! 426 | //! [`ByteSet`] can be serialized into a `u8` sequence, and deserialized from 427 | //! `&[u8]` or a `u8` sequence. 428 | //! 429 | //! Read more about using `serde` at [serde.rs](https://serde.rs/). 430 | //! 431 | //! ## License 432 | //! 433 | //! This project is released under either: 434 | //! 435 | //! - [MIT License](https://github.com/nvzqz/byte-set-rs/blob/master/LICENSE-MIT) 436 | //! 437 | //! - [Apache License (Version 2.0)](https://github.com/nvzqz/byte-set-rs/blob/master/LICENSE-APACHE) 438 | //! 439 | //! at your choosing. 440 | //! 441 | //! [@NikolaiVazquez]: https://twitter.com/NikolaiVazquez 442 | //! 443 | //! [`Cargo.toml`]: https://doc.rust-lang.org/cargo/reference/manifest.html 444 | //! [2018]: https://blog.rust-lang.org/2018/12/06/Rust-1.31-and-rust-2018.html#rust-2018 445 | //! [crate]: https://crates.io/crates/byte_set 446 | //! 447 | //! [`BTreeSet`]: https://doc.rust-lang.org/std/collections/struct.BTreeSet.html 448 | //! [`Copy`]: https://doc.rust-lang.org/std/marker/trait.Copy.html 449 | //! [`HashSet`]: https://doc.rust-lang.org/std/collections/struct.HashSet.html 450 | //! 
[`u8`]: https://doc.rust-lang.org/std/primitive.u8.html 451 | //! [`vec!`]: https://doc.rust-lang.org/std/macro.vec.html 452 | //! 453 | //! [`Serialize`]: https://docs.rs/serde/1.*/serde/trait.Serialize.html 454 | //! [`Deserialize`]: https://docs.rs/serde/1.*/serde/trait.Deserialize.html 455 | //! 456 | //! [#3]: https://github.com/nvzqz/byte-set-rs/issues/3 457 | //! 458 | //! [`byte_set!`]: macro.byte_set.html 459 | //! [`ByteSet`]: struct.ByteSet.html 460 | //! [`contains_any`]: struct.ByteSet.html#method.contains_any 461 | //! [`contains`]: struct.ByteSet.html#method.contains 462 | //! [`extend`]: struct.ByteSet.html#impl-Extend%3Cu8%3E 463 | //! [`first`]: struct.ByteSet.html#method.first 464 | //! [`insert_all`]: struct.ByteSet.html#method.insert_all 465 | //! [`insert`]: struct.ByteSet.html#method.insert 466 | //! [`inserting_all`]: struct.ByteSet.html#method.inserting_all 467 | //! [`inserting`]: struct.ByteSet.html#method.inserting 468 | //! [`last`]: struct.ByteSet.html#method.last 469 | //! [`remove_all`]: struct.ByteSet.html#method.remove_all 470 | //! [`remove`]: struct.ByteSet.html#method.remove 471 | //! [`removing_all`]: struct.ByteSet.html#method.removing_all 472 | //! [`removing`]: struct.ByteSet.html#method.removing 473 | //! [`is_strict_subset`]: struct.ByteSet.html#method.is_strict_subset 474 | //! [`is_subset`]: struct.ByteSet.html#method.is_subset 475 | //! [`is_strict_superset`]: struct.ByteSet.html#method.is_strict_superset 476 | //! 
[`is_superset`]: struct.ByteSet.html#method.is_superset 477 | 478 | #![cfg_attr(docsrs, feature(doc_cfg))] 479 | #![cfg_attr(not(any(test, feature = "std")), no_std)] 480 | #![warn(missing_docs)] 481 | 482 | #[macro_use] 483 | mod macros; 484 | 485 | #[cfg(test)] 486 | mod tests; 487 | 488 | pub(crate) mod chunk; 489 | pub(crate) use chunk::Chunk; 490 | 491 | mod byte_set; 492 | pub use self::byte_set::ByteSet; 493 | 494 | mod iter; 495 | pub use iter::Iter; 496 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "ahash" 5 | version = "0.3.5" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | checksum = "2f3e0bf23f51883cce372d5d5892211236856e4bb37fb942e1eb135ee0f146e3" 8 | 9 | [[package]] 10 | name = "atty" 11 | version = "0.2.14" 12 | source = "registry+https://github.com/rust-lang/crates.io-index" 13 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 14 | dependencies = [ 15 | "hermit-abi", 16 | "libc", 17 | "winapi", 18 | ] 19 | 20 | [[package]] 21 | name = "autocfg" 22 | version = "1.0.0" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" 25 | 26 | [[package]] 27 | name = "bitflags" 28 | version = "1.2.1" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 31 | 32 | [[package]] 33 | name = "bstr" 34 | version = "0.2.13" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "31accafdb70df7871592c058eca3985b71104e15ac32f64706022c58867da931" 37 | dependencies = [ 38 | "lazy_static", 39 | "memchr", 40 | "regex-automata", 41 | "serde", 42 
| ] 43 | 44 | [[package]] 45 | name = "bumpalo" 46 | version = "3.4.0" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820" 49 | 50 | [[package]] 51 | name = "byte_set" 52 | version = "0.1.3" 53 | dependencies = [ 54 | "criterion", 55 | "fixedbitset", 56 | "hashbrown", 57 | "rand", 58 | "rand_core", 59 | "serde", 60 | ] 61 | 62 | [[package]] 63 | name = "byteorder" 64 | version = "1.3.4" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" 67 | 68 | [[package]] 69 | name = "cast" 70 | version = "0.2.3" 71 | source = "registry+https://github.com/rust-lang/crates.io-index" 72 | checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0" 73 | dependencies = [ 74 | "rustc_version", 75 | ] 76 | 77 | [[package]] 78 | name = "cfg-if" 79 | version = "0.1.10" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" 82 | 83 | [[package]] 84 | name = "clap" 85 | version = "2.33.1" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129" 88 | dependencies = [ 89 | "bitflags", 90 | "textwrap", 91 | "unicode-width", 92 | ] 93 | 94 | [[package]] 95 | name = "criterion" 96 | version = "0.3.2" 97 | source = "registry+https://github.com/rust-lang/crates.io-index" 98 | checksum = "63f696897c88b57f4ffe3c69d8e1a0613c7d0e6c4833363c8560fbde9c47b966" 99 | dependencies = [ 100 | "atty", 101 | "cast", 102 | "clap", 103 | "criterion-plot", 104 | "csv", 105 | "itertools", 106 | "lazy_static", 107 | "num-traits", 108 | "oorandom", 109 | "plotters", 110 | "rayon", 111 | "regex", 112 | "serde", 113 | "serde_derive", 114 | "serde_json", 115 | "tinytemplate", 116 | "walkdir", 117 | ] 118 | 119 | 
[[package]] 120 | name = "criterion-plot" 121 | version = "0.4.2" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | checksum = "ddeaf7989f00f2e1d871a26a110f3ed713632feac17f65f03ca938c542618b60" 124 | dependencies = [ 125 | "cast", 126 | "itertools", 127 | ] 128 | 129 | [[package]] 130 | name = "crossbeam-deque" 131 | version = "0.7.3" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285" 134 | dependencies = [ 135 | "crossbeam-epoch", 136 | "crossbeam-utils", 137 | "maybe-uninit", 138 | ] 139 | 140 | [[package]] 141 | name = "crossbeam-epoch" 142 | version = "0.8.2" 143 | source = "registry+https://github.com/rust-lang/crates.io-index" 144 | checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" 145 | dependencies = [ 146 | "autocfg", 147 | "cfg-if", 148 | "crossbeam-utils", 149 | "lazy_static", 150 | "maybe-uninit", 151 | "memoffset", 152 | "scopeguard", 153 | ] 154 | 155 | [[package]] 156 | name = "crossbeam-queue" 157 | version = "0.2.2" 158 | source = "registry+https://github.com/rust-lang/crates.io-index" 159 | checksum = "ab6bffe714b6bb07e42f201352c34f51fefd355ace793f9e638ebd52d23f98d2" 160 | dependencies = [ 161 | "cfg-if", 162 | "crossbeam-utils", 163 | ] 164 | 165 | [[package]] 166 | name = "crossbeam-utils" 167 | version = "0.7.2" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" 170 | dependencies = [ 171 | "autocfg", 172 | "cfg-if", 173 | "lazy_static", 174 | ] 175 | 176 | [[package]] 177 | name = "csv" 178 | version = "1.1.3" 179 | source = "registry+https://github.com/rust-lang/crates.io-index" 180 | checksum = "00affe7f6ab566df61b4be3ce8cf16bc2576bca0963ceb0955e45d514bf9a279" 181 | dependencies = [ 182 | "bstr", 183 | "csv-core", 184 | "itoa", 185 | "ryu", 186 | "serde", 187 | ] 188 | 189 | 
[[package]] 190 | name = "csv-core" 191 | version = "0.1.10" 192 | source = "registry+https://github.com/rust-lang/crates.io-index" 193 | checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" 194 | dependencies = [ 195 | "memchr", 196 | ] 197 | 198 | [[package]] 199 | name = "either" 200 | version = "1.5.3" 201 | source = "registry+https://github.com/rust-lang/crates.io-index" 202 | checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" 203 | 204 | [[package]] 205 | name = "fixedbitset" 206 | version = "0.3.0" 207 | source = "registry+https://github.com/rust-lang/crates.io-index" 208 | checksum = "2fc4fcacf5cd3681968f6524ea159383132937739c6c40dabab9e37ed515911b" 209 | 210 | [[package]] 211 | name = "getrandom" 212 | version = "0.1.14" 213 | source = "registry+https://github.com/rust-lang/crates.io-index" 214 | checksum = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" 215 | dependencies = [ 216 | "cfg-if", 217 | "libc", 218 | "wasi", 219 | ] 220 | 221 | [[package]] 222 | name = "hashbrown" 223 | version = "0.7.2" 224 | source = "registry+https://github.com/rust-lang/crates.io-index" 225 | checksum = "96282e96bfcd3da0d3aa9938bedf1e50df3269b6db08b4876d2da0bb1a0841cf" 226 | dependencies = [ 227 | "ahash", 228 | "autocfg", 229 | ] 230 | 231 | [[package]] 232 | name = "hermit-abi" 233 | version = "0.1.13" 234 | source = "registry+https://github.com/rust-lang/crates.io-index" 235 | checksum = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71" 236 | dependencies = [ 237 | "libc", 238 | ] 239 | 240 | [[package]] 241 | name = "itertools" 242 | version = "0.9.0" 243 | source = "registry+https://github.com/rust-lang/crates.io-index" 244 | checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" 245 | dependencies = [ 246 | "either", 247 | ] 248 | 249 | [[package]] 250 | name = "itoa" 251 | version = "0.4.5" 252 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 253 | checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" 254 | 255 | [[package]] 256 | name = "js-sys" 257 | version = "0.3.40" 258 | source = "registry+https://github.com/rust-lang/crates.io-index" 259 | checksum = "ce10c23ad2ea25ceca0093bd3192229da4c5b3c0f2de499c1ecac0d98d452177" 260 | dependencies = [ 261 | "wasm-bindgen", 262 | ] 263 | 264 | [[package]] 265 | name = "lazy_static" 266 | version = "1.4.0" 267 | source = "registry+https://github.com/rust-lang/crates.io-index" 268 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 269 | 270 | [[package]] 271 | name = "libc" 272 | version = "0.2.71" 273 | source = "registry+https://github.com/rust-lang/crates.io-index" 274 | checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49" 275 | 276 | [[package]] 277 | name = "log" 278 | version = "0.4.8" 279 | source = "registry+https://github.com/rust-lang/crates.io-index" 280 | checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" 281 | dependencies = [ 282 | "cfg-if", 283 | ] 284 | 285 | [[package]] 286 | name = "maybe-uninit" 287 | version = "2.0.0" 288 | source = "registry+https://github.com/rust-lang/crates.io-index" 289 | checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" 290 | 291 | [[package]] 292 | name = "memchr" 293 | version = "2.3.3" 294 | source = "registry+https://github.com/rust-lang/crates.io-index" 295 | checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" 296 | 297 | [[package]] 298 | name = "memoffset" 299 | version = "0.5.4" 300 | source = "registry+https://github.com/rust-lang/crates.io-index" 301 | checksum = "b4fc2c02a7e374099d4ee95a193111f72d2110197fe200272371758f6c3643d8" 302 | dependencies = [ 303 | "autocfg", 304 | ] 305 | 306 | [[package]] 307 | name = "num-traits" 308 | version = "0.2.11" 309 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 310 | checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096" 311 | dependencies = [ 312 | "autocfg", 313 | ] 314 | 315 | [[package]] 316 | name = "num_cpus" 317 | version = "1.13.0" 318 | source = "registry+https://github.com/rust-lang/crates.io-index" 319 | checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" 320 | dependencies = [ 321 | "hermit-abi", 322 | "libc", 323 | ] 324 | 325 | [[package]] 326 | name = "oorandom" 327 | version = "11.1.1" 328 | source = "registry+https://github.com/rust-lang/crates.io-index" 329 | checksum = "94af325bc33c7f60191be4e2c984d48aaa21e2854f473b85398344b60c9b6358" 330 | 331 | [[package]] 332 | name = "plotters" 333 | version = "0.2.15" 334 | source = "registry+https://github.com/rust-lang/crates.io-index" 335 | checksum = "0d1685fbe7beba33de0330629da9d955ac75bd54f33d7b79f9a895590124f6bb" 336 | dependencies = [ 337 | "js-sys", 338 | "num-traits", 339 | "wasm-bindgen", 340 | "web-sys", 341 | ] 342 | 343 | [[package]] 344 | name = "ppv-lite86" 345 | version = "0.2.8" 346 | source = "registry+https://github.com/rust-lang/crates.io-index" 347 | checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea" 348 | 349 | [[package]] 350 | name = "proc-macro2" 351 | version = "1.0.18" 352 | source = "registry+https://github.com/rust-lang/crates.io-index" 353 | checksum = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa" 354 | dependencies = [ 355 | "unicode-xid", 356 | ] 357 | 358 | [[package]] 359 | name = "quote" 360 | version = "1.0.6" 361 | source = "registry+https://github.com/rust-lang/crates.io-index" 362 | checksum = "54a21852a652ad6f610c9510194f398ff6f8692e334fd1145fed931f7fbe44ea" 363 | dependencies = [ 364 | "proc-macro2", 365 | ] 366 | 367 | [[package]] 368 | name = "rand" 369 | version = "0.7.3" 370 | source = "registry+https://github.com/rust-lang/crates.io-index" 371 | checksum = 
"6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" 372 | dependencies = [ 373 | "getrandom", 374 | "libc", 375 | "rand_chacha", 376 | "rand_core", 377 | "rand_hc", 378 | ] 379 | 380 | [[package]] 381 | name = "rand_chacha" 382 | version = "0.2.2" 383 | source = "registry+https://github.com/rust-lang/crates.io-index" 384 | checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" 385 | dependencies = [ 386 | "ppv-lite86", 387 | "rand_core", 388 | ] 389 | 390 | [[package]] 391 | name = "rand_core" 392 | version = "0.5.1" 393 | source = "registry+https://github.com/rust-lang/crates.io-index" 394 | checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" 395 | dependencies = [ 396 | "getrandom", 397 | ] 398 | 399 | [[package]] 400 | name = "rand_hc" 401 | version = "0.2.0" 402 | source = "registry+https://github.com/rust-lang/crates.io-index" 403 | checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" 404 | dependencies = [ 405 | "rand_core", 406 | ] 407 | 408 | [[package]] 409 | name = "rayon" 410 | version = "1.3.0" 411 | source = "registry+https://github.com/rust-lang/crates.io-index" 412 | checksum = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098" 413 | dependencies = [ 414 | "crossbeam-deque", 415 | "either", 416 | "rayon-core", 417 | ] 418 | 419 | [[package]] 420 | name = "rayon-core" 421 | version = "1.7.0" 422 | source = "registry+https://github.com/rust-lang/crates.io-index" 423 | checksum = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9" 424 | dependencies = [ 425 | "crossbeam-deque", 426 | "crossbeam-queue", 427 | "crossbeam-utils", 428 | "lazy_static", 429 | "num_cpus", 430 | ] 431 | 432 | [[package]] 433 | name = "regex" 434 | version = "1.3.9" 435 | source = "registry+https://github.com/rust-lang/crates.io-index" 436 | checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6" 437 | dependencies = [ 438 | 
"regex-syntax", 439 | ] 440 | 441 | [[package]] 442 | name = "regex-automata" 443 | version = "0.1.9" 444 | source = "registry+https://github.com/rust-lang/crates.io-index" 445 | checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4" 446 | dependencies = [ 447 | "byteorder", 448 | ] 449 | 450 | [[package]] 451 | name = "regex-syntax" 452 | version = "0.6.18" 453 | source = "registry+https://github.com/rust-lang/crates.io-index" 454 | checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8" 455 | 456 | [[package]] 457 | name = "rustc_version" 458 | version = "0.2.3" 459 | source = "registry+https://github.com/rust-lang/crates.io-index" 460 | checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" 461 | dependencies = [ 462 | "semver", 463 | ] 464 | 465 | [[package]] 466 | name = "ryu" 467 | version = "1.0.5" 468 | source = "registry+https://github.com/rust-lang/crates.io-index" 469 | checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" 470 | 471 | [[package]] 472 | name = "same-file" 473 | version = "1.0.6" 474 | source = "registry+https://github.com/rust-lang/crates.io-index" 475 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 476 | dependencies = [ 477 | "winapi-util", 478 | ] 479 | 480 | [[package]] 481 | name = "scopeguard" 482 | version = "1.1.0" 483 | source = "registry+https://github.com/rust-lang/crates.io-index" 484 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 485 | 486 | [[package]] 487 | name = "semver" 488 | version = "0.9.0" 489 | source = "registry+https://github.com/rust-lang/crates.io-index" 490 | checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" 491 | dependencies = [ 492 | "semver-parser", 493 | ] 494 | 495 | [[package]] 496 | name = "semver-parser" 497 | version = "0.7.0" 498 | source = "registry+https://github.com/rust-lang/crates.io-index" 499 | checksum = 
"388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" 500 | 501 | [[package]] 502 | name = "serde" 503 | version = "1.0.111" 504 | source = "registry+https://github.com/rust-lang/crates.io-index" 505 | checksum = "c9124df5b40cbd380080b2cc6ab894c040a3070d995f5c9dc77e18c34a8ae37d" 506 | 507 | [[package]] 508 | name = "serde_derive" 509 | version = "1.0.111" 510 | source = "registry+https://github.com/rust-lang/crates.io-index" 511 | checksum = "3f2c3ac8e6ca1e9c80b8be1023940162bf81ae3cffbb1809474152f2ce1eb250" 512 | dependencies = [ 513 | "proc-macro2", 514 | "quote", 515 | "syn", 516 | ] 517 | 518 | [[package]] 519 | name = "serde_json" 520 | version = "1.0.53" 521 | source = "registry+https://github.com/rust-lang/crates.io-index" 522 | checksum = "993948e75b189211a9b31a7528f950c6adc21f9720b6438ff80a7fa2f864cea2" 523 | dependencies = [ 524 | "itoa", 525 | "ryu", 526 | "serde", 527 | ] 528 | 529 | [[package]] 530 | name = "syn" 531 | version = "1.0.30" 532 | source = "registry+https://github.com/rust-lang/crates.io-index" 533 | checksum = "93a56fabc59dce20fe48b6c832cc249c713e7ed88fa28b0ee0a3bfcaae5fe4e2" 534 | dependencies = [ 535 | "proc-macro2", 536 | "quote", 537 | "unicode-xid", 538 | ] 539 | 540 | [[package]] 541 | name = "textwrap" 542 | version = "0.11.0" 543 | source = "registry+https://github.com/rust-lang/crates.io-index" 544 | checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 545 | dependencies = [ 546 | "unicode-width", 547 | ] 548 | 549 | [[package]] 550 | name = "tinytemplate" 551 | version = "1.1.0" 552 | source = "registry+https://github.com/rust-lang/crates.io-index" 553 | checksum = "6d3dc76004a03cec1c5932bca4cdc2e39aaa798e3f82363dd94f9adf6098c12f" 554 | dependencies = [ 555 | "serde", 556 | "serde_json", 557 | ] 558 | 559 | [[package]] 560 | name = "unicode-width" 561 | version = "0.1.7" 562 | source = "registry+https://github.com/rust-lang/crates.io-index" 563 | checksum = 
"caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" 564 | 565 | [[package]] 566 | name = "unicode-xid" 567 | version = "0.2.0" 568 | source = "registry+https://github.com/rust-lang/crates.io-index" 569 | checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" 570 | 571 | [[package]] 572 | name = "walkdir" 573 | version = "2.3.1" 574 | source = "registry+https://github.com/rust-lang/crates.io-index" 575 | checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" 576 | dependencies = [ 577 | "same-file", 578 | "winapi", 579 | "winapi-util", 580 | ] 581 | 582 | [[package]] 583 | name = "wasi" 584 | version = "0.9.0+wasi-snapshot-preview1" 585 | source = "registry+https://github.com/rust-lang/crates.io-index" 586 | checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" 587 | 588 | [[package]] 589 | name = "wasm-bindgen" 590 | version = "0.2.63" 591 | source = "registry+https://github.com/rust-lang/crates.io-index" 592 | checksum = "4c2dc4aa152834bc334f506c1a06b866416a8b6697d5c9f75b9a689c8486def0" 593 | dependencies = [ 594 | "cfg-if", 595 | "wasm-bindgen-macro", 596 | ] 597 | 598 | [[package]] 599 | name = "wasm-bindgen-backend" 600 | version = "0.2.63" 601 | source = "registry+https://github.com/rust-lang/crates.io-index" 602 | checksum = "ded84f06e0ed21499f6184df0e0cb3494727b0c5da89534e0fcc55c51d812101" 603 | dependencies = [ 604 | "bumpalo", 605 | "lazy_static", 606 | "log", 607 | "proc-macro2", 608 | "quote", 609 | "syn", 610 | "wasm-bindgen-shared", 611 | ] 612 | 613 | [[package]] 614 | name = "wasm-bindgen-macro" 615 | version = "0.2.63" 616 | source = "registry+https://github.com/rust-lang/crates.io-index" 617 | checksum = "838e423688dac18d73e31edce74ddfac468e37b1506ad163ffaf0a46f703ffe3" 618 | dependencies = [ 619 | "quote", 620 | "wasm-bindgen-macro-support", 621 | ] 622 | 623 | [[package]] 624 | name = "wasm-bindgen-macro-support" 625 | version = "0.2.63" 626 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 627 | checksum = "3156052d8ec77142051a533cdd686cba889537b213f948cd1d20869926e68e92" 628 | dependencies = [ 629 | "proc-macro2", 630 | "quote", 631 | "syn", 632 | "wasm-bindgen-backend", 633 | "wasm-bindgen-shared", 634 | ] 635 | 636 | [[package]] 637 | name = "wasm-bindgen-shared" 638 | version = "0.2.63" 639 | source = "registry+https://github.com/rust-lang/crates.io-index" 640 | checksum = "c9ba19973a58daf4db6f352eda73dc0e289493cd29fb2632eb172085b6521acd" 641 | 642 | [[package]] 643 | name = "web-sys" 644 | version = "0.3.40" 645 | source = "registry+https://github.com/rust-lang/crates.io-index" 646 | checksum = "7b72fe77fd39e4bd3eaa4412fd299a0be6b3dfe9d2597e2f1c20beb968f41d17" 647 | dependencies = [ 648 | "js-sys", 649 | "wasm-bindgen", 650 | ] 651 | 652 | [[package]] 653 | name = "winapi" 654 | version = "0.3.8" 655 | source = "registry+https://github.com/rust-lang/crates.io-index" 656 | checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" 657 | dependencies = [ 658 | "winapi-i686-pc-windows-gnu", 659 | "winapi-x86_64-pc-windows-gnu", 660 | ] 661 | 662 | [[package]] 663 | name = "winapi-i686-pc-windows-gnu" 664 | version = "0.4.0" 665 | source = "registry+https://github.com/rust-lang/crates.io-index" 666 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 667 | 668 | [[package]] 669 | name = "winapi-util" 670 | version = "0.1.5" 671 | source = "registry+https://github.com/rust-lang/crates.io-index" 672 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 673 | dependencies = [ 674 | "winapi", 675 | ] 676 | 677 | [[package]] 678 | name = "winapi-x86_64-pc-windows-gnu" 679 | version = "0.4.0" 680 | source = "registry+https://github.com/rust-lang/crates.io-index" 681 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 682 | -------------------------------------------------------------------------------- 
/src/byte_set/main_impl.rs: -------------------------------------------------------------------------------- 1 | use super::{chunk_index_and_shift, ByteSet, Chunk}; 2 | use crate::chunk; 3 | use core::ops; 4 | 5 | // Makes `ByteSet::{rand,try_rand}` simpler to express. 6 | #[cfg(feature = "rand")] 7 | use rand as rand_core; 8 | 9 | impl ByteSet { 10 | /// Returns a set containing no bytes. 11 | #[inline] 12 | #[must_use] 13 | pub const fn new() -> Self { 14 | Self([0; Self::NUM_SLOTS]) 15 | } 16 | 17 | /// Returns a set containing all bytes (0-255). 18 | #[inline] 19 | #[must_use] 20 | pub const fn full() -> Self { 21 | Self([Chunk::max_value(); Self::NUM_SLOTS]) 22 | } 23 | 24 | /// Returns a set containing only `byte`. 25 | /// 26 | /// # Examples 27 | /// 28 | /// ``` 29 | /// # use byte_set::ByteSet; 30 | /// let byte = 42; 31 | /// let set = ByteSet::from_byte(byte); 32 | /// 33 | /// assert_eq!(set.first(), Some(byte)); 34 | /// assert_eq!(set.last(), Some(byte)); 35 | /// ``` 36 | #[inline] 37 | #[must_use] 38 | pub const fn from_byte(byte: u8) -> Self { 39 | byte_set![byte] 40 | } 41 | 42 | /// Construct a ByteSet from a `RangeTo` value, i.e. 
`..x` 43 | #[inline] 44 | #[must_use] 45 | pub const fn from_range_to(range: ops::RangeTo) -> Self { 46 | const fn chunk_for( 47 | this_chunk: usize, 48 | byte_chunk: usize, 49 | shift: usize, 50 | ) -> Chunk { 51 | // the following code is equivalent to 52 | // if this_chunk == byte_chunk { 53 | // value 54 | // } else if this_chunk < byte_chunk { 55 | // Chunk::max_value() 56 | // } else { 57 | // 0 58 | // } 59 | // 60 | // Once `if` works in const, this can be cleaned up 61 | // https://github.com/rust-lang/rust/pull/72437 62 | let value: Chunk = (1 << shift) - 1; 63 | let is_equal = (this_chunk == byte_chunk) as usize; 64 | let is_less_than = (this_chunk < byte_chunk) as usize; 65 | let if_unequal = [0, Chunk::max_value()][is_less_than]; 66 | 67 | [if_unequal, value][is_equal] 68 | } 69 | let (index, shift) = chunk_index_and_shift(range.end); 70 | #[cfg(target_pointer_width = "64")] 71 | let array = [ 72 | chunk_for(0, index, shift), 73 | chunk_for(1, index, shift), 74 | chunk_for(2, index, shift), 75 | chunk_for(3, index, shift), 76 | ]; 77 | #[cfg(not(target_pointer_width = "64"))] 78 | let array = [ 79 | chunk_for(0, index, shift), 80 | chunk_for(1, index, shift), 81 | chunk_for(2, index, shift), 82 | chunk_for(3, index, shift), 83 | chunk_for(4, index, shift), 84 | chunk_for(5, index, shift), 85 | chunk_for(6, index, shift), 86 | chunk_for(7, index, shift), 87 | ]; 88 | ByteSet(array) 89 | } 90 | 91 | /// Construct a ByteSet from a `RangeToInclusive` value, i.e. `..=x` 92 | #[inline] 93 | #[must_use] 94 | pub const fn from_range_to_inclusive( 95 | range: ops::RangeToInclusive, 96 | ) -> Self { 97 | [ 98 | Self::full(), 99 | Self::from_range_to(..(range.end.wrapping_add(1))), 100 | ][(range.end != 255) as usize] 101 | } 102 | 103 | /// Construct a ByteSet from a `RangeFrom` value, i.e. 
`x..` 104 | #[inline] 105 | #[must_use] 106 | pub const fn from_range_from(range: ops::RangeFrom) -> Self { 107 | Self::from_range_to(..range.start).not() 108 | } 109 | 110 | /// Construct a ByteSet from a `RangeToInclusive` value, i.e. `x..y` 111 | #[inline] 112 | #[must_use] 113 | pub const fn from_range(range: ops::Range) -> Self { 114 | Self::from_range_from(range.start..) 115 | .intersection(Self::from_range_to(..range.end)) 116 | } 117 | 118 | /// Construct a ByteSet from a `RangeInclusive` value, i.e. `x..=y` 119 | #[inline] 120 | #[must_use] 121 | pub const fn from_range_inclusive(range: ops::RangeInclusive) -> Self { 122 | Self::from_range_from(*range.start()..) 123 | .intersection(Self::from_range_to_inclusive(..=*range.end())) 124 | } 125 | 126 | /// Returns a set containing uniformly-distributed random bytes from `rng`. 127 | /// 128 | /// This uses [`fill_bytes`] under the hood. 129 | /// 130 | /// [`fill_bytes`]: https://docs.rs/rand_core/0.5.*/rand_core/trait.RngCore.html#tymethod.fill_bytes 131 | #[cfg(any(feature = "rand", feature = "rand_core"))] 132 | #[cfg_attr(docsrs, doc(cfg(any(feature = "rand", feature = "rand_core"))))] 133 | #[inline] 134 | pub fn rand(mut rng: R) -> Self { 135 | let mut set = Self::new(); 136 | rng.fill_bytes(set.as_raw_bytes_mut()); 137 | set 138 | } 139 | 140 | /// Returns a set containing uniformly-distributed random bytes from `rng`, 141 | /// or `Err` if `rng` failed. 142 | /// 143 | /// This uses [`try_fill_bytes`] under the hood. 
144 | /// 145 | /// [`try_fill_bytes`]: https://docs.rs/rand_core/0.5.*/rand_core/trait.RngCore.html#tymethod.try_fill_bytes 146 | #[cfg(any(feature = "rand", feature = "rand_core"))] 147 | #[cfg_attr(docsrs, doc(cfg(any(feature = "rand", feature = "rand_core"))))] 148 | #[inline] 149 | pub fn try_rand( 150 | mut rng: R, 151 | ) -> Result { 152 | let mut set = Self::new(); 153 | rng.try_fill_bytes(set.as_raw_bytes_mut())?; 154 | Ok(set) 155 | } 156 | 157 | /// Returns `true` if `self` contains no bytes. 158 | /// 159 | /// This is more efficient than checking `self.len() == 0`. 160 | #[inline] 161 | #[must_use] 162 | #[allow(clippy::let_and_return)] 163 | pub const fn is_empty(&self) -> bool { 164 | let is_empty = (self.0[0] == 0) 165 | & (self.0[1] == 0) 166 | & (self.0[2] == 0) 167 | & (self.0[3] == 0); 168 | 169 | #[cfg(not(target_pointer_width = "64"))] 170 | { 171 | is_empty 172 | & (self.0[4] == 0) 173 | & (self.0[5] == 0) 174 | & (self.0[6] == 0) 175 | & (self.0[7] == 0) 176 | } 177 | 178 | #[cfg(target_pointer_width = "64")] 179 | is_empty 180 | } 181 | 182 | /// Returns `true` if `self` contains all bytes. 183 | /// 184 | /// This is more efficient than checking `self.len() == 256`. 185 | #[inline] 186 | #[must_use] 187 | #[allow(clippy::let_and_return)] 188 | pub const fn is_full(&self) -> bool { 189 | let is_full = (self.0[0] == !0) 190 | & (self.0[1] == !0) 191 | & (self.0[2] == !0) 192 | & (self.0[3] == !0); 193 | 194 | #[cfg(not(target_pointer_width = "64"))] 195 | { 196 | is_full 197 | & (self.0[4] == !0) 198 | & (self.0[5] == !0) 199 | & (self.0[6] == !0) 200 | & (self.0[7] == !0) 201 | } 202 | 203 | #[cfg(target_pointer_width = "64")] 204 | is_full 205 | } 206 | 207 | /// Returns the number of bytes contained in `self`. 
208 | #[cfg_attr(target_feature = "popcnt", inline)] 209 | #[must_use] 210 | #[allow(clippy::let_and_return)] 211 | pub const fn len(&self) -> usize { 212 | let len = (self.0[0].count_ones() as usize) 213 | + (self.0[1].count_ones() as usize) 214 | + (self.0[2].count_ones() as usize) 215 | + (self.0[3].count_ones() as usize); 216 | 217 | #[cfg(not(target_pointer_width = "64"))] 218 | { 219 | len + (self.0[4].count_ones() as usize) 220 | + (self.0[5].count_ones() as usize) 221 | + (self.0[6].count_ones() as usize) 222 | + (self.0[7].count_ones() as usize) 223 | } 224 | 225 | #[cfg(target_pointer_width = "64")] 226 | len 227 | } 228 | 229 | /// Removes all bytes from `self`. 230 | #[inline] 231 | pub fn clear(&mut self) { 232 | *self = ByteSet::new(); 233 | } 234 | 235 | /// Returns the first (least) byte in `self`, or `None` if `self` is empty. 236 | pub fn first(&self) -> Option { 237 | for (i, &chunk) in self.0.iter().enumerate() { 238 | if let Some(lsb) = chunk::lsb(chunk) { 239 | return Some(lsb + (i * chunk::INDEX_OFFSET) as u8); 240 | } 241 | } 242 | None 243 | } 244 | 245 | /// Removes the first (least) byte in `self` and returns it, or `None` if 246 | /// `self` is empty. 247 | pub fn pop_first(&mut self) -> Option { 248 | for (i, chunk) in self.0.iter_mut().enumerate() { 249 | if let Some(lsb) = chunk::pop_lsb(chunk) { 250 | return Some(lsb + (i * chunk::INDEX_OFFSET) as u8); 251 | } 252 | } 253 | None 254 | } 255 | 256 | /// Returns the last (greatest) byte in `self`, or `None` if `self` is 257 | /// empty. 258 | pub fn last(&self) -> Option { 259 | for (i, &chunk) in self.0.iter().rev().enumerate() { 260 | if let Some(msb) = chunk::msb(chunk) { 261 | let i = Self::LAST_SLOT_INDEX - i; 262 | return Some(msb + (i * chunk::INDEX_OFFSET) as u8); 263 | } 264 | } 265 | None 266 | } 267 | 268 | /// Removes the last (least) byte in `self` and returns it, or `None` if 269 | /// `self` is empty. 
270 | pub fn pop_last(&mut self) -> Option { 271 | for (i, chunk) in self.0.iter_mut().rev().enumerate() { 272 | if let Some(msb) = chunk::pop_msb(chunk) { 273 | let i = Self::LAST_SLOT_INDEX - i; 274 | return Some(msb + (i * chunk::INDEX_OFFSET) as u8); 275 | } 276 | } 277 | None 278 | } 279 | 280 | /// Inserts `byte` into `self` in-place. 281 | /// 282 | /// Unlike [`HashSet::insert`] and [`BTreeSet::insert`], this does not 283 | /// return a `bool` for whether `byte` was not present. This is because it's 284 | /// just as efficient to call [`contains`](#method.contains) before. 285 | /// 286 | /// [`HashSet::insert`]: https://doc.rust-lang.org/std/collections/struct.HashSet.html#method.insert 287 | /// [`BTreeSet::insert`]: https://doc.rust-lang.org/std/collections/struct.BTreeSet.html#method.insert 288 | #[inline] 289 | pub fn insert(&mut self, byte: u8) { 290 | let (index, shift) = chunk_index_and_shift(byte); 291 | 292 | self.0[index] |= 1 << shift; 293 | } 294 | 295 | /// Inserts all bytes of `other` into `self` in-place. 296 | #[inline] 297 | pub fn insert_all(&mut self, other: Self) { 298 | self.0[0] |= other.0[0]; 299 | self.0[1] |= other.0[1]; 300 | self.0[2] |= other.0[2]; 301 | self.0[3] |= other.0[3]; 302 | 303 | #[cfg(not(target_pointer_width = "64"))] 304 | { 305 | self.0[4] |= other.0[4]; 306 | self.0[5] |= other.0[5]; 307 | self.0[6] |= other.0[6]; 308 | self.0[7] |= other.0[7]; 309 | } 310 | } 311 | 312 | /// Returns a copy of `self` with `byte` inserted. 313 | #[inline] 314 | #[must_use] 315 | pub const fn inserting(mut self, byte: u8) -> Self { 316 | let (index, shift) = chunk_index_and_shift(byte); 317 | 318 | self.0[index] |= 1 << shift; 319 | self 320 | } 321 | 322 | /// Returns a copy of `self` with all of `other` inserted. 323 | /// 324 | /// This is equivalent to the [`union`](#method.union) method. 
325 | #[inline] 326 | #[must_use] 327 | pub const fn inserting_all(self, other: Self) -> Self { 328 | self.union(other) 329 | } 330 | 331 | /// Removes `byte` from `self` in-place. 332 | /// 333 | /// Unlike [`HashSet::remove`] and [`BTreeSet::remove`], this does not 334 | /// return a `bool` for whether `byte` was present. This is because it's 335 | /// just as efficient to call [`contains`](#method.contains) before. 336 | /// 337 | /// [`HashSet::remove`]: https://doc.rust-lang.org/std/collections/struct.HashSet.html#method.remove 338 | /// [`BTreeSet::remove`]: https://doc.rust-lang.org/std/collections/struct.BTreeSet.html#method.remove 339 | #[inline] 340 | pub fn remove(&mut self, byte: u8) { 341 | let (index, shift) = chunk_index_and_shift(byte); 342 | 343 | self.0[index] &= !(1 << shift); 344 | } 345 | 346 | /// Removes all bytes of `other` from `self` in-place. 347 | #[inline] 348 | pub fn remove_all(&mut self, other: Self) { 349 | *self &= !other; 350 | } 351 | 352 | /// Returns a copy of `self` with `byte` removed. 353 | #[inline] 354 | #[must_use] 355 | pub const fn removing(mut self, byte: u8) -> Self { 356 | let (index, shift) = chunk_index_and_shift(byte); 357 | 358 | self.0[index] &= !(1 << shift); 359 | self 360 | } 361 | 362 | /// Returns a copy of `self` with `byte` removed. 363 | #[inline] 364 | #[must_use] 365 | pub const fn removing_all(self, other: Self) -> Self { 366 | self.difference(other) 367 | } 368 | 369 | /// Sets `byte` in `self` to `enabled` in-place. 370 | #[inline] 371 | pub fn set(&mut self, byte: u8, enabled: bool) { 372 | let (index, shift) = chunk_index_and_shift(byte); 373 | let chunk = self.0[index]; 374 | 375 | self.0[index] = (chunk & !(1 << shift)) | ((enabled as Chunk) << shift); 376 | } 377 | 378 | /// Returns a copy of `self` with `byte` set to `enabled`. 
379 | #[inline] 380 | #[must_use] 381 | pub const fn setting(mut self, byte: u8, enabled: bool) -> Self { 382 | let (index, shift) = chunk_index_and_shift(byte); 383 | let chunk = self.0[index]; 384 | 385 | self.0[index] = (chunk & !(1 << shift)) | ((enabled as Chunk) << shift); 386 | self 387 | } 388 | 389 | /// Returns `true` if `byte` is contained in `self`. 390 | #[inline] 391 | #[must_use] 392 | pub const fn contains(&self, byte: u8) -> bool { 393 | let (index, shift) = chunk_index_and_shift(byte); 394 | 395 | self.0[index] & (1 << shift) != 0 396 | } 397 | 398 | #[inline] 399 | #[must_use] 400 | const fn chunk_and_or(&self, other: &Self) -> Chunk { 401 | map_reduce_chunks!(self, other, &, |) 402 | } 403 | 404 | /// Returns `true` if `self` contains any bytes in `other`. 405 | #[inline] 406 | #[must_use] 407 | // Not `const` because it may be later improved with SIMD intrinsics. 408 | pub fn contains_any(&self, other: &Self) -> bool { 409 | self.chunk_and_or(other) != 0 410 | } 411 | 412 | #[inline] 413 | pub(crate) const fn _is_subset(&self, other: &Self) -> bool { 414 | self.intersection(*other).eq(self) 415 | } 416 | 417 | /// Returns `true` if `other` contains all bytes in `self`. 418 | #[inline] 419 | #[must_use] 420 | // Not `const` because it may be later improved with SIMD intrinsics. 421 | pub fn is_subset(&self, other: &Self) -> bool { 422 | self._is_subset(other) 423 | } 424 | 425 | /// Returns `true` if `other` contains all bytes in `self` and at least one 426 | /// other byte not contained in `self`. 427 | /// 428 | /// This is also known as a "proper subset". 429 | #[must_use] 430 | // Not inlined because lots of code is generated on x86. 431 | // Not `const` because it may be later improved with SIMD intrinsics. 432 | pub fn is_strict_subset(&self, other: &Self) -> bool { 433 | // On x86, checking inequality first produces less code and uses fewer 434 | // registers. 
435 | self.ne(other) && self.is_subset(other) 436 | } 437 | 438 | /// Returns `true` if `self` contains all bytes in `other`. 439 | #[inline] 440 | #[must_use] 441 | pub fn is_superset(&self, other: &Self) -> bool { 442 | other.is_subset(self) 443 | } 444 | 445 | /// Returns `true` if `self` contains all bytes in `other` and at least one 446 | /// other byte not contained in `other`. 447 | /// 448 | /// This is also known as a "proper superset". 449 | #[inline] 450 | #[must_use] 451 | pub fn is_strict_superset(&self, other: &Self) -> bool { 452 | other.is_strict_subset(self) 453 | } 454 | 455 | /// Returns `true` if `self` and `other` have no bytes in common. 456 | #[inline] 457 | #[must_use] 458 | // Not `const` because it may be later improved with SIMD intrinsics. 459 | pub fn is_disjoint(&self, other: &Self) -> bool { 460 | self.intersection(*other).is_empty() 461 | } 462 | 463 | /// Returns a set with the bytes contained in `self`, but not in `other`. 464 | #[inline] 465 | #[must_use] 466 | pub const fn difference(self, other: Self) -> Self { 467 | self.intersection(other.not()) 468 | } 469 | 470 | /// Returns a set with the bytes contained in `self` or `other`, but not in 471 | /// both. 472 | #[inline] 473 | #[must_use] 474 | pub const fn symmetric_difference(self, other: Self) -> Self { 475 | map_chunks!(self, ^, other) 476 | } 477 | 478 | /// Returns a set with the bytes contained both in `self` and `other`. 479 | #[inline] 480 | #[must_use] 481 | pub const fn intersection(self, other: Self) -> Self { 482 | map_chunks!(self, &, other) 483 | } 484 | 485 | /// Returns a new set with the bytes contained in `self` or `other`. 486 | #[inline] 487 | #[must_use] 488 | pub const fn union(self, other: Self) -> Self { 489 | map_chunks!(self, |, other) 490 | } 491 | 492 | /// Returns a new set with the bytes not contained in `self`. 493 | /// 494 | /// This exists because the [`Not`] trait cannot be used in `const` yet. 
495 | /// 496 | /// [`Not`]: https://doc.rust-lang.org/std/ops/trait.Not.html 497 | #[inline] 498 | #[must_use] 499 | #[allow(clippy::should_implement_trait)] 500 | pub const fn not(self) -> Self { 501 | map_chunks!(self, !) 502 | } 503 | 504 | /// Returns `self` with its bits reversed. 505 | /// 506 | /// This is equivalent to checking for `255 - b` in all subsequent searches 507 | /// of `b`. 508 | #[must_use] 509 | // The `rbit` instruction makes this reasonable to inline. 510 | #[cfg_attr(target_arch = "aarch64", inline)] 511 | // Not inlined because lots of code is generated on x86. 512 | pub const fn reverse_bits(self) -> Self { 513 | Self([ 514 | #[cfg(not(target_pointer_width = "64"))] 515 | self.0[7].reverse_bits(), 516 | #[cfg(not(target_pointer_width = "64"))] 517 | self.0[6].reverse_bits(), 518 | #[cfg(not(target_pointer_width = "64"))] 519 | self.0[5].reverse_bits(), 520 | #[cfg(not(target_pointer_width = "64"))] 521 | self.0[4].reverse_bits(), 522 | self.0[3].reverse_bits(), 523 | self.0[2].reverse_bits(), 524 | self.0[1].reverse_bits(), 525 | self.0[0].reverse_bits(), 526 | ]) 527 | } 528 | 529 | /// Returns `true` if `self` and `other` contain the same bytes. 530 | /// 531 | /// This exists because `PartialEq` is currently unstable in `const`. 532 | #[inline] 533 | #[must_use] 534 | #[allow(clippy::should_implement_trait)] 535 | #[allow(clippy::let_and_return)] 536 | pub const fn eq(&self, other: &Self) -> bool { 537 | let eq = (self.0[0] == other.0[0]) 538 | & (self.0[1] == other.0[1]) 539 | & (self.0[2] == other.0[2]) 540 | & (self.0[3] == other.0[3]); 541 | 542 | #[cfg(not(target_pointer_width = "64"))] 543 | { 544 | eq & (self.0[4] == other.0[4]) 545 | & (self.0[5] == other.0[5]) 546 | & (self.0[6] == other.0[6]) 547 | & (self.0[7] == other.0[7]) 548 | } 549 | 550 | #[cfg(target_pointer_width = "64")] 551 | eq 552 | } 553 | 554 | /// Returns `true` if `self` and `other` do not contain the same bytes. 
555 | /// 556 | /// This exists because `PartialEq` is currently unstable in `const`. 557 | #[inline] 558 | #[must_use] 559 | #[allow(clippy::should_implement_trait)] 560 | pub const fn ne(&self, other: &Self) -> bool { 561 | !self.eq(other) 562 | } 563 | } 564 | --------------------------------------------------------------------------------