├── .github ├── dependabot.yml └── workflows │ └── CI.yml ├── .gitignore ├── Cargo.toml ├── LICENSE.txt ├── benches └── kensler.rs ├── docs └── README.md └── src ├── error.rs ├── iterator.rs ├── kensler.rs └── lib.rs /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "13:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /.github/workflows/CI.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | name: Build ${{ matrix.os }} 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: 19 | - macos-latest 20 | - windows-latest 21 | - ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Install rust 25 | uses: actions-rs/toolchain@v1 26 | with: 27 | toolchain: stable 28 | - name: Build 29 | uses: actions-rs/cargo@v1 30 | with: 31 | command: build 32 | args: --verbose --all-features 33 | - name: Test 34 | uses: actions-rs/cargo@v1 35 | with: 36 | command: test 37 | args: --verbose --all 38 | - uses: actions/cache@v2 39 | with: 40 | path: | 41 | ~/.cargo/registry 42 | ~/.cargo/git 43 | target 44 | key: ${{ matrix.job.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hashed-permutation" 3 | description = "A fast, instant-access way to permute a range of numbers" 4 | repository = "https://github.com/afnanenayet/hashed-permutation.git" 5 | keywords = ["hashed", "permutation", "permute", "fast", "instant"] 6 | version = "3.0.3-alpha.0" 7 | authors = ["Afnan Enayet "] 8 | edition = "2018" 9 | license = "MIT" 10 | readme = "docs/README.md" 11 | resolver = "2" 12 | 13 | [badges] 14 | azure-devops = { project = "afnanenayet/hashed-permutation", pipeline = "afnanenayet.hashed-permutation" } 15 | maintenance = { status = "actively-developed" } 16 | 17 | [dependencies] 18 | rand = { version = "0.9", optional = true } 19 | thiserror = "1.0" 20 | 21 | [features] 22 | default = [] 23 | use-rand = ["rand"] 24 | 25 | [dev-dependencies] 26 | divan = "0.1.17" 27 | hashed-permutation = { path = ".", features = ["use-rand"] } 28 | 29 | [[bench]] 30 | name = "kensler" 31 | harness = false 32 | 33 | [profile.bench] 34 | debug = 2 35 | lto = "thin" 36 | codegen-units = 1 37 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019 Afnan Enayet 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /benches/kensler.rs: -------------------------------------------------------------------------------- 1 | use divan::counter::ItemsCount; 2 | use divan::{black_box_drop, Bencher}; 3 | use hashed_permutation::{HashedIter, HashedPermutation}; 4 | use rand::seq::SliceRandom; 5 | use rand::{rng, Rng}; 6 | use std::num::NonZeroU32; 7 | 8 | fn main() { 9 | divan::main(); 10 | } 11 | 12 | fn lens() -> impl IntoIterator { 13 | vec![1u32, 2, 4, 8, 16, 20, 21, 22] 14 | .iter() 15 | .map(|x| (1 << x) as u32) 16 | .collect::>() 17 | } 18 | 19 | /// Benchmarks by setting the size of the permutation vector using len_exp as the shift factor for 20 | /// the length. 21 | #[divan::bench(args = lens())] 22 | fn permutation(bencher: Bencher, length: u32) { 23 | let mut rng = rng(); 24 | let seed: u32 = rng.random(); 25 | let perm = HashedPermutation::new_with_seed(NonZeroU32::new(length).unwrap(), seed); 26 | let l: u32 = length.into(); 27 | bencher.counter(ItemsCount::new(l)).bench(|| { 28 | for i in 0..l { 29 | black_box_drop(perm.shuffle(i).unwrap()); 30 | } 31 | l 32 | }); 33 | } 34 | 35 | #[divan::bench(args = lens())] 36 | fn iterator(bencher: Bencher, length: u32) { 37 | let mut rng = rng(); 38 | let seed: u32 = rng.random(); 39 | let l: u32 = length.into(); 40 | bencher 41 | .counter(ItemsCount::new(l)) 42 | .with_inputs(|| HashedIter::new_with_seed(NonZeroU32::new(length).unwrap(), seed)) 43 | .bench_refs(|perm| { 44 | perm.for_each(black_box_drop); 45 | }); 46 | } 47 | 48 | #[divan::bench(args = lens())] 49 | fn naive_shuffle(bencher: Bencher, length: u32) { 50 | bencher 51 | .counter(ItemsCount::new(length)) 52 | .with_inputs(|| -> Vec { (0..length).collect::>() }) 53 | .bench_local_refs(|v| { 54 | let mut rng = rng(); 55 | v.shuffle(&mut rng); 56 | }) 57 | } 58 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # hashed-permutation 2 | 3 | [![CI](https://github.com/afnanenayet/hashed-permutation/actions/workflows/CI.yml/badge.svg)](https://github.com/afnanenayet/hashed-permutation/actions/workflows/CI.yml) 4 | [![crates badge](https://meritbadge.herokuapp.com/hashed-permutation)](https://crates.io/crates/hashed-permutation) 5 | [![Documentation](https://docs.rs/hashed-permutation/badge.svg)](https://docs.rs/hashed-permutation) 6 | ![License](https://img.shields.io/crates/l/hashed-permutation/1.0.0.svg) 7 | 8 | ## Synopsis 9 | 10 | This is an implementation of Andrew Kensler's hashed permutation, which allows 11 | you to take an array of the elements [0 ... n) and shuffle it with no memory 12 | overhead and very little computational overhead. This works by using a clever 13 | hash function to effectively permute all of the elements in the array. 14 | 15 | Basically, you get a nearly free method to shuffle a bunch of numbers that 16 | doesn't require you to allocate a vector of size `n`, letting you sample the 17 | set without replacement. 18 | 19 | You can find the paper here: https://graphics.pixar.com/library/MultiJitteredSampling/paper.pdf. 20 | I have a little writeup of how the algorithm works [here](https://afnan.io/posts/2019-04-05-explaining-the-hashed-permutation), 21 | and [Timothy Hobbs](https://github.com/timthelion) made a nice writeup explaining how to use the library itself 22 | [here](https://timothy.hobbs.cz/rust-play/hashed-permutation.html). 23 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | /// The different types of errors that can arise from this crate 4 | #[derive(Debug, Error)] 5 | // We allow the name repetition because this struct will not make sense outside of the crate 6 | // otherwise, and this is exported as part of the library. 7 | #[allow(clippy::module_name_repetitions)] 8 | pub enum PermutationError { 9 | /// This error is invoked when the caller attempts to use an index on the `shuffle` method that 10 | /// is larger than the size of the set. 11 | /// 12 | /// The user can only shuffle indices that are within the set, otherwise the hashing algorithm 13 | /// does not work. `shuffle` is the index that the user called, and `max_shuffle` is the size 14 | /// of the permutation set (which is also the upper bound for the calling index). 15 | #[error("Attempted to shuffle index {shuffle}, but the length of the array is {max_shuffle}")] 16 | ShuffleOutOfRange { shuffle: u32, max_shuffle: u32 }, 17 | } 18 | 19 | /// A permutation result, which is simply an alias for any type that could return a permutation 20 | /// error. 21 | pub type PermutationResult = Result; 22 | -------------------------------------------------------------------------------- /src/iterator.rs: -------------------------------------------------------------------------------- 1 | use crate::HashedPermutation; 2 | use std::num::NonZeroU32; 3 | 4 | /// An iterator that allows you to iterate over a sequence of permuted numbers with O(1) space. 5 | pub struct HashedIter { 6 | /// The "engine" driving the permutations 7 | permutation_engine: HashedPermutation, 8 | 9 | /// The current index that's being iterated on 10 | current_idx: u32, 11 | } 12 | 13 | /// The iterator version of the hashed permutation algorithm 14 | /// 15 | /// This allows you to use an iterator as you would normally. 16 | /// 17 | /// ``` 18 | /// # use hashed_permutation::HashedIter; 19 | /// use std::num::NonZeroU32; 20 | /// 21 | /// let mut iterator = HashedIter::new_with_seed(NonZeroU32::new(5).unwrap(), 100); 22 | /// 23 | /// for i in iterator { 24 | /// println!("{}", i); 25 | /// } 26 | /// ``` 27 | impl HashedIter { 28 | /// Create a new hashed iterator with a given length 29 | /// 30 | /// This will create an iterator with an underlying `HashedPermutation` engine with a random 31 | /// seed. The seed is generated using the standard library's `thread_rng` class. 32 | #[cfg(feature = "use-rand")] 33 | pub fn new(length: NonZeroU32) -> Self { 34 | let permutation_engine = HashedPermutation::new(length); 35 | 36 | Self { 37 | permutation_engine, 38 | current_idx: 0, 39 | } 40 | } 41 | 42 | /// Create a new hashed iterator with a given length and a seed value 43 | pub fn new_with_seed(length: NonZeroU32, seed: u32) -> Self { 44 | let permutation_engine = HashedPermutation::new_with_seed(length, seed); 45 | 46 | Self { 47 | permutation_engine, 48 | current_idx: 0, 49 | } 50 | } 51 | } 52 | 53 | impl Iterator for HashedIter { 54 | type Item = u32; 55 | 56 | fn next(&mut self) -> Option { 57 | if self.current_idx >= self.permutation_engine.length.into() { 58 | return None; 59 | } 60 | let res = unsafe { 61 | self.permutation_engine 62 | .shuffle(self.current_idx) 63 | .unwrap_unchecked() 64 | }; 65 | self.current_idx += 1; 66 | Some(res) 67 | } 68 | } 69 | 70 | #[cfg(test)] 71 | mod test { 72 | use super::*; 73 | use std::collections::HashSet; 74 | 75 | /// A convenient helper method that returns a pair of lengths and seeds (in that order). 76 | /// 77 | /// This method defines the lengths and the seeds for the test cases, since these are reused 78 | /// in the tests, and it's best practice to consolidate them in one place so code is not 79 | /// repeated. 80 | fn lengths_and_seeds() -> (Vec, Vec) { 81 | let lengths: Vec = vec![100, 5, 13, 128, 249] 82 | .iter() 83 | .map(|&x| NonZeroU32::new(x).unwrap()) 84 | .collect(); 85 | let seeds = vec![100, 5, 13, 128, 249]; 86 | assert_eq!(lengths.len(), seeds.len()); 87 | (lengths, seeds) 88 | } 89 | 90 | #[test] 91 | // This method checks to see that a permutation does not have any collisions and that every 92 | // number maps to another unique number. In other words, we are testing to see whether we have 93 | // a bijective function. 94 | fn test_bijection() { 95 | let (lengths, seeds) = lengths_and_seeds(); 96 | 97 | for (&length, seed) in lengths.iter().zip(seeds) { 98 | let it = HashedIter::new_with_seed(length, seed); 99 | 100 | // Check that each entry doesn't exist 101 | // Check that every number is "hit" (as they'd have to be) for a perfect bijection 102 | // Check that the number is within range 103 | let mut set = HashSet::with_capacity(length.get() as usize); 104 | 105 | for elem in it { 106 | // Make sure there are no duplicates 107 | assert!(set.insert(elem)); 108 | } 109 | // Need to dereference the types into regular integers 110 | let mut result: Vec = set.into_iter().collect(); 111 | result.sort(); 112 | let expected: Vec = (0..length.get()).collect(); 113 | assert_eq!(expected, result); 114 | } 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/kensler.rs: -------------------------------------------------------------------------------- 1 | //! The module for the hashed permutation implementation and the struct that stores its state. 2 | //! 3 | //! This method was first conceived by Andrew Kensler of Pixar Research, and discussed in his 2013 4 | //! [paper](https://graphics.pixar.com/library/MultiJitteredSampling/paper.pdf) 5 | //! on correlated multi-jittered sampling. 6 | 7 | use crate::error::{PermutationError, PermutationResult}; 8 | use std::num::{NonZeroU32, Wrapping}; 9 | 10 | #[cfg(feature = "use-rand")] 11 | use rand::prelude::*; 12 | 13 | /// The `HashedPermutation` struct stores the initial `seed` and `length` of the permutation 14 | /// vector. In other words, if you want to shuffle the numbers from `0..n`, then `length = n`. 15 | /// 16 | /// Because the shuffle is performed using bit arithmetic, the fields have to be 32 bit integers. 17 | /// Unfortunately, larger types are not supported at this time. 18 | #[derive(Clone, Debug)] 19 | pub struct HashedPermutation { 20 | /// The random seed that dictates which permutation you want to use. The shuffle is 21 | /// deterministic, so using the same seed will yield the same permutation every time. 22 | pub seed: u32, 23 | 24 | /// The upper bound on the range of numbers to shuffle (from `0..length`). This value must be 25 | /// greater zero, otherwise undefined behavior may occur. 26 | pub length: NonZeroU32, 27 | } 28 | 29 | impl HashedPermutation { 30 | /// Create a new instance of the hashed permutation with a random seed. 31 | /// 32 | /// This method creates a hashed permutation of some length and initializes the seed to some 33 | /// random number created by Rust's `thread_rng`. 34 | #[cfg(feature = "use-rand")] 35 | pub fn new(length: NonZeroU32) -> Self { 36 | // Uses thread-rng under the hood 37 | let seed = rand::random(); 38 | HashedPermutation { length, seed } 39 | } 40 | 41 | /// Create a new instance of the hashed permutation given a length and seed 42 | pub fn new_with_seed(length: NonZeroU32, seed: u32) -> Self { 43 | HashedPermutation { seed, length } 44 | } 45 | 46 | /// Shuffle or permute a particular value. 47 | /// 48 | /// This method uses the technique described in Kensler's paper to perform an in-place shuffle 49 | /// with no memory overhead. 50 | // We disable the `unreadable_literal` because these literals are arbitrary and don't really 51 | // need to be readable anyways. 52 | #[allow(clippy::unreadable_literal)] 53 | pub fn shuffle(&self, input: u32) -> PermutationResult { 54 | if input >= self.length.get() { 55 | return Err(PermutationError::ShuffleOutOfRange { 56 | shuffle: input, 57 | max_shuffle: self.length.get(), 58 | }); 59 | } 60 | let mut i = Wrapping(input); 61 | let n = self.length.get(); 62 | let seed = Wrapping(self.seed); 63 | let w = Wrapping(n.checked_next_power_of_two().map_or(u32::MAX, |x| x - 1)); 64 | 65 | while i.0 >= n { 66 | i ^= seed; 67 | i *= 0xe170893d; 68 | i ^= seed >> 16; 69 | i ^= (i & w) >> 4; 70 | i ^= seed >> 8; 71 | i *= 0x0929eb3f; 72 | i ^= seed >> 23; 73 | i ^= (i & w) >> 1; 74 | i *= Wrapping(1) | seed >> 27; 75 | i *= 0x6935fa69; 76 | i ^= (i & w) >> 11; 77 | i *= 0x74dcb303; 78 | i ^= (i & w) >> 2; 79 | i *= 0x9e501cc3; 80 | i ^= (i & w) >> 2; 81 | i *= 0xc860a3df; 82 | i &= w; 83 | i ^= i >> 5; 84 | } 85 | Ok((i + seed).0 % n) 86 | } 87 | } 88 | 89 | #[cfg(test)] 90 | mod test { 91 | use super::*; 92 | use std::collections::HashMap; 93 | 94 | /// A convenient helper method that returns a pair of lengths and seeds (in that order). 95 | /// 96 | /// This method defines the lengths and the seeds for the test cases, since these are reused 97 | /// in the tests, and it's best practice to consolidate them in one place so code is not 98 | /// repeated. 99 | fn lengths_and_seeds() -> (Vec, Vec) { 100 | let lengths: Vec = vec![100, 5, 13, 128, 249] 101 | .iter() 102 | .map(|&x| NonZeroU32::new(x).unwrap()) 103 | .collect(); 104 | let seeds = vec![100, 5, 13, 128, 249]; 105 | assert_eq!(lengths.len(), seeds.len()); 106 | (lengths, seeds) 107 | } 108 | 109 | #[test] 110 | // This method is a sanity check that tests to see if a shuffle has points that all stay within 111 | // the domain that they are supposed to. 112 | fn test_domain() { 113 | let (lengths, seeds) = lengths_and_seeds(); 114 | 115 | for (&length, seed) in lengths.iter().zip(seeds) { 116 | let perm = HashedPermutation { seed, length }; 117 | 118 | for i in 0..perm.length.get() { 119 | let res = perm.shuffle(i); 120 | assert!(res.is_ok()); 121 | assert!(res.unwrap() < perm.length.get()); 122 | } 123 | } 124 | } 125 | 126 | #[test] 127 | // This method checks to see that a permutation does not have any collisions and that every 128 | // number maps to another unique number. In other words, we are testing to see whether we have 129 | // a bijective function. 130 | fn test_bijection() { 131 | let (lengths, seeds) = lengths_and_seeds(); 132 | 133 | for (length, seed) in lengths.iter().zip(seeds) { 134 | let perm = HashedPermutation { 135 | seed, 136 | length: *length, 137 | }; 138 | 139 | // Check that each entry doesn't exist 140 | // Check that every number is "hit" (as they'd have to be) for a perfect bijection 141 | // Check that the number is within range 142 | let mut map = HashMap::with_capacity(length.get() as usize); 143 | 144 | for i in 0..perm.length.get() { 145 | let res = perm.shuffle(i); 146 | let res = res.unwrap(); 147 | assert!(map.insert(res, i).is_none()); 148 | } 149 | let (mut keys_vec, mut vals_vec): (Vec, Vec) = map.iter().unzip(); 150 | keys_vec.sort(); 151 | vals_vec.sort(); 152 | let ground_truth: Vec = (0..length.get()).collect(); 153 | assert_eq!(ground_truth, keys_vec); 154 | assert_eq!(ground_truth, vals_vec); 155 | } 156 | } 157 | 158 | #[test] 159 | fn test_out_of_range() { 160 | let lengths: Vec = vec![1, 50, 256, 18] 161 | .iter() 162 | .map(|&x| NonZeroU32::new(x).unwrap()) 163 | .collect(); 164 | let offsets = vec![0, 1, 5, 15, 100]; 165 | 166 | for length in lengths { 167 | let perm = HashedPermutation { seed: 0, length }; 168 | 169 | for offset in &offsets { 170 | let result = perm.shuffle(length.get() + offset); 171 | assert!(result.is_err()); 172 | } 173 | } 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This crate implements an algorithm that performs zero-cost permutations and shuffling on a 2 | //! range of numbers. 3 | //! 4 | //! This method, discovered by Andrew Kensler in 2013, uses bit-twiddling to permute a range of 5 | //! numbers, from `[0..n)` without needing to mutate state or store the whole range of numbers. It 6 | //! is extremely efficient, with no memory overhead (i.e. you don't have to store the whole range 7 | //! of numbers). 8 | //! 9 | //! This is effectively the same as taking some vector of numbers from `[0..n)`, randomly shuffling 10 | //! each element, and then calling the nth index of that vector. Kensler's algorithm offers a way 11 | //! to achieve the same effect, except we don't need to store a whole vector for that range of 12 | //! numbers. 13 | //! 14 | //! # Example Usage 15 | //! 16 | //! Using this library is fairly simple: 17 | //! 18 | //! ```rust 19 | //! # use hashed_permutation::HashedPermutation; 20 | //! use std::num::NonZeroU32; 21 | //! 22 | //! let perm = HashedPermutation { 23 | //! seed: 1234, 24 | //! length: NonZeroU32::new(10).unwrap(), 25 | //! }; 26 | //! 27 | //! // Let's pick a randomly permuted number 28 | //! let permuted_number = perm.shuffle(0).unwrap(); 29 | //! ``` 30 | //! 31 | //! ## Iterators 32 | //! 33 | //! You can also use this structure as an iterator to iterate through a permuted set from `(0..n)`. 34 | //! 35 | //! ```rust 36 | //! # use hashed_permutation::HashedIter; 37 | //! use std::num::NonZeroU32; 38 | //! 39 | //! // Loop from (0..10) in a shuffled set 40 | //! let mut iterator = HashedIter::new_with_seed(NonZeroU32::new(10).unwrap(), 100); 41 | //! 42 | //! for i in iterator { 43 | //! println!("{}", i); 44 | //! } 45 | //! ``` 46 | 47 | mod error; 48 | mod iterator; 49 | mod kensler; 50 | 51 | pub use error::{PermutationError, PermutationResult}; 52 | pub use iterator::HashedIter; 53 | pub use kensler::HashedPermutation; 54 | --------------------------------------------------------------------------------