├── .gitignore ├── .env ├── src ├── dedicated │ ├── mod.rs │ └── cs_u16.rs ├── traits │ ├── mod.rs │ └── radixsort.rs ├── types │ ├── mod.rs │ └── bool.rs ├── algo │ ├── mod.rs │ ├── k_way_merge_mt.rs │ └── k_way_merge.rs ├── tests │ ├── mod.rs │ ├── utils_mt.rs │ ├── dedicated.rs │ ├── speed_sort_ded.rs │ ├── comparative_sort.rs │ ├── test_helpers.rs │ ├── utils.rs │ ├── regions_graph.rs │ └── types.rs ├── sorts │ ├── mod.rs │ ├── utils_mt.rs │ ├── lsd_stable_sort.rs │ ├── boolean_sort.rs │ ├── american_flag_sort.rs │ ├── counting_sort.rs │ ├── msd_stable_sort.rs │ ├── lsd_sort.rs │ ├── msd_sort.rs │ ├── ska_sort.rs │ ├── comparative_sort.rs │ ├── voracious_sort.rs │ ├── rollercoaster_sort.rs │ ├── peeka_sort.rs │ └── dlsd_sort.rs └── generators │ ├── string.rs │ ├── mod.rs │ ├── signed_i128.rs │ ├── unsigned_u128.rs │ ├── boolean.rs │ ├── char.rs │ ├── signed_i16.rs │ ├── unsigned_u16.rs │ ├── signed_i8.rs │ ├── unsigned_u8.rs │ ├── float_32.rs │ └── signed_i32.rs ├── clippy.toml ├── results ├── benchmark_1_2_0 │ ├── computer_spec │ ├── benchmark_results_trait_mt_char │ ├── benchmark_results_trait_mt_i32 │ ├── benchmark_results_trait_u128 │ ├── benchmark_results_trait_i128 │ ├── benchmark_results_trait_mt_i64 │ ├── benchmark_results_trait_mt_f32 │ ├── benchmark_results_trait_mt_u32 │ ├── benchmark_results_trait_mt_u64 │ └── benchmark_results_trait_char └── benchmark_1_0_0 │ ├── regions_sort_ryzen_9_3950x_u32 │ ├── regions_sort_ryzen_9_3950x_u64 │ ├── peeka_sort_vs_regions_sort.md │ └── human_readable │ └── benchmark_1_0_0_ryzen_9_3950x_cmp_structf32 ├── rustfmt.toml ├── .github └── workflows │ └── rust.yml ├── Cargo.toml ├── LICENSE ├── Makefile ├── PROFILING.md ├── RELEASES.md └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | -------------------------------------------------------------------------------- /.env: 
-------------------------------------------------------------------------------- 1 | export RUSTFLAGS="-C target-cpu=native" 2 | export RAYON_NUM_THREADS=16 3 | -------------------------------------------------------------------------------- /src/dedicated/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod cs_u16; 2 | pub mod lsd_f32; 3 | pub mod lsd_u32; 4 | -------------------------------------------------------------------------------- /src/traits/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod dispatcher; 2 | pub mod radix_key; 3 | pub mod radixable; 4 | pub mod radixsort; 5 | -------------------------------------------------------------------------------- /clippy.toml: -------------------------------------------------------------------------------- 1 | cognitive-complexity-threshold = 25 2 | type-complexity-threshold = 350 3 | too-many-arguments-threshold = 12 4 | -------------------------------------------------------------------------------- /src/types/mod.rs: -------------------------------------------------------------------------------- 1 | mod bool; 2 | mod char; 3 | pub mod custom; 4 | mod floats; 5 | mod isize; 6 | mod signed_integer; 7 | mod unsigned_integer; 8 | mod usize; 9 | -------------------------------------------------------------------------------- /src/algo/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod k_way_merge; 2 | // pub mod k_way_merge_mt; 3 | #[cfg(feature = "voracious_multithread")] pub mod regions_graph; 4 | pub mod verge_sort_heuristic; 5 | -------------------------------------------------------------------------------- /results/benchmark_1_2_0/computer_spec: -------------------------------------------------------------------------------- 1 | CPU: AMD Ryzen 3950x 16c/32t 2 | RAM: 3 | 2x16Go Gskill 3600 C16 16 19 19 39 4 | 2x32Go Corsair 3600 C18 18 22 22 41 5 | MB: Asus TUF 
X570-Plus AMD AM4 Ryzen 3000 6 | -------------------------------------------------------------------------------- /src/tests/mod.rs: -------------------------------------------------------------------------------- 1 | mod comparative_sort; 2 | mod dedicated; 3 | mod regions_graph; 4 | #[rustfmt::skip] mod sorts; 5 | mod types; 6 | mod utils; 7 | mod utils_mt; 8 | mod verge_sort_heuristic; 9 | 10 | // mod test_helpers; 11 | // #[rustfmt::skip] mod speed_sort; 12 | // #[rustfmt::skip] mod speed_sort_ded; 13 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | reorder_imports = true 2 | max_width = 80 3 | use_small_heuristics = "max" 4 | wrap_comments = false 5 | use_try_shorthand = true 6 | use_field_init_shorthand = true 7 | struct_lit_single_line = true 8 | reorder_modules = true 9 | overflow_delimited_expr = true 10 | normalize_doc_attributes = false 11 | match_block_trailing_comma = true 12 | inline_attribute_width = 80 13 | format_strings = true 14 | fn_single_line = true 15 | #fn_params_layout = "compressed" 16 | -------------------------------------------------------------------------------- /src/sorts/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod american_flag_sort; 2 | pub mod boolean_sort; 3 | pub mod comparative_sort; 4 | pub mod counting_sort; 5 | pub mod dlsd_sort; 6 | pub mod lsd_sort; 7 | pub mod lsd_stable_sort; 8 | pub mod msd_sort; 9 | pub mod msd_stable_sort; 10 | #[cfg(feature = "voracious_multithread")] pub mod peeka_sort; 11 | pub mod rollercoaster_sort; 12 | pub mod ska_sort; 13 | pub mod thiel_sort; 14 | pub mod utils; 15 | #[cfg(feature = "voracious_multithread")] pub mod utils_mt; 16 | pub mod voracious_sort; 17 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: 
/// `Radixable` implementation for `bool`.
///
/// The key of a boolean is the boolean itself, and every sort entry point
/// below hands the slice to `boolean_sort`.
impl Radixable for bool {
    type Key = bool;

    /// Returns the value itself as its key.
    #[inline]
    fn key(&self) -> bool { *self }
    /// Converts an integer into a `bool`: `1` gives `true`, any other value
    /// gives `false`.
    #[inline]
    fn to_generic(&self, value: usize) -> Self { value == 1 }
    /// Sorts `arr` by calling `boolean_sort`.
    fn voracious_sort(&self, arr: &mut [bool]) { boolean_sort(arr); }
    /// Sorts `arr` by calling `boolean_sort`, exactly like `voracious_sort`.
    fn voracious_stable_sort(&self, arr: &mut [bool]) { boolean_sort(arr); }
    /// Sorts `arr` by calling `boolean_sort`; `_thread_n` is ignored.
    #[cfg(feature = "voracious_multithread")]
    fn voracious_mt_sort(&self, arr: &mut [bool], _thread_n: usize) {
        boolean_sort(arr);
    }
}
/// Three identical histograms must be summed bucket by bucket.
#[test]
fn test_utils_mt_aggregate_histograms() {
    let histograms: Vec<_> =
        (0..3).map(|_| vec![0, 1, 2, 3, 4]).collect();

    let merged = aggregate_histograms(&histograms);

    // Each bucket holds three times the value of the matching input bucket.
    for (bucket, expected) in [0, 3, 6, 9, 12].iter().enumerate() {
        assert_eq!(merged[bucket], *expected);
    }
}
/// Sums several histograms bucket by bucket into one global histogram.
///
/// The result has as many buckets as the first histogram. An empty
/// `histograms` slice yields an empty vector instead of panicking.
///
/// # Panics
///
/// Panics if a histogram has more buckets than the first one.
pub fn aggregate_histograms(histograms: &[Vec<usize>]) -> Vec<usize> {
    // Size the result from the first histogram; with no histogram at all
    // there is nothing to aggregate.
    let bucket_count = match histograms.first() {
        Some(first) => first.len(),
        None => return Vec::new(),
    };
    let mut global_histogram = vec![0; bucket_count];

    for histogram in histograms {
        for (i, v) in histogram.iter().enumerate() {
            global_histogram[i] += v;
        }
    }

    global_histogram
}
lint: ## lint code, including the feature-gated multithread sorts
	@rustup component add clippy
	@cargo clippy --features "voracious_multithread" -- -A clippy::comparison_chain -A clippy::unused_unit
sort 32 | @cargo build --release 33 | 34 | help: 35 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 36 | 37 | .DEFAULT_GOAL := help 38 | .PHONY: test check clean lint doc fmt build-dev build-dev-single build-release build-release-single help 39 | -------------------------------------------------------------------------------- /src/sorts/lsd_stable_sort.rs: -------------------------------------------------------------------------------- 1 | use super::super::algo::k_way_merge::k_way_merge; 2 | use super::super::algo::verge_sort_heuristic::verge_sort_preprocessing; 3 | use super::super::{RadixKey, Radixable}; 4 | use super::lsd_sort::lsd_radixsort_aux; 5 | 6 | /// # LSD stable sort 7 | /// 8 | /// An implementation of the 9 | /// [LSD sort](https://en.wikipedia.org/wiki/Radix_sort) 10 | /// algorithm. 11 | /// 12 | /// Implementation has been deeply optimized: 13 | /// - Small preliminary check to skip prefix zero bits. 14 | /// - Use ping pong copy. 15 | /// - Use vectorization. 16 | /// - Compute histograms in one pass. 17 | /// - Check the number of non-empty buckets, if only one bucket is non-empty, 18 | /// then skip the `copy_by_histogram`. 19 | /// 20 | /// The Verge sort pre-processing heuristic is also added. 21 | /// 22 | /// This LSD stable sort is an out of place stable radix sort. 
23 | pub fn lsd_stable_radixsort(arr: &mut [T], radix: usize) 24 | where 25 | T: Radixable, 26 | K: RadixKey, 27 | { 28 | if arr.len() <= 128 { 29 | arr.sort_by(|a, b| a.partial_cmp(b).unwrap()); 30 | return; 31 | } 32 | 33 | let mut separators = verge_sort_preprocessing(arr, radix, &|arr, radix| { 34 | if arr.len() <= 128 { 35 | arr.sort_by(|a, b| a.partial_cmp(b).unwrap()); 36 | } else { 37 | lsd_radixsort_aux(arr, radix, false, 0) 38 | } 39 | }); 40 | k_way_merge(arr, &mut separators); 41 | } 42 | -------------------------------------------------------------------------------- /src/traits/radixsort.rs: -------------------------------------------------------------------------------- 1 | use super::super::{RadixKey, Radixable}; 2 | 3 | pub trait RadixSort, K: RadixKey> { 4 | fn voracious_sort(&mut self); 5 | fn voracious_stable_sort(&mut self); 6 | #[cfg(feature = "voracious_multithread")] 7 | fn voracious_mt_sort(&mut self, thread_n: usize); 8 | } 9 | 10 | impl, K: RadixKey> RadixSort for [T] { 11 | fn voracious_sort(&mut self) { 12 | if !self.is_empty() { 13 | let dummy = self[0]; 14 | dummy.voracious_sort(self); 15 | } 16 | } 17 | fn voracious_stable_sort(&mut self) { 18 | if !self.is_empty() { 19 | let dummy = self[0]; 20 | dummy.voracious_stable_sort(self); 21 | } 22 | } 23 | #[cfg(feature = "voracious_multithread")] 24 | fn voracious_mt_sort(&mut self, thread_n: usize) { 25 | if !self.is_empty() { 26 | let dummy = self[0]; 27 | dummy.voracious_mt_sort(self, thread_n); 28 | } 29 | } 30 | } 31 | 32 | impl, K: RadixKey> RadixSort for Vec { 33 | fn voracious_sort(&mut self) { self.as_mut_slice().voracious_sort(); } 34 | fn voracious_stable_sort(&mut self) { 35 | self.as_mut_slice().voracious_sort(); 36 | } 37 | #[cfg(feature = "voracious_multithread")] 38 | fn voracious_mt_sort(&mut self, thread_n: usize) { 39 | self.as_mut_slice().voracious_mt_sort(thread_n); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- 
/// Adapts every `i128` generator from `generators_i128` into an `isize`
/// generator, keeping the generator titles.
///
/// Only compiled for targets with 128-bit pointers, where `isize` is as wide
/// as `i128`, so the `as` conversion below does not truncate.
#[cfg(target_pointer_width = "128")]
pub fn generators_isize(
) -> Vec<(&'static dyn Fn(usize) -> Vec<isize>, &'static str)> {
    generators_i128()
        .into_iter()
        .map(|(generator, title)| {
            // Convert element by element instead of transmuting the `Vec`:
            // the layout of `Vec<T>` is unspecified, so reinterpreting a
            // `Vec<i128>` as a `Vec<isize>` is not guaranteed to be sound.
            let new_gen = move |size: usize| -> Vec<isize> {
                generator(size)
                    .into_iter()
                    .map(|value| value as isize)
                    .collect()
            };

            (
                // Leaked on purpose: the returned trait object is `'static`.
                Box::leak(Box::new(new_gen))
                    as &'static dyn Fn(usize) -> Vec<isize>,
                title,
            )
        })
        .collect()
}
const UNROLL_SIZE: usize = 4;

/// Sorts a `u16` slice in place with a counting sort.
///
/// A first pass counts the occurrences of every possible `u16` value; a
/// second pass rewrites the slice in ascending order from those counts.
/// Slices with fewer than two elements are left untouched.
pub fn cs_u16(arr: &mut [u16]) {
    let size = arr.len();
    if size < 2 {
        return;
    }

    // One counter per possible `u16` value. The counters are explicitly
    // `usize`: an untyped `0` would default to `i32`, which overflows once a
    // single value occurs more than `i32::MAX` times.
    let mut histogram = vec![0usize; 1 << 16];

    // Count in groups of `UNROLL_SIZE`, then handle the leftover elements.
    let chunks = arr.chunks_exact(UNROLL_SIZE);
    let tail = chunks.remainder();
    chunks.for_each(|chunk| {
        histogram[chunk[0] as usize] += 1;
        histogram[chunk[1] as usize] += 1;
        histogram[chunk[2] as usize] += 1;
        histogram[chunk[3] as usize] += 1;
    });
    tail.iter().for_each(|item| {
        histogram[*item as usize] += 1;
    });

    // Rewrite the slice: each value is written `count` times, in ascending
    // order. The counts add up to `size`, so the ranges stay in bounds.
    let mut position = 0;
    for (value, &count) in histogram.iter().enumerate() {
        if count > 0 {
            let end = position + count;
            // `value` is an index below 65536, so it always fits in `u16`.
            arr[position..end].fill(value as u16);
            position = end;
        }
    }
}
-------------------------------------------------------------------------------- 1 | use super::super::{Radixable, RadixKey}; 2 | 3 | use super::super::dedicated::lsd_f32::lsd_f32; 4 | use super::super::dedicated::lsd_u32::lsd_u32; 5 | use super::super::dedicated::cs_u16::cs_u16; 6 | 7 | use super::super::generators::float_32::*; 8 | use super::super::generators::unsigned_u16::*; 9 | use super::super::generators::unsigned_u32::*; 10 | 11 | use super::test_helpers::helper_sort_aux; 12 | 13 | fn speed_dedicated( 14 | name: &str, 15 | sort: &dyn Fn(&mut [T]) -> (), 16 | generators: Vec<(&dyn Fn(usize) -> Vec, &'static str)>, 17 | ) where 18 | T: Radixable + std::fmt::Debug, 19 | K: RadixKey, 20 | { 21 | let runs = 3; 22 | let with_check = true; 23 | 24 | let sizes: Vec = vec![ 25 | 100, 26 | 1000, 27 | 10000, 28 | 100_000, 29 | 1_000_000, 30 | 5_000_000, 31 | 10_000_000, 32 | 50_000_000, 33 | 100_000_000, 34 | 500_000_000, 35 | 1_000_000_000, 36 | ]; 37 | 38 | println!("Number of iterations: {}", runs); 39 | println!("=== Dedicated {} ===", name); 40 | for size in sizes.iter() { 41 | println!("Array size: {}", size); 42 | for (generator, gen_name) in generators.iter() { 43 | print!("{}", gen_name); 44 | helper_sort_aux(&|arr: &mut [T]| sort(arr), runs, *size, generator, with_check); 45 | println!(); 46 | } 47 | } 48 | } 49 | 50 | #[test] 51 | fn speed_dedicated_lsd_f32() { 52 | speed_dedicated("LSD f32", &lsd_f32, generators_f32()); 53 | } 54 | 55 | #[test] 56 | fn speed_dedicated_lsd_u32() { 57 | speed_dedicated("LSD u32", &lsd_u32, generators_u32()); 58 | } 59 | 60 | #[test] 61 | fn speed_dedicated_cs_u16() { 62 | speed_dedicated("CS u16", &cs_u16, generators_u16()); 63 | } 64 | -------------------------------------------------------------------------------- /results/benchmark_1_0_0/regions_sort_ryzen_9_3950x_u32: -------------------------------------------------------------------------------- 1 | workers = 32 2 | Test: u32 3 | Array size:1000000 range: 0 to 
4294967295. Time: 15 ms. 4 | Array size:1000000 range: 0 to 1000000000. Time: 4 ms. 5 | Array size:5000000 range: 0 to 4294967295. Time: 17 ms. 6 | Array size:5000000 range: 0 to 1000000000. Time: 8 ms. 7 | Array size:10000000 range: 0 to 4294967295. Time: 20 ms. 8 | Array size:10000000 range: 0 to 1000000000. Time: 15 ms. 9 | Array size:20000000 range: 0 to 4294967295. Time: 30 ms. 10 | Array size:20000000 range: 0 to 1000000000. Time: 31 ms. 11 | Array size:50000000 range: 0 to 4294967295. Time: 63 ms. 12 | Array size:50000000 range: 0 to 1000000000. Time: 66 ms. 13 | Array size:100000000 range: 0 to 4294967295. Time: 143 ms. 14 | Array size:100000000 range: 0 to 1000000000. Time: 135 ms. 15 | Array size:200000000 range: 0 to 4294967295. Time: 266 ms. 16 | Array size:200000000 range: 0 to 1000000000. Time: 275 ms. 17 | Array size:300000000 range: 0 to 4294967295. Time: 402 ms. 18 | Array size:300000000 range: 0 to 1000000000. Time: 393 ms. 19 | Array size:400000000 range: 0 to 4294967295. Time: 556 ms. 20 | Array size:400000000 range: 0 to 1000000000. Time: 514 ms. 21 | Array size:500000000 range: 0 to 4294967295. Time: 711 ms. 22 | Array size:500000000 range: 0 to 1000000000. Time: 641 ms. 23 | Array size:600000000 range: 0 to 4294967295. Time: 870 ms. 24 | Array size:600000000 range: 0 to 1000000000. Time: 766 ms. 25 | Array size:700000000 range: 0 to 4294967295. Time: 1013 ms. 26 | Array size:700000000 range: 0 to 1000000000. Time: 893 ms. 27 | Array size:800000000 range: 0 to 4294967295. Time: 1124 ms. 28 | Array size:800000000 range: 0 to 1000000000. Time: 1018 ms. 29 | Array size:900000000 range: 0 to 4294967295. Time: 1290 ms. 30 | Array size:900000000 range: 0 to 1000000000. Time: 1141 ms. 31 | Array size:1000000000 range: 0 to 4294967295. Time: 1369 ms. 32 | Array size:1000000000 range: 0 to 1000000000. Time: 1281 ms. 
/// Overwrites `count` consecutive elements of `arr`, starting at index
/// `shift`, with `value`.
///
/// # Panics
///
/// Panics if `shift + count` is greater than `arr.len()`.
#[inline]
fn boolean_sort_aux(arr: &mut [bool], shift: usize, count: usize, value: bool) {
    arr[shift..shift + count].fill(value);
}

/// # Boolean sort
///
/// A dedicated sort for boolean.
///
/// The `false` values are counted, then the slice is rewritten as that many
/// `false` followed by `true` for every remaining position. A slice that
/// contains only `true` or only `false` is returned without being written.
pub fn boolean_sort(arr: &mut [bool]) {
    let count_false = arr.iter().filter(|&&b| !b).count();

    // All elements are equal (or the slice is empty): nothing to rewrite.
    if count_false == arr.len() || count_false == 0 {
        return;
    }

    boolean_sort_aux(arr, 0, count_false, false);
    boolean_sort_aux(arr, count_false, arr.len() - count_false, true);
}
-------------------------------------------------------------------------------- 1 | workers = 32 2 | Test: u64 3 | Array size:1000000 range: 0 to 18446744073709551615. Time: 15 ms. 4 | Array size:1000000 range: 0 to 1000000000. Time: 4 ms. 5 | Array size:5000000 range: 0 to 18446744073709551615. Time: 21 ms. 6 | Array size:5000000 range: 0 to 1000000000. Time: 9 ms. 7 | Array size:10000000 range: 0 to 18446744073709551615. Time: 30 ms. 8 | Array size:10000000 range: 0 to 1000000000. Time: 24 ms. 9 | Array size:20000000 range: 0 to 18446744073709551615. Time: 45 ms. 10 | Array size:20000000 range: 0 to 1000000000. Time: 48 ms. 11 | Array size:50000000 range: 0 to 18446744073709551615. Time: 133 ms. 12 | Array size:50000000 range: 0 to 1000000000. Time: 123 ms. 13 | Array size:100000000 range: 0 to 18446744073709551615. Time: 254 ms. 14 | Array size:100000000 range: 0 to 1000000000. Time: 246 ms. 15 | Array size:200000000 range: 0 to 18446744073709551615. Time: 514 ms. 16 | Array size:200000000 range: 0 to 1000000000. Time: 498 ms. 17 | Array size:300000000 range: 0 to 18446744073709551615. Time: 769 ms. 18 | Array size:300000000 range: 0 to 1000000000. Time: 749 ms. 19 | Array size:400000000 range: 0 to 18446744073709551615. Time: 1013 ms. 20 | Array size:400000000 range: 0 to 1000000000. Time: 996 ms. 21 | Array size:500000000 range: 0 to 18446744073709551615. Time: 1278 ms. 22 | Array size:500000000 range: 0 to 1000000000. Time: 1238 ms. 23 | Array size:600000000 range: 0 to 18446744073709551615. Time: 1536 ms. 24 | Array size:600000000 range: 0 to 1000000000. Time: 1479 ms. 25 | Array size:700000000 range: 0 to 18446744073709551615. Time: 1796 ms. 26 | Array size:700000000 range: 0 to 1000000000. Time: 1722 ms. 27 | Array size:800000000 range: 0 to 18446744073709551615. Time: 2063 ms. 28 | Array size:800000000 range: 0 to 1000000000. Time: 1960 ms. 29 | Array size:900000000 range: 0 to 18446744073709551615. Time: 2325 ms. 
30 | Array size:900000000 range: 0 to 1000000000. Time: 2192 ms. 31 | Array size:1000000000 range: 0 to 18446744073709551615. Time: 2585 ms. 32 | Array size:1000000000 range: 0 to 1000000000. Time: 2422 ms. 33 | -------------------------------------------------------------------------------- /src/tests/comparative_sort.rs: -------------------------------------------------------------------------------- 1 | use super::super::sorts::comparative_sort::insertion_sort_try; 2 | use super::super::sorts::utils::Params; 3 | 4 | #[test] 5 | fn test_comparative_sort_insertion_sort_try() { 6 | let p = Params::new(0, 8, 0, 1); // level, radix, offset, max_level 7 | let mut arr: Vec = vec![ 8 | 512, 9 | 1024, 10 | 1024, 11 | 1024, 12 | 1024, 13 | 1024 + 74, 14 | 1024 + 73, 15 | 1024 + 72, 16 | 1024 + 71, 17 | 1024 + 70, 18 | 1024 + 69, 19 | 1024 + 68, 20 | 1024 + 67, 21 | 1024 + 66, 22 | 1024 + 65, 23 | 1024 + 64, 24 | 1024 + 62, 25 | 1024 + 61, 26 | 1024 + 60, 27 | 1024 + 59, 28 | 1024 + 58, 29 | 1024 + 57, 30 | 1024 + 56, 31 | 1024 + 55, 32 | 1024 + 54, 33 | 1024 + 53, 34 | 1024 + 52, 35 | 1024 + 51, 36 | 1024 + 50, 37 | 1024 + 49, 38 | 1024 + 48, 39 | 1024 + 47, 40 | 1024 + 46, 41 | 1024 + 45, 42 | 1024 + 44, 43 | 1024 + 43, 44 | 1024 + 42, 45 | 1024 + 41, 46 | 1024 + 40, 47 | 1024 + 39, 48 | 1024 + 38, 49 | 1024 + 37, 50 | 1024 + 36, 51 | 1024 + 35, 52 | 1024 + 34, 53 | 1024 + 33, 54 | 1024 + 32, 55 | 1024 + 31, 56 | 1024 + 30, 57 | 1024 + 29, 58 | 1024 + 28, 59 | 1024 + 27, 60 | 1024 + 26, 61 | 1024 + 25, 62 | 1024 + 24, 63 | 1024 + 23, 64 | 1024 + 22, 65 | 1024 + 21, 66 | 1024 + 20, 67 | 1024 + 19, 68 | 1024 + 18, 69 | 1024 + 17, 70 | 1024 + 16, 71 | 1024 + 15, 72 | 1024 + 14, 73 | 1024 + 13, 74 | 1024 + 12, 75 | 1024 + 11, 76 | 1024 + 10, 77 | 1024 + 9, 78 | 1024 + 8, 79 | 1024 + 7, 80 | 1024 + 6, 81 | 1024 + 5, 82 | 1024 + 4, 83 | 1024 + 3, 84 | 1024 + 2, 85 | 1024 + 1, 86 | 1024, 87 | 2048, 88 | ]; 89 | 90 | let unsorted_parts = insertion_sort_try(&mut arr, &p); 91 | 
assert_eq!(unsorted_parts[0].0, 1); 92 | assert_eq!(unsorted_parts[0].1, 79); 93 | } 94 | -------------------------------------------------------------------------------- /src/tests/test_helpers.rs: -------------------------------------------------------------------------------- 1 | use rayon::prelude::*; 2 | 3 | use std::time::Instant; 4 | 5 | use super::super::{RadixKey, Radixable}; 6 | 7 | #[allow(dead_code)] 8 | pub fn std_deviation(data: &Vec, mean: u64, size: usize) -> f32 { 9 | let variance = data 10 | .iter() 11 | .map(|value| { 12 | let diff = 13 | if mean >= *value { mean - *value } else { *value - mean }; 14 | 15 | (diff * diff) as f64 16 | }) 17 | .sum::() 18 | / size as f64; 19 | 20 | variance.sqrt() as f32 21 | } 22 | 23 | #[allow(dead_code)] 24 | pub fn helper_sort_aux( 25 | sort: &dyn Fn(&mut [T]) -> (), 26 | runs: usize, 27 | size: usize, 28 | generator: &dyn Fn(usize) -> Vec, 29 | with_check: bool, 30 | ) where 31 | T: Radixable + std::fmt::Debug, 32 | K: RadixKey, 33 | { 34 | let mut nanos: Vec = Vec::with_capacity(runs); 35 | 36 | for _ in 0..runs { 37 | if with_check { 38 | let mut array = generator(size); 39 | let mut check = array.to_vec(); 40 | 41 | let start = Instant::now(); 42 | sort(&mut array); 43 | let ns: u64 = start.elapsed().as_nanos() as u64; 44 | nanos.push(ns); 45 | 46 | check.par_sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 47 | assert_eq!(check, array); 48 | } else { 49 | let mut array = generator(size); 50 | 51 | let start = Instant::now(); 52 | sort(&mut array); 53 | let ns: u64 = start.elapsed().as_nanos() as u64; 54 | nanos.push(ns); 55 | } 56 | } 57 | 58 | // if nanos.len() > 1 { 59 | // nanos.remove(0); 60 | // } 61 | 62 | let sum: u64 = nanos.iter().sum(); 63 | let mean: u64 = if runs > 1 { sum / (runs as u64 - 1) } else { sum as u64 }; 64 | let std_dev: f32 = std_deviation(&nanos, mean, size); 65 | let per_item: f32 = (mean as f64 / size as f64) as f32; 66 | 67 | // \u{1b} => escape for terminal 68 | // 0 => no 
color 69 | // 0;30 => gray 70 | // 0;31 => red 71 | // 1;31 => red 72 | // 0;32 => green 73 | // 0;33 => brown 74 | // 0;34 => blue 75 | // 1;34 => light blue 76 | // 0;37 => light gray 77 | 78 | // print time, standard deviation and time per item 79 | print!( 80 | "\u{1b}[0;32m{}us\u{1b}[0m\t\u{1b}[1;31m{:.0}ns\u{1b}[0m\t(\u{1b}[0;\ 81 | 33m{:.2}ns\u{1b}[0m)\t", 82 | mean / 1000, 83 | std_dev, 84 | per_item 85 | ); 86 | } 87 | -------------------------------------------------------------------------------- /src/generators/char.rs: -------------------------------------------------------------------------------- 1 | use rand::{thread_rng, Rng}; 2 | use rayon::prelude::*; 3 | 4 | fn get_charset() -> Vec { 5 | vec![ 6 | 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 7 | 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', 8 | '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 9 | 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 10 | 'U', 'V', 'W', 'X', 'Y', 'Z', 11 | ] 12 | } 13 | 14 | pub fn helper_random_array_uniform_char(size: usize) -> Vec { 15 | (0..size) 16 | .into_par_iter() 17 | .map(|_| thread_rng().gen::()) 18 | .collect::>() 19 | } 20 | 21 | pub fn helper_random_array_equal_char(size: usize) -> Vec { 22 | vec![thread_rng().gen::(); size] 23 | } 24 | 25 | pub fn helper_random_array_charset_char(size: usize) -> Vec { 26 | let charset = get_charset(); 27 | (0..size) 28 | .into_par_iter() 29 | .map(|_| { 30 | let index: usize = thread_rng().gen_range(0, charset.len()); 31 | charset[index] 32 | }) 33 | .collect::>() 34 | } 35 | 36 | pub fn helper_random_array_charset_den_char(size: usize) -> Vec { 37 | let mut charset = get_charset(); 38 | charset.push(std::char::from_u32(0x00000db4).unwrap()); 39 | (0..size) 40 | .into_par_iter() 41 | .map(|_| { 42 | let index: usize = thread_rng().gen_range(0, charset.len()); 43 | charset[index] 44 | }) 45 | .collect::>() 46 | } 47 | 48 | pub fn 
helper_random_array_charset_vden_char(size: usize) -> Vec { 49 | let mut charset = get_charset(); 50 | for i in 0..11 { 51 | let char_u32 = std::char::MAX as u32 - i as u32; 52 | charset.push(std::char::from_u32(char_u32).unwrap()); 53 | } 54 | (0..size) 55 | .into_par_iter() 56 | .map(|_| { 57 | let index: usize = thread_rng().gen_range(0, charset.len()); 58 | charset[index] 59 | }) 60 | .collect::>() 61 | } 62 | 63 | pub fn generators_char( 64 | ) -> Vec<(&'static dyn Fn(usize) -> Vec, &'static str)> { 65 | vec![ 66 | (&helper_random_array_uniform_char, "-- Unif :"), 67 | (&helper_random_array_equal_char, "-- Equal :"), 68 | (&helper_random_array_charset_char, "-- Charset :"), 69 | (&helper_random_array_charset_den_char, "-- Charset Den:"), 70 | (&helper_random_array_charset_vden_char, "-- Charset VDe:"), 71 | ] 72 | } 73 | -------------------------------------------------------------------------------- /src/sorts/american_flag_sort.rs: -------------------------------------------------------------------------------- 1 | use super::super::{RadixKey, Radixable}; 2 | use super::comparative_sort::insertion_sort; 3 | use super::utils::{get_histogram, prefix_sums, Params}; 4 | 5 | fn serial_swap, K: RadixKey>( 6 | arr: &mut [T], 7 | heads: &mut Vec, 8 | tails: &[usize], 9 | p: &Params, 10 | mask: <>::Key as RadixKey>::Key, 11 | shift: usize, 12 | ) { 13 | for i in 0..(p.radix_range) - 1 { 14 | while heads[i] < tails[i] { 15 | unsafe { 16 | let mut bucket = 17 | arr.get_unchecked(heads[i]).extract(mask, shift); 18 | while bucket != i { 19 | arr.swap(heads[i], heads[bucket]); 20 | heads[bucket] += 1; 21 | bucket = arr.get_unchecked(heads[i]).extract(mask, shift); 22 | } 23 | heads[i] += 1; 24 | } 25 | } 26 | } 27 | } 28 | 29 | pub fn serial_radixsort_rec, K: RadixKey>( 30 | arr: &mut [T], 31 | p: Params, 32 | ) { 33 | if arr.len() <= 64 { 34 | insertion_sort(arr); 35 | return; 36 | } 37 | 38 | let dummy = arr[0]; 39 | let (mask, shift) = 
dummy.get_mask_and_shift_from_left(&p); 40 | let histogram = get_histogram(arr, &p, mask, shift); 41 | let (p_sums, mut heads, tails) = prefix_sums(&histogram); 42 | 43 | serial_swap(arr, &mut heads, &tails, &p, mask, shift); 44 | 45 | let mut rest = arr; 46 | if p.level < p.max_level - 1 { 47 | for i in 0..(p.radix_range) { 48 | let bucket_end = p_sums[i + 1] - p_sums[i]; 49 | let (first_part, second_part) = rest.split_at_mut(bucket_end); 50 | rest = second_part; 51 | if histogram[i] > 1 { 52 | let new_params = p.new_level(p.level + 1); 53 | serial_radixsort_rec(first_part, new_params); 54 | } 55 | } 56 | } 57 | } 58 | 59 | /// # American flag sort 60 | /// 61 | /// An implementation of the famous 62 | /// [American flag sort](https://en.wikipedia.org/wiki/American_flag_sort) 63 | /// algorithm. 64 | /// 65 | /// This algorithm is used as a fallback in the Ska sort. 66 | /// 67 | /// The American flag sort is an in place unstable radix sort. 68 | pub fn american_flag_sort, K: RadixKey>( 69 | arr: &mut [T], 70 | radix: usize, 71 | ) { 72 | if arr.len() <= 64 { 73 | insertion_sort(arr); 74 | return; 75 | } 76 | 77 | let dummy = arr[0]; 78 | let (_, raw_offset) = dummy.compute_offset(arr, radix); 79 | let max_level = dummy.compute_max_level(raw_offset, radix); 80 | 81 | if max_level == 0 { 82 | return; 83 | } 84 | 85 | let params = Params::new(0, radix, raw_offset, max_level); 86 | serial_radixsort_rec(arr, params); 87 | } 88 | -------------------------------------------------------------------------------- /src/sorts/counting_sort.rs: -------------------------------------------------------------------------------- 1 | use super::super::{RadixKey, Radixable}; 2 | use super::utils::Params; 3 | 4 | fn counting_sort_aux(arr: &mut [T], p: Params) 5 | where 6 | T: Radixable, 7 | K: RadixKey, 8 | { 9 | let dummy = arr[0]; 10 | let mut histogram = vec![0; p.radix_range]; 11 | let mask = dummy.default_mask(p.radix); 12 | 13 | let quotient = arr.len() / 4; 14 | let remainder 
= arr.len() % 4; 15 | for q in 0..quotient { 16 | unsafe { 17 | let i = q * 4; 18 | let bucket0 = arr.get_unchecked(i).extract(mask, 0); 19 | let bucket1 = arr.get_unchecked(i + 1).extract(mask, 0); 20 | let bucket2 = arr.get_unchecked(i + 2).extract(mask, 0); 21 | let bucket3 = arr.get_unchecked(i + 3).extract(mask, 0); 22 | histogram[bucket0] += 1; 23 | histogram[bucket1] += 1; 24 | histogram[bucket2] += 1; 25 | histogram[bucket3] += 1; 26 | } 27 | } 28 | let offset = quotient * 4; 29 | for i in 0..remainder { 30 | unsafe { 31 | let bucket = arr.get_unchecked(offset + i).extract(mask, 0); 32 | histogram[bucket] += 1; 33 | } 34 | } 35 | 36 | let dummy = arr[0]; 37 | 38 | let mut position = 0; 39 | histogram.iter().enumerate().for_each(|(value, count)| { 40 | if *count > 0 { 41 | let quotient = *count / 4; 42 | let remainder = count % 4; 43 | let v = dummy.to_generic(value); 44 | for _ in 0..quotient { 45 | unsafe { 46 | *arr.get_unchecked_mut(position) = v; 47 | *arr.get_unchecked_mut(position + 1) = v; 48 | *arr.get_unchecked_mut(position + 2) = v; 49 | *arr.get_unchecked_mut(position + 3) = v; 50 | } 51 | position += 4; 52 | } 53 | for _ in 0..remainder { 54 | unsafe { 55 | *arr.get_unchecked_mut(position) = v; 56 | position += 1; 57 | } 58 | } 59 | } 60 | }); 61 | } 62 | 63 | /// # Counting sort 64 | /// 65 | /// An implementation of the 66 | /// [Counting sort](https://en.wikipedia.org/wiki/Counting_sort) 67 | /// algorithm. 68 | /// 69 | /// Counting sort is very fast for inputs with a small bit representation. 70 | /// 71 | /// This Counting sort has been a bit customized since it takes a radix input. 
72 | pub fn counting_sort(arr: &mut [T], radix: usize) 73 | where 74 | T: Radixable, 75 | K: RadixKey, 76 | { 77 | if arr.len() < 2 { 78 | return; 79 | } 80 | 81 | let offset = 0; 82 | let level = 0; 83 | let max_level = 1; 84 | let params = Params::new(level, radix, offset, max_level); 85 | 86 | counting_sort_aux(arr, params); 87 | } 88 | -------------------------------------------------------------------------------- /src/sorts/msd_stable_sort.rs: -------------------------------------------------------------------------------- 1 | use super::super::algo::k_way_merge::k_way_merge; 2 | use super::super::algo::verge_sort_heuristic::verge_sort_preprocessing; 3 | use super::super::{RadixKey, Radixable}; 4 | use super::msd_sort::copy_by_histogram; 5 | use super::utils::{get_histogram, prefix_sums, Params}; 6 | 7 | const FALLBACK_THRESHOLD: usize = 128; 8 | 9 | fn msd_stable_radixsort_rec, K: RadixKey>( 10 | arr: &mut [T], 11 | p: Params, 12 | ) { 13 | if arr.len() <= FALLBACK_THRESHOLD { 14 | arr.sort_by(|a, b| a.partial_cmp(b).unwrap()); 15 | return; 16 | } 17 | 18 | let dummy = arr[0]; 19 | let (mask, shift) = dummy.get_mask_and_shift_from_left(&p); 20 | let histogram = get_histogram(arr, &p, mask, shift); 21 | let (p_sums, mut heads, _) = prefix_sums(&histogram); 22 | 23 | let mut buffer = arr.to_vec(); 24 | 25 | copy_by_histogram(arr.len(), &mut buffer, arr, &mut heads, mask, shift); 26 | 27 | let mut rest = arr; 28 | if p.level < p.max_level - 1 { 29 | for i in 0..(p.radix_range) { 30 | let bucket_end = p_sums[i + 1] - p_sums[i]; 31 | let (first_part, second_part) = rest.split_at_mut(bucket_end); 32 | rest = second_part; 33 | if histogram[i] > 1 { 34 | let new_params = p.new_level(p.level + 1); 35 | msd_stable_radixsort_rec(first_part, new_params); 36 | } 37 | } 38 | } 39 | } 40 | 41 | fn msd_stable_radixsort_aux, K: RadixKey>( 42 | arr: &mut [T], 43 | radix: usize, 44 | ) { 45 | if arr.len() <= FALLBACK_THRESHOLD { 46 | arr.sort_by(|a, b| 
a.partial_cmp(b).unwrap()); 47 | return; 48 | } 49 | 50 | let dummy = arr[0]; 51 | let (_, raw_offset) = dummy.compute_offset(arr, radix); 52 | let max_level = dummy.compute_max_level(raw_offset, radix); 53 | 54 | if max_level == 0 { 55 | return; 56 | } 57 | 58 | let params = Params::new(0, radix, raw_offset, max_level); 59 | 60 | msd_stable_radixsort_rec(arr, params); 61 | } 62 | 63 | /// # MSD stable sort 64 | /// 65 | /// An implementation of the 66 | /// [MSD sort](https://en.wikipedia.org/wiki/Radix_sort) 67 | /// algorithm. 68 | /// 69 | /// Implementation has been deeply optimized: 70 | /// - Small preliminary check to skip prefix zero bits. 71 | /// - Use vectorization. 72 | /// 73 | /// We choose to use an out of place implementation to have a fast radix sort 74 | /// for small input. 75 | /// 76 | /// The Verge sort pre-processing heuristic is also added. 77 | /// 78 | /// This MSD sort is an out of place stable radix sort. 79 | pub fn msd_stable_radixsort, K: RadixKey>( 80 | arr: &mut [T], 81 | radix: usize, 82 | ) { 83 | if arr.len() <= FALLBACK_THRESHOLD { 84 | arr.sort_by(|a, b| a.partial_cmp(b).unwrap()); 85 | return; 86 | } 87 | 88 | let mut separators = 89 | verge_sort_preprocessing(arr, radix, &msd_stable_radixsort_aux); 90 | k_way_merge(arr, &mut separators); 91 | } 92 | -------------------------------------------------------------------------------- /src/generators/signed_i16.rs: -------------------------------------------------------------------------------- 1 | use rand::seq::SliceRandom; 2 | use rand::{thread_rng, Rng}; 3 | use rayon::prelude::*; 4 | 5 | // Uniform 6 | pub fn helper_random_array_uniform_i16(size: usize) -> Vec { 7 | (0..size) 8 | .into_par_iter() 9 | .map(|_| thread_rng().gen::()) 10 | .collect::>() 11 | } 12 | 13 | // Small 14 | pub fn helper_random_array_small_i16(size: usize) -> Vec { 15 | (0..size) 16 | .into_par_iter() 17 | .map(|_| thread_rng().gen_range(-128, 127)) 18 | .collect::>() 19 | } 20 | 21 | // Ascending 
sawtooth 22 | pub fn helper_random_array_asc_sawtooth_i16(size: usize) -> Vec { 23 | (0..size).into_par_iter().map(|i| i as i16).collect::>() 24 | } 25 | 26 | // Descending sawtooth 27 | pub fn helper_random_array_desc_sawtooth_i16(size: usize) -> Vec { 28 | (0..size) 29 | .into_par_iter() 30 | .map(|i| (size - 1 - i) as i16) 31 | .collect::>() 32 | } 33 | 34 | // All equals 35 | pub fn helper_random_array_allequals_i16(size: usize) -> Vec { 36 | vec![thread_rng().gen(); size] 37 | } 38 | 39 | // Zipf 40 | pub fn helper_random_array_zipf_i16(size: usize) -> Vec { 41 | let mut array: Vec = Vec::with_capacity(size); 42 | let mut rng = thread_rng(); 43 | let mut quantity = size / 2; 44 | let mut i = 0; 45 | let mut value: i16 = 0; 46 | while quantity > 2 { 47 | value = rng.gen(); 48 | 49 | for _ in 0..quantity { 50 | array.push(value); 51 | i += 1; 52 | } 53 | 54 | quantity = quantity / 2; 55 | } 56 | while i < size { 57 | array.push(value); 58 | i += 1; 59 | } 60 | 61 | array.as_mut_slice().shuffle(&mut rng); 62 | 63 | array 64 | } 65 | 66 | pub fn generators_i16( 67 | ) -> Vec<(&'static dyn Fn(usize) -> Vec, &'static str)> { 68 | vec![ 69 | (&helper_random_array_uniform_i16, "-- Unif :"), 70 | (&helper_random_array_small_i16, "-- Small :"), 71 | (&helper_random_array_asc_sawtooth_i16, "-- Asc Saw :"), 72 | (&helper_random_array_desc_sawtooth_i16, "-- Desc Saw :"), 73 | (&helper_random_array_allequals_i16, "-- Equal :"), 74 | (&helper_random_array_zipf_i16, "-- Zipf :"), 75 | ] 76 | } 77 | 78 | #[cfg(target_pointer_width = "16")] 79 | pub fn generators_isize( 80 | ) -> Vec<(&'static dyn Fn(usize) -> Vec, &'static str)> { 81 | generators_i16() 82 | .into_iter() 83 | .map(|(gen, title)| { 84 | let new_gen = move |size: usize| -> Vec { 85 | unsafe { 86 | let arr = gen(size); 87 | std::mem::transmute::, Vec>(arr) 88 | } 89 | }; 90 | 91 | ( 92 | Box::leak(Box::new(new_gen)) 93 | as &'static dyn Fn(usize) -> Vec, 94 | title, 95 | ) 96 | }) 97 | .collect() 98 | } 99 | 
-------------------------------------------------------------------------------- /src/generators/unsigned_u16.rs: -------------------------------------------------------------------------------- 1 | use rand::seq::SliceRandom; 2 | use rand::{thread_rng, Rng}; 3 | use rayon::prelude::*; 4 | 5 | // Uniform 6 | pub fn helper_random_array_uniform_u16(size: usize) -> Vec { 7 | (0..size) 8 | .into_par_iter() 9 | .map(|_| thread_rng().gen::()) 10 | .collect::>() 11 | } 12 | 13 | // Small 14 | pub fn helper_random_array_small_u16(size: usize) -> Vec { 15 | (0..size) 16 | .into_par_iter() 17 | .map(|_| thread_rng().gen_range(0, 255)) 18 | .collect::>() 19 | } 20 | 21 | // Ascending sawtooth 22 | pub fn helper_random_array_asc_sawtooth_u16(size: usize) -> Vec { 23 | (0..size).into_par_iter().map(|i| i as u16).collect::>() 24 | } 25 | 26 | // Descending sawtooth 27 | pub fn helper_random_array_desc_sawtooth_u16(size: usize) -> Vec { 28 | (0..size) 29 | .into_par_iter() 30 | .map(|i| (size - 1 - i) as u16) 31 | .collect::>() 32 | } 33 | 34 | // All equals 35 | pub fn helper_random_array_allequals_u16(size: usize) -> Vec { 36 | vec![thread_rng().gen(); size] 37 | } 38 | 39 | // Zipf 40 | pub fn helper_random_array_zipf_u16(size: usize) -> Vec { 41 | let mut array: Vec = Vec::with_capacity(size); 42 | let mut rng = thread_rng(); 43 | let mut quantity = size / 2; 44 | let mut i = 0; 45 | let mut value: u16 = 0; 46 | while quantity > 2 { 47 | value = rng.gen(); 48 | 49 | for _ in 0..quantity { 50 | array.push(value); 51 | i += 1; 52 | } 53 | 54 | quantity = quantity / 2; 55 | } 56 | while i < size { 57 | array.push(value); 58 | i += 1; 59 | } 60 | 61 | array.as_mut_slice().shuffle(&mut rng); 62 | 63 | array 64 | } 65 | 66 | pub fn generators_u16( 67 | ) -> Vec<(&'static dyn Fn(usize) -> Vec, &'static str)> { 68 | vec![ 69 | (&helper_random_array_uniform_u16, "-- Unif :"), 70 | (&helper_random_array_small_u16, "-- Small :"), 71 | (&helper_random_array_asc_sawtooth_u16, "-- Asc Saw 
:"), 72 | (&helper_random_array_desc_sawtooth_u16, "-- Desc Saw :"), 73 | (&helper_random_array_allequals_u16, "-- Equal :"), 74 | (&helper_random_array_zipf_u16, "-- Zipf :"), 75 | ] 76 | } 77 | 78 | #[cfg(target_pointer_width = "16")] 79 | pub fn generators_usize( 80 | ) -> Vec<(&'static dyn Fn(usize) -> Vec, &'static str)> { 81 | generators_u16() 82 | .into_iter() 83 | .map(|(gen, title)| { 84 | let new_gen = move |size: usize| -> Vec { 85 | unsafe { 86 | let arr = gen(size); 87 | std::mem::transmute::, Vec>(arr) 88 | } 89 | }; 90 | 91 | ( 92 | Box::leak(Box::new(new_gen)) 93 | as &'static dyn Fn(usize) -> Vec, 94 | title, 95 | ) 96 | }) 97 | .collect() 98 | } 99 | -------------------------------------------------------------------------------- /src/generators/signed_i8.rs: -------------------------------------------------------------------------------- 1 | use rand::seq::SliceRandom; 2 | use rand::{thread_rng, Rng}; 3 | use rand_distr::{Distribution, Normal}; 4 | use rayon::prelude::*; 5 | 6 | // Uniform 7 | pub fn helper_random_array_uniform_i8(size: usize) -> Vec { 8 | (0..size) 9 | .into_par_iter() 10 | .map(|_| thread_rng().gen::()) 11 | .collect::>() 12 | } 13 | 14 | // Ascending sawtooth 15 | pub fn helper_random_array_asc_sawtooth_i8(size: usize) -> Vec { 16 | (0..size).into_par_iter().map(|i| i as i8).collect::>() 17 | } 18 | 19 | // Ascending sawtooth 20 | pub fn helper_random_array_desc_sawtooth_i8(size: usize) -> Vec { 21 | (0..size).into_par_iter().map(|i| (size - 1 - i) as i8).collect::>() 22 | } 23 | 24 | // All equals 25 | pub fn helper_random_array_allequals_i8(size: usize) -> Vec { 26 | vec![thread_rng().gen(); size] 27 | } 28 | 29 | // Zipf 30 | pub fn helper_random_array_zipf_i8(size: usize) -> Vec { 31 | let mut array: Vec = Vec::with_capacity(size); 32 | let mut rng = thread_rng(); 33 | let mut quantity = size / 2; 34 | let mut i = 0; 35 | let mut value: i8 = 0; 36 | while quantity > 2 { 37 | value = rng.gen(); 38 | 39 | for _ in 
0..quantity { 40 | array.push(value); 41 | i += 1; 42 | } 43 | 44 | quantity = quantity / 2; 45 | } 46 | while i < size { 47 | array.push(value); 48 | i += 1; 49 | } 50 | 51 | array.as_mut_slice().shuffle(&mut rng); 52 | 53 | array 54 | } 55 | 56 | // Normale(0, 2^10) 57 | pub fn helper_random_array_normale_10_i8(size: usize) -> Vec { 58 | let normal = Normal::new(0.0, 1024.0).unwrap(); 59 | (0..size) 60 | .into_par_iter() 61 | .map(|_| normal.sample(&mut thread_rng()) as i8) 62 | .collect::>() 63 | } 64 | 65 | pub fn generators_i8() -> Vec<(&'static dyn Fn(usize) -> Vec, &'static str)> 66 | { 67 | vec![ 68 | (&helper_random_array_uniform_i8, "-- Unif :"), 69 | (&helper_random_array_asc_sawtooth_i8, "-- Asc Saw :"), 70 | (&helper_random_array_desc_sawtooth_i8, "-- Desc Saw :"), 71 | (&helper_random_array_allequals_i8, "-- Equal :"), 72 | (&helper_random_array_zipf_i8, "-- Zipf :"), 73 | (&helper_random_array_normale_10_i8, "-- Normale 10 :"), 74 | ] 75 | } 76 | 77 | #[cfg(target_pointer_width = "8")] 78 | pub fn generators_isize( 79 | ) -> Vec<(&'static dyn Fn(usize) -> Vec, &'static str)> { 80 | generators_i8() 81 | .into_iter() 82 | .map(|(gen, title)| { 83 | let new_gen = move |size: usize| -> Vec { 84 | unsafe { 85 | let arr = gen(size); 86 | std::mem::transmute::, Vec>(arr) 87 | } 88 | }; 89 | 90 | ( 91 | Box::leak(Box::new(new_gen)) 92 | as &'static dyn Fn(usize) -> Vec, 93 | title, 94 | ) 95 | }) 96 | .collect() 97 | } 98 | -------------------------------------------------------------------------------- /src/generators/unsigned_u8.rs: -------------------------------------------------------------------------------- 1 | use rand::seq::SliceRandom; 2 | use rand::{thread_rng, Rng}; 3 | use rand_distr::{Distribution, Normal}; 4 | use rayon::prelude::*; 5 | 6 | // Uniform 7 | pub fn helper_random_array_uniform_u8(size: usize) -> Vec { 8 | (0..size) 9 | .into_par_iter() 10 | .map(|_| thread_rng().gen::()) 11 | .collect::>() 12 | } 13 | 14 | // Ascending sawtooth 15 
| pub fn helper_random_array_asc_sawtooth_u8(size: usize) -> Vec { 16 | (0..size).into_par_iter().map(|i| i as u8).collect::>() 17 | } 18 | 19 | // Ascending sawtooth 20 | pub fn helper_random_array_desc_sawtooth_u8(size: usize) -> Vec { 21 | (0..size).into_par_iter().map(|i| (size - 1 - i) as u8).collect::>() 22 | } 23 | 24 | // All equals 25 | pub fn helper_random_array_allequals_u8(size: usize) -> Vec { 26 | vec![thread_rng().gen(); size] 27 | } 28 | 29 | // Zipf 30 | pub fn helper_random_array_zipf_u8(size: usize) -> Vec { 31 | let mut array: Vec = Vec::with_capacity(size); 32 | let mut rng = thread_rng(); 33 | let mut quantity = size / 2; 34 | let mut i = 0; 35 | let mut value: u8 = 0; 36 | while quantity > 2 { 37 | value = rng.gen(); 38 | 39 | for _ in 0..quantity { 40 | array.push(value); 41 | i += 1; 42 | } 43 | 44 | quantity = quantity / 2; 45 | } 46 | while i < size { 47 | array.push(value); 48 | i += 1; 49 | } 50 | 51 | array.as_mut_slice().shuffle(&mut rng); 52 | 53 | array 54 | } 55 | 56 | // Normale(0, 2^10) 57 | pub fn helper_random_array_normale_10_u8(size: usize) -> Vec { 58 | let normal = Normal::new(0.0, 1024.0).unwrap(); 59 | (0..size) 60 | .into_par_iter() 61 | .map(|_| normal.sample(&mut thread_rng()) as u8) 62 | .collect::>() 63 | } 64 | 65 | pub fn generators_u8() -> Vec<(&'static dyn Fn(usize) -> Vec, &'static str)> 66 | { 67 | vec![ 68 | (&helper_random_array_uniform_u8, "-- Unif :"), 69 | (&helper_random_array_asc_sawtooth_u8, "-- Asc Saw :"), 70 | (&helper_random_array_desc_sawtooth_u8, "-- Desc Saw :"), 71 | (&helper_random_array_allequals_u8, "-- Equal :"), 72 | (&helper_random_array_zipf_u8, "-- Zipf :"), 73 | (&helper_random_array_normale_10_u8, "-- Normale 10 :"), 74 | ] 75 | } 76 | 77 | #[cfg(target_pointer_width = "8")] 78 | pub fn generators_usize( 79 | ) -> Vec<(&'static dyn Fn(usize) -> Vec, &'static str)> { 80 | generators_u8() 81 | .into_iter() 82 | .map(|(gen, title)| { 83 | let new_gen = move |size: usize| -> Vec { 84 | 
unsafe { 85 | let arr = gen(size); 86 | std::mem::transmute::, Vec>(arr) 87 | } 88 | }; 89 | 90 | ( 91 | Box::leak(Box::new(new_gen)) 92 | as &'static dyn Fn(usize) -> Vec, 93 | title, 94 | ) 95 | }) 96 | .collect() 97 | } 98 | -------------------------------------------------------------------------------- /results/benchmark_1_2_0/benchmark_results_trait_mt_char: -------------------------------------------------------------------------------- 1 | running 1 test 2 | Number of iterations: 1 3 | Number of threads: 16 4 | With check: true 5 | === Test char === Trait Vora MT Rayon pll uns 6 | Array size: 20_000_000 7 | -- Unif :22786us 0ns (1.14ns) 90912us 0ns (4.55ns) 8 | -- Equal :7259us 0ns (0.36ns) 11694us 0ns (0.58ns) 9 | -- Charset :18421us 0ns (0.92ns) 69050us 0ns (3.45ns) 10 | -- Charset Den:22532us 0ns (1.13ns) 87117us 0ns (4.36ns) 11 | -- Charset VDe:31470us 0ns (1.57ns) 82313us 0ns (4.12ns) 12 | Array size: 50_000_000 13 | -- Unif :62119us 0ns (1.24ns) 202578us 0ns (4.05ns) 14 | -- Equal :22777us 0ns (0.46ns) 28878us 0ns (0.58ns) 15 | -- Charset :41645us 0ns (0.83ns) 187679us 0ns (3.75ns) 16 | -- Charset Den:61520us 0ns (1.23ns) 250144us 0ns (5.00ns) 17 | -- Charset VDe:69567us 0ns (1.39ns) 203425us 0ns (4.07ns) 18 | Array size: 100_000_000 19 | -- Unif :133227us 0ns (1.33ns) 453593us 0ns (4.54ns) 20 | -- Equal :45275us 0ns (0.45ns) 55218us 0ns (0.55ns) 21 | -- Charset :84778us 0ns (0.85ns) 410175us 0ns (4.10ns) 22 | -- Charset Den:130455us 0ns (1.30ns) 459777us 0ns (4.60ns) 23 | -- Charset VDe:139559us 0ns (1.40ns) 436588us 0ns (4.37ns) 24 | Array size: 1_000_000_000 25 | -- Unif :1732135us 0ns (1.73ns) 5248481us 0ns (5.25ns) 26 | -- Equal :453842us 0ns (0.45ns) 572438us 0ns (0.57ns) 27 | -- Charset :918784us 0ns (0.92ns) 4398379us 0ns (4.40ns) 28 | -- Charset Den:1312413us 0ns (1.31ns) 3941584us 0ns (3.94ns) 29 | -- Charset VDe:1446365us 0ns (1.45ns) 3905170us 0ns (3.91ns) 30 | Array size: 2_000_000_000 31 | -- Unif :3491247us 0ns (1.75ns) 11872448us 0ns 
(5.94ns) 32 | -- Equal :658751us 0ns (0.33ns) 1124791us 0ns (0.56ns) 33 | -- Charset :2035892us 0ns (1.02ns) 7710462us 0ns (3.86ns) 34 | -- Charset Den:2630436us 0ns (1.32ns) 7883963us 0ns (3.94ns) 35 | -- Charset VDe:3029768us 0ns (1.51ns) 7839258us 0ns (3.92ns) 36 | Array size: 5_000_000_000 37 | -- Unif :9285268us 0ns (1.86ns) 32106210us 0ns (6.42ns) 38 | -- Equal :1679663us 0ns (0.34ns) 2460292us 0ns (0.49ns) 39 | -- Charset :5102775us 0ns (1.02ns) 20381138us 0ns (4.08ns) 40 | -- Charset Den:6917731us 0ns (1.38ns) 19509643us 0ns (3.90ns) 41 | -- Charset VDe:8064494us 0ns (1.61ns) 18889841us 0ns (3.78ns) 42 | Array size: 7_000_000_000 43 | -- Unif :14280610us 0ns (2.04ns) 42914316us 0ns (6.13ns) 44 | -- Equal :2352124us 0ns (0.34ns) 3960869us 0ns (0.57ns) 45 | -- Charset :7321949us 0ns (1.05ns) 29033892us 0ns (4.15ns) 46 | -- Charset Den:10479000us 0ns (1.50ns) 28815081us 0ns (4.12ns) 47 | -- Charset VDe:11953183us 0ns (1.71ns) 27986142us 0ns (4.00ns) 48 | Array size: 10_000_000_000 49 | -- Unif :22708642us 0ns (2.27ns) 66106512us 0ns (6.61ns) 50 | -- Equal :3317373us 0ns (0.33ns) 5653910us 0ns (0.57ns) 51 | -- Charset :10906627us 0ns (1.09ns) 40959404us 0ns (4.10ns) 52 | -- Charset Den:16399595us 0ns (1.64ns) 39127113us 0ns (3.91ns) 53 | -- Charset VDe:17826355us 0ns (1.78ns) 38833871us 0ns (3.88ns) 54 | test tests::speed_sort::speed_test_char ... ok 55 | 56 | test result: ok. 
1 passed; 0 failed; 0 ignored; 0 measured; 1 filtered out; finished in 2229.87s 57 | -------------------------------------------------------------------------------- /results/benchmark_1_2_0/benchmark_results_trait_mt_i32: -------------------------------------------------------------------------------- 1 | Number of iterations: 1 2 | Number of threads: 16 3 | With check: false 4 | === Test i32 === Trait Vora MT Rayon pll uns 5 | Array size: 20000000 6 | -- Unif :30976us 0ns (1.55ns) 135218us 0ns (6.76ns) 7 | -- +-10^9 :35444us 0ns (1.77ns) 122915us 0ns (6.15ns) 8 | -- Small :39002us 0ns (1.95ns) 120956us 0ns (6.05ns) 9 | -- Normale 10 :41973us 0ns (2.10ns) 111000us 0ns (5.55ns) 10 | -- Normale 20 :41992us 0ns (2.10ns) 121137us 0ns (6.06ns) 11 | -- Normale 30 :35079us 0ns (1.75ns) 140539us 0ns (7.03ns) 12 | Array size: 50000000 13 | -- Unif :88195us 0ns (1.76ns) 337714us 0ns (6.75ns) 14 | -- +-10^9 :70602us 0ns (1.41ns) 463584us 0ns (9.27ns) 15 | -- Small :94622us 0ns (1.89ns) 330212us 0ns (6.60ns) 16 | -- Normale 10 :105740us 0ns (2.11ns) 362145us 0ns (7.24ns) 17 | -- Normale 20 :93785us 0ns (1.88ns) 338221us 0ns (6.76ns) 18 | -- Normale 30 :72056us 0ns (1.44ns) 351518us 0ns (7.03ns) 19 | Array size: 100000000 20 | -- Unif :137309us 0ns (1.37ns) 728515us 0ns (7.29ns) 21 | -- +-10^9 :159845us 0ns (1.60ns) 724309us 0ns (7.24ns) 22 | -- Small :180028us 0ns (1.80ns) 771581us 0ns (7.72ns) 23 | -- Normale 10 :211781us 0ns (2.12ns) 567168us 0ns (5.67ns) 24 | -- Normale 20 :192071us 0ns (1.92ns) 722103us 0ns (7.22ns) 25 | -- Normale 30 :131360us 0ns (1.31ns) 695101us 0ns (6.95ns) 26 | Array size: 1000000000 27 | -- Unif :1706026us 0ns (1.71ns) 7914877us 0ns (7.91ns) 28 | -- +-10^9 :1702239us 0ns (1.70ns) 8472559us 0ns (8.47ns) 29 | -- Small :2154556us 0ns (2.15ns) 7936913us 0ns (7.94ns) 30 | -- Normale 10 :2281500us 0ns (2.28ns) 8134604us 0ns (8.13ns) 31 | -- Normale 20 :2416547us 0ns (2.42ns) 7337746us 0ns (7.34ns) 32 | -- Normale 30 :1681421us 0ns (1.68ns) 8638530us 0ns 
(8.64ns) 33 | Array size: 2000000000 34 | -- Unif :3847697us 0ns (1.92ns) 16253900us 0ns (8.13ns) 35 | -- +-10^9 :3714446us 0ns (1.86ns) 16736285us 0ns (8.37ns) 36 | -- Small :4566089us 0ns (2.28ns) 16303297us 0ns (8.15ns) 37 | -- Normale 10 :4487215us 0ns (2.24ns) 14960588us 0ns (7.48ns) 38 | -- Normale 20 :4883694us 0ns (2.44ns) 15618931us 0ns (7.81ns) 39 | -- Normale 30 :3781162us 0ns (1.89ns) 16962622us 0ns (8.48ns) 40 | Array size: 5000000000 41 | -- Unif :11544519us 0ns (2.31ns) 43049751us 0ns (8.61ns) 42 | -- +-10^9 :10010445us 0ns (2.00ns) 43328624us 0ns (8.67ns) 43 | -- Small :14632942us 0ns (2.93ns) 40512934us 0ns (8.10ns) 44 | -- Normale 10 :13814118us 0ns (2.76ns) 43619970us 0ns (8.72ns) 45 | -- Normale 20 :13679703us 0ns (2.74ns) 40082123us 0ns (8.02ns) 46 | -- Normale 30 :11562335us 0ns (2.31ns) 43518908us 0ns (8.70ns) 47 | Array size: 7000000000 48 | -- Unif :16244393us 0ns (2.32ns) 59130733us 0ns (8.45ns) 49 | -- +-10^9 :15045825us 0ns (2.15ns) 62619568us 0ns (8.95ns) 50 | -- Small :21595886us 0ns (3.09ns) 58581062us 0ns (8.37ns) 51 | -- Normale 10 :20399994us 0ns (2.91ns) 53226976us 0ns (7.60ns) 52 | -- Normale 20 :20500678us 0ns (2.93ns) 63674549us 0ns (9.10ns) 53 | -- Normale 30 :16022827us 0ns (2.29ns) 57874928us 0ns (8.27ns) 54 | Array size: 10000000000 55 | -- Unif :25258179us 0ns (2.53ns) 89834083us 0ns (8.98ns) 56 | -- +-10^9 :23664834us 0ns (2.37ns) 89311784us 0ns (8.93ns) 57 | -- Small :32205041us 0ns (3.22ns) 92723392us 0ns (9.27ns) 58 | -- Normale 10 :30283297us 0ns (3.03ns) 78646776us 0ns (7.86ns) 59 | -- Normale 20 :31182816us 0ns (3.12ns) 84266853us 0ns (8.43ns) 60 | -- Normale 30 :24358048us 0ns (2.44ns) 84392827us 0ns (8.44ns) 61 | -------------------------------------------------------------------------------- /PROFILING.md: -------------------------------------------------------------------------------- 1 | # Profiling 2 | 3 | What is "profiling" ? Unlike the comparative sorts (most of them), radix sorts 4 | might need constants. 
Because a universal perfect sort does not exist, I chose 5 | to use the best sort I have with respect to the given input. 6 | 7 | So we have to take into account the type and the size of the input and the 8 | radix sort algorithm, and the latter might need different constants. 9 | 10 | For example (because it is always clearer with an example): 11 | 12 | A snippet of the implementation of the `Radixable` trait for the `f64` type. 13 | 14 | ```Rust 15 | fn voracious_sort(&self, arr: &mut [f64]) { 16 | if arr.len() <= 300 { 17 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()) 18 | } else if arr.len() < 800 { 19 | dlsd_radixsort(arr, 8); 20 | } else { 21 | rollercoaster_sort(arr, 8); 22 | } 23 | } 24 | fn voracious_mt_sort(&self, arr: &mut [Self], thread_n: usize) { 25 | if arr.len() < 800_000 { 26 | arr.par_sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 27 | } else { 28 | let chunk_size = if arr.len() < 1_000_000 { 29 | 100_000 30 | } else if arr.len() < 5_000_000 { 31 | 200_000 32 | } else if arr.len() < 20_000_000 { 33 | 500_000 34 | } else if arr.len() < 500_000_000 { 35 | 400_000 36 | } else { 37 | 500_000 38 | }; 39 | peeka_sort(arr, 8, chunk_size, thread_n); 40 | } 41 | } 42 | ``` 43 | 44 | As you can see, depending on the input size and the type, I will choose to use 45 | one sort or another. The chosen sort might need different constants such as the 46 | `radix` size or the `chunk_size` or the number of `thread`. 47 | 48 | Doing this takes a lot of time and is valid only for the computer on which I am 49 | doing the profiling. 50 | 51 | The default profiling in the `voracious_radix_sort` crate is done on an AMD Ryzen 52 | 9 3950x, 32GB RAM DDR4, MB X570 TUF Gaming. 53 | 54 | I will share here what is done, and what is not done yet. But don't forget that 55 | this profiling is valid for my computer, but a better profiling can be found for 56 | your computer. 57 | 58 | Since doing this takes a lot of time, I will do it one by one. 
59 | 60 | If your use case is not done yet, there is a "default default" profile, but 61 | clearly it is not optimized. 62 | 63 | If you are nice, you can do a PR 🙏 64 | 65 | # Profiling table 66 | 67 | | Ryzen 9 3950x | voracious_sort | voracious_stable_sort | voracious_mt_sort | 68 | |:-:|:-:|:-:|:-:| 69 | | bool | ✓ | ✓ | ✓ | 70 | | char | ✓ | ✓ | ✓ | 71 | | f32 | ✓ | ✓ | ✓ | 72 | | f64 | ✓ | ✓ | ✓ | 73 | | u8 | ✓ | ✓ | ✓ | 74 | | u16 | ✗ | ✗ | ✗ | 75 | | u32 | ✓ | ✓ | ✓ | 76 | | u64 | ✓ | ✓ | ✓ | 77 | | u128 | ✗ | ✗ | ✗ | 78 | | usize8 | ✓ | ✓ | ✓ | 79 | | usize16 | ✗ | ✗ | ✗ | 80 | | usize32 | ✓ | ✓ | ✓ | 81 | | usize64 | ✓ | ✓ | ✓ | 82 | | usize128 | ✗ | ✗ | ✗ | 83 | | i8 | ✗ | ✗ | ✗ | 84 | | i16 | ✗ | ✗ | ✗ | 85 | | i32 | ✓ | ✓ | ✓ | 86 | | i64 | ✓ | ✓ | ✓ | 87 | | i128 | ✗ | ✗ | ✗ | 88 | | isize8 | ✗ | ✗ | ✗ | 89 | | isize16 | ✗ | ✗ | ✗ | 90 | | isize32 | ✓ | ✓ | ✓ | 91 | | isize64 | ✓ | ✓ | ✓ | 92 | | isize128 | ✗ | ✗ | ✗ | 93 | | struct (bool) | ✓ | ✓ | ✓ | 94 | | struct (char) | ✓ | ✓ | ✓ | 95 | | struct (f32) | ✓ | ✓ | ✓ | 96 | | struct (f64) | ✓ | ✓ | ✓ | 97 | | struct (u8) | ✗ | ✗ | ✗ | 98 | | struct (u16) | ✗ | ✗ | ✗ | 99 | | struct (u32) | ✗ | ✗ | ✗ | 100 | | struct (u64) | ✗ | ✗ | ✗ | 101 | | struct (u128) | ✗ | ✗ | ✗ | 102 | | struct (usize) | ✗ | ✗ | ✗ | 103 | | struct (i8) | ✗ | ✗ | ✗ | 104 | | struct (i16) | ✗ | ✗ | ✗ | 105 | | struct (i32) | ✗ | ✗ | ✗ | 106 | | struct (i64) | ✗ | ✗ | ✗ | 107 | | struct (i128) | ✗ | ✗ | ✗ | 108 | | struct (isize) | ✗ | ✗ | ✗ | 109 | -------------------------------------------------------------------------------- /src/algo/k_way_merge_mt.rs: -------------------------------------------------------------------------------- 1 | use rayon::{ThreadPoolBuilder, ThreadPool}; 2 | 3 | use super::k_way_merge::merge2; 4 | 5 | fn kway_merge_mt_helper( 6 | pool: &ThreadPool, 7 | arr: &mut [T], 8 | buffer: &mut [T], 9 | separators: &mut Vec, 10 | ) { 11 | pool.scope(|s| { 12 | let half = (separators.len() - 1) / 2; 13 | let 
mut offset = 0; 14 | let mut rest = arr; 15 | let mut rest_buffer = buffer; 16 | let mut parts = Vec::new(); 17 | let mut buffer_parts = Vec::new(); 18 | for i in 0..half { 19 | let i2 = i * 2; 20 | let sep1 = separators[i2]; 21 | let sep3 = separators[i2 + 2]; 22 | let (part, snd) = rest.split_at_mut(sep3 - offset); 23 | rest = snd; 24 | parts.push(part); 25 | let (part, snd) = rest_buffer.split_at_mut(sep3 - offset); 26 | rest_buffer = snd; 27 | buffer_parts.push(part); 28 | offset += sep3 - sep1; 29 | } 30 | for (i, (part, buffer_part)) in 31 | parts.into_iter().zip(buffer_parts.into_iter()).enumerate() 32 | { 33 | let i2 = i * 2; 34 | let sep1 = separators[i2]; 35 | let sep2 = separators[i2 + 1]; 36 | let sep3 = separators[i2 + 2]; 37 | s.spawn(move |_| { 38 | merge2(part, buffer_part, 0, sep2 - sep1, sep3 - sep1); 39 | }); 40 | } 41 | for i in 0..half { 42 | separators.remove(i + 1); 43 | } 44 | }); 45 | } 46 | 47 | pub fn k_way_merge_mt( 48 | arr: &mut [T], 49 | buffer: &mut [T], 50 | separators: &mut Vec, 51 | thread_n: usize, 52 | ) { 53 | if separators.len() <= 2 { 54 | return; 55 | } 56 | 57 | if separators.len() == 3 { 58 | let min_length = 59 | if separators[1] - separators[0] <= separators[2] - separators[1] { 60 | separators[1] - separators[0] 61 | } else { 62 | separators[2] - separators[1] 63 | }; 64 | 65 | merge2( 66 | arr, 67 | &mut vec![arr[0]; min_length], 68 | separators[0], 69 | separators[1], 70 | separators[2], 71 | ); 72 | return; 73 | } 74 | 75 | let pool = ThreadPoolBuilder::new() 76 | .num_threads(thread_n) 77 | .build() 78 | .unwrap(); 79 | 80 | while separators.len() > 2 { 81 | kway_merge_mt_helper(&pool, arr, buffer, separators); 82 | } 83 | } 84 | 85 | pub fn k_way_merge_mt_with_buffer( 86 | arr: &mut [T], 87 | separators: &mut Vec, 88 | thread_n: usize, 89 | ) { 90 | if separators.len() <= 2 { 91 | return; 92 | } 93 | 94 | if separators.len() == 3 { 95 | let min_length = 96 | if separators[1] - separators[0] <= separators[2] - 
separators[1] { 97 | separators[1] - separators[0] 98 | } else { 99 | separators[2] - separators[1] 100 | }; 101 | 102 | merge2( 103 | arr, 104 | &mut vec![arr[0]; min_length], 105 | separators[0], 106 | separators[1], 107 | separators[2], 108 | ); 109 | return; 110 | } 111 | 112 | let pool = ThreadPoolBuilder::new() 113 | .num_threads(thread_n) 114 | .build() 115 | .unwrap(); 116 | 117 | let mut buffer: Vec = arr.to_vec(); 118 | let buffer = buffer.as_mut_slice(); 119 | while separators.len() > 2 { 120 | kway_merge_mt_helper(&pool, arr, buffer, separators); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/algo/k_way_merge.rs: -------------------------------------------------------------------------------- 1 | use super::super::sorts::utils::copy_nonoverlapping; 2 | 3 | fn forward_merge2( 4 | arr: &mut [T], 5 | copy: &mut [T], 6 | start: usize, 7 | middle: usize, 8 | end: usize, 9 | ) { 10 | if start < middle && middle < end && arr[middle - 1] <= arr[middle] { 11 | return; 12 | } 13 | 14 | copy_nonoverlapping(&mut arr[start..], copy, middle - start); 15 | 16 | let mut i = 0; 17 | let mut j = middle; 18 | let mut position = start; 19 | 20 | loop { 21 | if i == middle - start { 22 | return; 23 | } 24 | if j == end { 25 | let size = middle - start - i; 26 | let (_, mut rest_copy) = copy.split_at_mut(i); 27 | let (_, mut rest_arr) = arr.split_at_mut(position); 28 | copy_nonoverlapping(&mut rest_copy, &mut rest_arr, size); 29 | return; 30 | } 31 | 32 | if copy[i] <= arr[j] { 33 | arr[position] = copy[i]; 34 | i += 1; 35 | } else { 36 | arr.swap(position, j); 37 | j += 1; 38 | } 39 | position += 1; 40 | } 41 | } 42 | 43 | fn backward_merge2( 44 | arr: &mut [T], 45 | copy: &mut [T], 46 | start: usize, 47 | middle: usize, 48 | end: usize, 49 | ) { 50 | if start < middle && middle < end && arr[middle - 1] <= arr[middle] { 51 | return; 52 | } 53 | 54 | copy_nonoverlapping(&mut arr[middle..], copy, end - middle); 55 | 
56 | let mut i: isize = (end - middle - 1) as isize; 57 | let mut j: isize = (middle - 1) as isize; 58 | let mut position = end - 1; 59 | 60 | loop { 61 | if i == -1 { 62 | return; 63 | } 64 | if j == start as isize - 1 { 65 | let (mut rest_copy, _) = copy.split_at_mut(i as usize + 1); 66 | let (_, mut rest_arr) = arr.split_at_mut(start); 67 | copy_nonoverlapping(&mut rest_copy, &mut rest_arr, i as usize + 1); 68 | return; 69 | } 70 | 71 | if copy[i as usize] >= arr[j as usize] { 72 | arr[position] = copy[i as usize]; 73 | i -= 1; 74 | } else { 75 | arr.swap(position, j as usize); 76 | j -= 1; 77 | } 78 | position -= 1; 79 | } 80 | } 81 | 82 | pub fn merge2( 83 | arr: &mut [T], 84 | copy: &mut [T], 85 | start: usize, 86 | middle: usize, 87 | end: usize, 88 | ) { 89 | if middle - start <= end - middle { 90 | forward_merge2(arr, copy, start, middle, end); 91 | } else { 92 | backward_merge2(arr, copy, start, middle, end); 93 | } 94 | } 95 | 96 | pub fn k_way_merge( 97 | arr: &mut [T], 98 | separators: &mut Vec, 99 | ) { 100 | if separators.len() <= 2 { 101 | return; 102 | } 103 | 104 | if separators.len() == 3 { 105 | let min_length = 106 | if separators[1] - separators[0] <= separators[2] - separators[1] { 107 | separators[1] - separators[0] 108 | } else { 109 | separators[2] - separators[1] 110 | }; 111 | 112 | merge2( 113 | arr, 114 | &mut vec![arr[0]; min_length], 115 | separators[0], 116 | separators[1], 117 | separators[2], 118 | ); 119 | return; 120 | } 121 | 122 | let mut copy: Vec = vec![arr[0]; (arr.len() / 2) + 2]; 123 | while separators.len() > 2 { 124 | let half = (separators.len() - 1) / 2; 125 | for i in 0..half { 126 | let i2 = i * 2; 127 | merge2( 128 | arr, 129 | &mut copy, 130 | separators[i2], 131 | separators[i2 + 1], 132 | separators[i2 + 2], 133 | ); 134 | } 135 | for i in 0..half { 136 | separators.remove(i + 1); 137 | } 138 | } 139 | } 140 | -------------------------------------------------------------------------------- 
/results/benchmark_1_2_0/benchmark_results_trait_u128: -------------------------------------------------------------------------------- 1 | running 1 test 2 | Number of iterations: 5 3 | Number of threads: 16 4 | With check: true 5 | === Test u128 === Trait Vora Uns or stable Trait Vora MT Rust Uns Rust Std Rayon pll uns 6 | Array size: 100 7 | -- Unif :4us 204ns (41.27ns) 4us 210ns (41.80ns) 3us 188ns (36.74ns) 6us 530ns (64.24ns) 3us 158ns (33.53ns) 8 | -- Unif 10^9 :3us 155ns (33.23ns) 3us 156ns (33.63ns) 3us 158ns (33.55ns) 5us 237ns (52.19ns) 3us 147ns (32.63ns) 9 | Array size: 1000 10 | -- Unif :31us 598ns (31.64ns) 28us 417ns (28.50ns) 33us 473ns (33.40ns) 66us 936ns (66.13ns) 32us 463ns (32.52ns) 11 | -- Unif 10^9 :27us 399ns (27.42ns) 25us 369ns (25.90ns) 33us 479ns (33.69ns) 66us 946ns (66.68ns) 32us 459ns (32.32ns) 12 | Array size: 5000 13 | -- Unif :95us 609ns (19.05ns) 95us 607ns (19.10ns) 146us 929ns (29.30ns) 389us 2470ns (77.98ns) 139us 1253ns (27.84ns) 14 | -- Unif 10^9 :94us 597ns (18.81ns) 96us 613ns (19.30ns) 145us 923ns (29.12ns) 389us 2463ns (77.89ns) 134us 1586ns (26.93ns) 15 | Array size: 10_000 16 | -- Unif :181us 924ns (18.19ns) 168us 758ns (16.89ns) 320us 1507ns (32.07ns) 852us 3813ns (85.26ns) 197us 951ns (19.79ns) 17 | -- Unif 10^9 :174us 801ns (17.41ns) 173us 777ns (17.30ns) 294us 1319ns (29.42ns) 845us 3783ns (84.59ns) 212us 1055ns (21.25ns) 18 | Array size: 20_000 19 | -- Unif :342us 1118ns (17.11ns) 327us 1037ns (16.38ns) 580us 1837ns (29.04ns) 1824us 5784ns (91.23ns) 349us 1146ns (17.50ns) 20 | -- Unif 10^9 :402us 2230ns (20.12ns) 334us 1061ns (16.75ns) 583us 1849ns (29.19ns) 1898us 6118ns (94.90ns) 336us 1192ns (16.84ns) 21 | Array size: 50_000 22 | -- Unif :798us 1660ns (15.98ns) 793us 1589ns (15.86ns) 1520us 3043ns (30.42ns) 5005us 10012ns (100.10ns) 537us 1124ns (10.76ns) 23 | -- Unif 10^9 :812us 1630ns (16.25ns) 789us 1586ns (15.79ns) 1518us 3038ns (30.37ns) 4963us 9928ns (99.27ns) 669us 1434ns (13.39ns) 24 | Array size: 
100_000 25 | -- Unif :1830us 2600ns (18.31ns) 1828us 2588ns (18.28ns) 3227us 4686ns (32.28ns) 10529us 14891ns (105.30ns) 1060us 1586ns (10.61ns) 26 | -- Unif 10^9 :1869us 2645ns (18.69ns) 1868us 2644ns (18.69ns) 3138us 4449ns (31.38ns) 10547us 14918ns (105.48ns) 980us 1420ns (9.80ns) 27 | Array size: 200_000 28 | -- Unif :4077us 4078ns (20.39ns) 6115us 6272ns (30.58ns) 7434us 7655ns (37.17ns) 22253us 22272ns (111.27ns) 1972us 2028ns (9.86ns) 29 | -- Unif 10^9 :4064us 4066ns (20.32ns) 6922us 6931ns (34.61ns) 6897us 6935ns (34.49ns) 22157us 22159ns (110.79ns) 1875us 1990ns (9.38ns) 30 | Array size: 500_000 31 | -- Unif :9708us 6144ns (19.42ns) 10979us 6994ns (21.96ns) 20031us 13001ns (40.06ns) 59707us 37763ns (119.42ns) 3998us 2549ns (8.00ns) 32 | -- Unif 10^9 :9625us 6091ns (19.25ns) 12719us 8066ns (25.44ns) 19666us 13226ns (39.33ns) 59809us 37827ns (119.62ns) 4221us 2685ns (8.44ns) 33 | Array size: 1_000_000 34 | -- Unif :21451us 9601ns (21.45ns) 11007us 4926ns (11.01ns) 45782us 20522ns (45.78ns) 131327us 58734ns (131.33ns) 7633us 3549ns (7.63ns) 35 | -- Unif 10^9 :22391us 10017ns (22.39ns) 10591us 4805ns (10.59ns) 44325us 19979ns (44.33ns) 130305us 58275ns (130.31ns) 8094us 3664ns (8.09ns) 36 | Array size: 2_000_000 37 | -- Unif :55965us 17806ns (27.98ns) 14906us 4754ns (7.45ns) 94190us 30110ns (47.10ns) 275625us 87171ns (137.81ns) 16397us 5304ns (8.20ns) 38 | -- Unif 10^9 :56637us 18201ns (28.32ns) 14773us 4699ns (7.39ns) 95841us 30356ns (47.92ns) 276095us 87311ns (138.05ns) 17770us 5761ns (8.89ns) 39 | Array size: 5_000_000 40 | -- Unif :142822us 28632ns (28.56ns) 31846us 6408ns (6.37ns) 249059us 50487ns (49.81ns) 739509us 147902ns (147.90ns) 52624us 10697ns (10.52ns) 41 | -- Unif 10^9 :138291us 27792ns (27.66ns) 31728us 6403ns (6.35ns) 252163us 50769ns (50.43ns) 740872us 148275ns (148.17ns) 53252us 11138ns (10.65ns) 42 | Array size: 10_000_000 43 | -- Unif :277892us 39531ns (27.79ns) 70019us 9913ns (7.00ns) 519646us 73911ns (51.96ns) 1511228us 213731ns 
(151.12ns) 107383us 15280ns (10.74ns) 44 | -- Unif 10^9 :276395us 39101ns (27.64ns) 70771us 10018ns (7.08ns) 515625us 73312ns (51.56ns) 1518822us 214816ns (151.88ns) 106622us 15123ns (10.66ns) 45 | test tests::speed_sort::speed_test_u128 ... ok 46 | 47 | test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 1 filtered out; finished in 53.97s 48 | -------------------------------------------------------------------------------- /results/benchmark_1_2_0/benchmark_results_trait_i128: -------------------------------------------------------------------------------- 1 | running 1 test 2 | Number of iterations: 5 3 | Number of threads: 16 4 | With check: true 5 | === Test i128 === Trait Vora Uns or stable Trait Vora MT Rust Uns Rust Std Rayon pll uns 6 | Array size: 100 7 | -- Unif :3us 159ns (30.08ns) 2us 120ns (26.27ns) 2us 119ns (25.99ns) 5us 285ns (51.27ns) 2us 126ns (27.25ns) 8 | -- +-10^9 :3us 161ns (34.51ns) 3us 172ns (35.36ns) 3us 142ns (31.38ns) 5us 233ns (50.37ns) 2us 120ns (25.84ns) 9 | Array size: 1000 10 | -- Unif :25us 572ns (25.40ns) 19us 273ns (19.10ns) 26us 382ns (27.00ns) 62us 888ns (62.74ns) 27us 389ns (27.50ns) 11 | -- +-10^9 :169us 2972ns (169.92ns) 151us 2150ns (151.55ns) 26us 379ns (26.75ns) 62us 885ns (62.51ns) 26us 379ns (26.72ns) 12 | Array size: 5000 13 | -- Unif :95us 609ns (19.11ns) 93us 595ns (18.70ns) 143us 909ns (28.67ns) 379us 2404ns (75.99ns) 101us 702ns (20.33ns) 14 | -- +-10^9 :659us 4194ns (131.99ns) 669us 4240ns (134.00ns) 143us 908ns (28.71ns) 380us 2409ns (76.15ns) 142us 1927ns (28.54ns) 15 | Array size: 10000 16 | -- Unif :178us 827ns (17.86ns) 170us 780ns (17.10ns) 294us 1317ns (29.43ns) 836us 3740ns (83.60ns) 192us 934ns (19.22ns) 17 | -- +-10^9 :1284us 5772ns (128.46ns) 1258us 5630ns (125.83ns) 361us 1825ns (36.11ns) 839us 3757ns (83.98ns) 199us 974ns (19.99ns) 18 | Array size: 20000 19 | -- Unif :337us 1077ns (16.86ns) 333us 1057ns (16.69ns) 616us 1950ns (30.82ns) 1817us 5749ns (90.88ns) 332us 1128ns (16.64ns) 20 | -- 
+-10^9 :2470us 7834ns (123.53ns) 2459us 7789ns (122.99ns) 665us 2111ns (33.25ns) 1815us 5740ns (90.76ns) 349us 1137ns (17.46ns) 21 | Array size: 50000 22 | -- Unif :822us 1649ns (16.46ns) 804us 1612ns (16.09ns) 1720us 3463ns (34.41ns) 4962us 9926ns (99.26ns) 587us 1322ns (11.76ns) 23 | -- +-10^9 :6346us 12702ns (126.93ns) 6337us 12683ns (126.75ns) 1844us 3914ns (36.88ns) 4976us 9953ns (99.53ns) 634us 1403ns (12.70ns) 24 | Array size: 100000 25 | -- Unif :1881us 2675ns (18.82ns) 1841us 2605ns (18.42ns) 3777us 5556ns (37.77ns) 10567us 14946ns (105.67ns) 1094us 1980ns (10.95ns) 26 | -- +-10^9 :5596us 7916ns (55.97ns) 5606us 7929ns (56.07ns) 4082us 6653ns (40.82ns) 10580us 14963ns (105.80ns) 1086us 1714ns (10.87ns) 27 | Array size: 200000 28 | -- Unif :4150us 4151ns (20.75ns) 6272us 6594ns (31.36ns) 9827us 10582ns (49.14ns) 22622us 22624ns (113.11ns) 1693us 1729ns (8.47ns) 29 | -- +-10^9 :9508us 9510ns (47.54ns) 7323us 7520ns (36.62ns) 10239us 10342ns (51.20ns) 22597us 22598ns (112.99ns) 1924us 1943ns (9.62ns) 30 | Array size: 500000 31 | -- Unif :10037us 6349ns (20.07ns) 11856us 7528ns (23.71ns) 26868us 17917ns (53.74ns) 60990us 38574ns (121.98ns) 5735us 4286ns (11.47ns) 32 | -- +-10^9 :24286us 15361ns (48.57ns) 15186us 9727ns (30.37ns) 26810us 17464ns (53.62ns) 60602us 38329ns (121.21ns) 4726us 3071ns (9.45ns) 33 | Array size: 1000000 34 | -- Unif :22070us 9872ns (22.07ns) 11439us 5313ns (11.44ns) 60512us 32741ns (60.51ns) 127146us 56871ns (127.15ns) 9489us 4316ns (9.49ns) 35 | -- +-10^9 :53095us 24317ns (53.10ns) 29527us 13689ns (29.53ns) 59490us 27089ns (59.49ns) 127499us 57021ns (127.50ns) 10482us 5694ns (10.48ns) 36 | Array size: 2000000 37 | -- Unif :56553us 17946ns (28.28ns) 15105us 4796ns (7.55ns) 127600us 40767ns (63.80ns) 271425us 85834ns (135.71ns) 25125us 8508ns (12.56ns) 38 | -- +-10^9 :128588us 41545ns (64.29ns) 71225us 23324ns (35.61ns) 128755us 42816ns (64.38ns) 273794us 86584ns (136.90ns) 22211us 7052ns (11.11ns) 39 | Array size: 5000000 40 | -- Unif 
:134708us 26963ns (26.94ns) 31855us 6485ns (6.37ns) 317921us 64231ns (63.58ns) 711182us 142237ns (142.24ns) 69538us 16506ns (13.91ns) 41 | -- +-10^9 :334157us 66866ns (66.83ns) 96913us 20932ns (19.38ns) 334996us 67463ns (67.00ns) 710658us 142146ns (142.13ns) 66462us 13609ns (13.29ns) 42 | Array size: 10000000 43 | -- Unif :263080us 37224ns (26.31ns) 70916us 10035ns (7.09ns) 676433us 96015ns (67.64ns) 1498794us 211965ns (149.88ns) 144983us 20901ns (14.50ns) 44 | -- +-10^9 :713573us 100925ns (71.36ns) 172709us 24434ns (17.27ns) 678170us 97231ns (67.82ns) test tests::speed_sort::speed_test_i128 has been running for over 60 seconds 45 | 1495750us 211531ns (149.58ns) 133857us 18933ns (13.39ns) 46 | test tests::speed_sort::speed_test_i128 ... ok 47 | 48 | test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 1 filtered out; finished in 62.03s 49 | -------------------------------------------------------------------------------- /results/benchmark_1_2_0/benchmark_results_trait_mt_i64: -------------------------------------------------------------------------------- 1 | Number of iterations: 1 2 | Number of threads: 16 3 | With check: false 4 | === Test i64 === Trait Vora MT Rayon pll uns 5 | Array size: 20000000 6 | -- Unif :55857us 0ns (2.79ns) 140292us 0ns (7.01ns) 7 | -- +-10^9 :87180us 0ns (4.36ns) 139197us 0ns (6.96ns) 8 | -- Normale 10 :106162us 0ns (5.31ns) 158865us 0ns (7.94ns) 9 | -- Normale 20 :108920us 0ns (5.45ns) 137859us 0ns (6.89ns) 10 | -- Normale 30 :101009us 0ns (5.05ns) 153936us 0ns (7.70ns) 11 | -- Normale 40 :93886us 0ns (4.69ns) 137990us 0ns (6.90ns) 12 | -- Normale 51 :96646us 0ns (4.83ns) 130862us 0ns (6.54ns) 13 | -- Normale 63 :66489us 0ns (3.32ns) 155646us 0ns (7.78ns) 14 | Array size: 50000000 15 | -- Unif :142846us 0ns (2.86ns) 409386us 0ns (8.19ns) 16 | -- +-10^9 :215611us 0ns (4.31ns) 482516us 0ns (9.65ns) 17 | -- Normale 10 :275169us 0ns (5.50ns) 410503us 0ns (8.21ns) 18 | -- Normale 20 :252169us 0ns (5.04ns) 373763us 0ns (7.48ns) 19 | 
-- Normale 30 :234486us 0ns (4.69ns) 375946us 0ns (7.52ns) 20 | -- Normale 40 :240525us 0ns (4.81ns) 471452us 0ns (9.43ns) 21 | -- Normale 51 :205785us 0ns (4.12ns) 474858us 0ns (9.50ns) 22 | -- Normale 63 :132236us 0ns (2.64ns) 391806us 0ns (7.84ns) 23 | Array size: 100000000 24 | -- Unif :266748us 0ns (2.67ns) 837010us 0ns (8.37ns) 25 | -- +-10^9 :632480us 0ns (6.32ns) 835201us 0ns (8.35ns) 26 | -- Normale 10 :609988us 0ns (6.10ns) 835127us 0ns (8.35ns) 27 | -- Normale 20 :569103us 0ns (5.69ns) 863455us 0ns (8.63ns) 28 | -- Normale 30 :523437us 0ns (5.23ns) 888369us 0ns (8.88ns) 29 | -- Normale 40 :516073us 0ns (5.16ns) 909072us 0ns (9.09ns) 30 | -- Normale 51 :501989us 0ns (5.02ns) 899054us 0ns (8.99ns) 31 | -- Normale 63 :319675us 0ns (3.20ns) 902424us 0ns (9.02ns) 32 | Array size: 1000000000 33 | -- Unif :3816261us 0ns (3.82ns) 10738230us 0ns (10.74ns) 34 | -- +-10^9 :6282277us 0ns (6.28ns) 11724745us 0ns (11.72ns) 35 | -- Normale 10 :6163895us 0ns (6.16ns) 10547444us 0ns (10.55ns) 36 | -- Normale 20 :6623726us 0ns (6.62ns) 10828087us 0ns (10.83ns) 37 | -- Normale 30 :6615460us 0ns (6.62ns) 10838655us 0ns (10.84ns) 38 | -- Normale 40 :5986847us 0ns (5.99ns) 11474646us 0ns (11.47ns) 39 | -- Normale 51 :5438124us 0ns (5.44ns) 10769649us 0ns (10.77ns) 40 | -- Normale 63 :3844791us 0ns (3.84ns) 10906720us 0ns (10.91ns) 41 | Array size: 2000000000 42 | -- Unif :8342235us 0ns (4.17ns) 23175690us 0ns (11.59ns) 43 | -- +-10^9 :12694863us 0ns (6.35ns) 22982121us 0ns (11.49ns) 44 | -- Normale 10 :12156192us 0ns (6.08ns) 22312168us 0ns (11.16ns) 45 | -- Normale 20 :13117127us 0ns (6.56ns) 23493798us 0ns (11.75ns) 46 | -- Normale 30 :13730572us 0ns (6.87ns) 23015205us 0ns (11.51ns) 47 | -- Normale 40 :12836134us 0ns (6.42ns) 22674208us 0ns (11.34ns) 48 | -- Normale 51 :11037694us 0ns (5.52ns) 22823053us 0ns (11.41ns) 49 | -- Normale 63 :8477333us 0ns (4.24ns) 22722396us 0ns (11.36ns) 50 | Array size: 5000000000 51 | -- Unif :23854187us 0ns (4.77ns) 62492507us 0ns 
(12.50ns) 52 | -- +-10^9 :39450639us 0ns (7.89ns) 62853312us 0ns (12.57ns) 53 | -- Normale 10 :38498584us 0ns (7.70ns) 54565220us 0ns (10.91ns) 54 | -- Normale 20 :39333238us 0ns (7.87ns) 61508499us 0ns (12.30ns) 55 | -- Normale 30 :38567870us 0ns (7.71ns) 62446359us 0ns (12.49ns) 56 | -- Normale 40 :37297597us 0ns (7.46ns) 63091893us 0ns (12.62ns) 57 | -- Normale 51 :34175495us 0ns (6.84ns) 62622371us 0ns (12.52ns) 58 | -- Normale 63 :23883239us 0ns (4.78ns) 62318863us 0ns (12.46ns) 59 | Array size: 7000000000 60 | -- Unif :34761564us 0ns (4.97ns) 88681837us 0ns (12.67ns) 61 | -- +-10^9 :56331466us 0ns (8.05ns) 96178579us 0ns (13.74ns) 62 | -- Normale 10 :54813562us 0ns (7.83ns) 79034344us 0ns (11.29ns) 63 | -- Normale 20 :57370777us 0ns (8.20ns) 88490065us 0ns (12.64ns) 64 | -- Normale 30 :55115340us 0ns (7.87ns) 94456082us 0ns (13.49ns) 65 | -- Normale 40 :54535254us 0ns (7.79ns) 88437528us 0ns (12.63ns) 66 | -- Normale 51 :51414554us 0ns (7.34ns) 90428565us 0ns (12.92ns) 67 | -- Normale 63 :34879973us 0ns (4.98ns) 89437490us 0ns (12.78ns) 68 | Array size: 10000000000 69 | -- Unif :52680003us 0ns (5.27ns) 130143162us 0ns (13.01ns) 70 | -- +-10^9 :81203541us 0ns (8.12ns) 130751327us 0ns (13.08ns) 71 | -- Normale 10 :79495653us 0ns (7.95ns) 111830226us 0ns (11.18ns) 72 | -- Normale 20 :83124213us 0ns (8.31ns) 129861673us 0ns (12.99ns) 73 | -- Normale 30 :80708191us 0ns (8.07ns) 132158570us 0ns (13.22ns) 74 | -- Normale 40 :80531710us 0ns (8.05ns) 139493485us 0ns (13.95ns) 75 | -- Normale 51 :77779735us 0ns (7.78ns) 130680004us 0ns (13.07ns) 76 | -- Normale 63 :51661883us 0ns (5.17ns) 128286213us 0ns (12.83ns) 77 | -------------------------------------------------------------------------------- /results/benchmark_1_2_0/benchmark_results_trait_mt_f32: -------------------------------------------------------------------------------- 1 | running 1 test 2 | Number of iterations: 1 3 | Number of threads: 16 4 | With check: false 5 | === Test f32 === Trait Vora MT Rayon 
pll uns 6 | Array size: 20_000_000 7 | -- Unif :37885us 0ns (1.89ns) 153699us 0ns (7.68ns) 8 | -- Small :33311us 0ns (1.67ns) 145556us 0ns (7.28ns) 9 | -- Pareto 0.75:36793us 0ns (1.84ns) 152100us 0ns (7.61ns) 10 | -- Pareto 1.00:42664us 0ns (2.13ns) 147045us 0ns (7.35ns) 11 | -- Pareto 2.00:38200us 0ns (1.91ns) 139648us 0ns (6.98ns) 12 | -- Normale 10 :39756us 0ns (1.99ns) 157572us 0ns (7.88ns) 13 | -- Normale 20 :35931us 0ns (1.80ns) 148492us 0ns (7.42ns) 14 | -- Normale 30 :33684us 0ns (1.68ns) 149697us 0ns (7.48ns) 15 | Array size: 50_000_000 16 | -- Unif :98018us 0ns (1.96ns) 382918us 0ns (7.66ns) 17 | -- Small :93243us 0ns (1.86ns) 366359us 0ns (7.33ns) 18 | -- Pareto 0.75:93911us 0ns (1.88ns) 374948us 0ns (7.50ns) 19 | -- Pareto 1.00:92473us 0ns (1.85ns) 371892us 0ns (7.44ns) 20 | -- Pareto 2.00:111348us 0ns (2.23ns) 417455us 0ns (8.35ns) 21 | -- Normale 10 :85866us 0ns (1.72ns) 399454us 0ns (7.99ns) 22 | -- Normale 20 :100656us 0ns (2.01ns) 382216us 0ns (7.64ns) 23 | -- Normale 30 :89773us 0ns (1.80ns) 377935us 0ns (7.56ns) 24 | Array size: 100_000_000 25 | -- Unif :167543us 0ns (1.68ns) 768588us 0ns (7.69ns) 26 | -- Small :186551us 0ns (1.87ns) 753142us 0ns (7.53ns) 27 | -- Pareto 0.75:183543us 0ns (1.84ns) 790559us 0ns (7.91ns) 28 | -- Pareto 1.00:171045us 0ns (1.71ns) 768949us 0ns (7.69ns) 29 | -- Pareto 2.00:186219us 0ns (1.86ns) 719392us 0ns (7.19ns) 30 | -- Normale 10 :179081us 0ns (1.79ns) 815346us 0ns (8.15ns) 31 | -- Normale 20 :174820us 0ns (1.75ns) 783596us 0ns (7.84ns) 32 | -- Normale 30 :177835us 0ns (1.78ns) 794128us 0ns (7.94ns) 33 | Array size: 1_000_000_000 34 | -- Unif :2118457us 0ns (2.12ns) 8898425us 0ns (8.90ns) 35 | -- Small :2108885us 0ns (2.11ns) 7726613us 0ns (7.73ns) 36 | -- Pareto 0.75:2201330us 0ns (2.20ns) 7576828us 0ns (7.58ns) 37 | -- Pareto 1.00:2071227us 0ns (2.07ns) 7556629us 0ns (7.56ns) 38 | -- Pareto 2.00:2173539us 0ns (2.17ns) 7759428us 0ns (7.76ns) 39 | -- Normale 10 :2179490us 0ns (2.18ns) 7924790us 0ns (7.92ns) 40 | 
-- Normale 20 :2154271us 0ns (2.15ns) 7784066us 0ns (7.78ns) 41 | -- Normale 30 :2106249us 0ns (2.11ns) 8016077us 0ns (8.02ns) 42 | Array size: 2_000_000_000 43 | -- Unif :4514503us 0ns (2.26ns) 15962533us 0ns (7.98ns) 44 | -- Small :4678687us 0ns (2.34ns) 15734222us 0ns (7.87ns) 45 | -- Pareto 0.75:4613057us 0ns (2.31ns) 15523214us 0ns (7.76ns) 46 | -- Pareto 1.00:4555260us 0ns (2.28ns) 17151916us 0ns (8.58ns) 47 | -- Pareto 2.00:4830792us 0ns (2.42ns) 15430518us 0ns (7.72ns) 48 | -- Normale 10 :4546046us 0ns (2.27ns) 15767762us 0ns (7.88ns) 49 | -- Normale 20 :4633876us 0ns (2.32ns) 16088779us 0ns (8.04ns) 50 | -- Normale 30 :4608580us 0ns (2.30ns) 16086400us 0ns (8.04ns) 51 | Array size: 5_000_000_000 52 | -- Unif :13935602us 0ns (2.79ns) 40480472us 0ns (8.10ns) 53 | -- Small :13637361us 0ns (2.73ns) 41086303us 0ns (8.22ns) 54 | -- Pareto 0.75:13737197us 0ns (2.75ns) 39759986us 0ns (7.95ns) 55 | -- Pareto 1.00:13585368us 0ns (2.72ns) 40513919us 0ns (8.10ns) 56 | -- Pareto 2.00:13575404us 0ns (2.72ns) 40067346us 0ns (8.01ns) 57 | -- Normale 10 :12891387us 0ns (2.58ns) 41642409us 0ns (8.33ns) 58 | -- Normale 20 :12901479us 0ns (2.58ns) 41019818us 0ns (8.20ns) 59 | -- Normale 30 :12870223us 0ns (2.57ns) 41166356us 0ns (8.23ns) 60 | Array size: 7_000_000_000 61 | -- Unif :20711575us 0ns (2.96ns) 58456225us 0ns (8.35ns) 62 | -- Small :20324665us 0ns (2.90ns) 57440686us 0ns (8.21ns) 63 | -- Pareto 0.75:20570328us 0ns (2.94ns) 60000244us 0ns (8.57ns) 64 | -- Pareto 1.00:20548788us 0ns (2.94ns) 61165571us 0ns (8.74ns) 65 | -- Pareto 2.00:20637999us 0ns (2.95ns) 57118547us 0ns (8.16ns) 66 | -- Normale 10 :19179429us 0ns (2.74ns) 58841411us 0ns (8.41ns) 67 | -- Normale 20 :19203132us 0ns (2.74ns) 62150317us 0ns (8.88ns) 68 | -- Normale 30 :19153203us 0ns (2.74ns) 56539817us 0ns (8.08ns) 69 | Array size: 10_000_000_000 70 | -- Unif :31431343us 0ns (3.14ns) 84526201us 0ns (8.45ns) 71 | -- Small :31273745us 0ns (3.13ns) 82075679us 0ns (8.21ns) 72 | -- Pareto 0.75:30772276us 
0ns (3.08ns) 87214745us 0ns (8.72ns) 73 | -- Pareto 1.00:31018503us 0ns (3.10ns) 86807827us 0ns (8.68ns) 74 | -- Pareto 2.00:31400963us 0ns (3.14ns) 85885460us 0ns (8.59ns) 75 | -- Normale 10 :29306760us 0ns (2.93ns) 83905086us 0ns (8.39ns) 76 | -- Normale 20 :29216320us 0ns (2.92ns) 84682612us 0ns (8.47ns) 77 | -- Normale 30 :29267997us 0ns (2.93ns) 85118480us 0ns (8.51ns) 78 | test tests::speed_sort::speed_test_f32 ... ok 79 | 80 | test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 1 filtered out; finished in 2907.10s 81 | -------------------------------------------------------------------------------- /results/benchmark_1_0_0/peeka_sort_vs_regions_sort.md: -------------------------------------------------------------------------------- 1 | # Peeka sort vs Regions sort 2 | 3 | Computer: Ryzen 9 3950x (16 physical cores, 32 threads), 32GB RAM DDR4, MB X570 TUF Gaming 4 | 5 | Peeka sort: 6 | - Language: Rust 7 | - Voracious radix sort v1.0.0 8 | - threads: 16 9 | ``` 10 | Please note that with more than 16 threads given to Rayon threadpool, 11 | performance decreases. 12 | ``` 13 | ``` 14 | Peeka sort can sort all types supported by the crate. A dedicated implementation 15 | for unsigned integer should be faster. 16 | ``` 17 | 18 | Regions sort: Clone from the [repository](https://github.com/omarobeya/parallel-inplace-radixsort). 19 | - Language: C++ 20 | - threads: 32 21 | 22 | ## Results 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 |
u32u64
Array sizeDistributionPeeka sort (5 runs)Regions sort (1 run)Peeka sort (5 runs)Regions sort (1 run)
1_000_000Unif3582us15ms4538us15ms
Unif 10^93781us4ms4445us4ms
5_000_000Unif10677us17ms13904us21ms
Unif 10^99472us8ms15403us9ms
10_000_000Unif15561us20ms24992us30ms
Unif 10^916618us15ms25392us24ms
20_000_000Unif27316us30ms46706us45ms
Unif 10^926586us31ms45298us48ms
50_000_000Unif69710us63ms111295us133ms
Unif 10^962739us66ms106242us123ms
100_000_000Unif117199us143ms194142us254ms
Unif 10^9113328us135ms193344us246ms
200_000_000Unif264979us266ms465165us514ms
Unif 10^9249163us275ms442913us498ms
300_000_000Unif411247us402ms706385us769ms
Unif 10^9420591us393ms691201us749ms
400_000_000Unif489960us556ms980324us1013ms
Unif 10^9485013us514ms969742us996ms
500_000_000Unif632402us711ms1310269us1278ms
Unif 10^9627480us641ms1244246us1238ms
600_000_000Unif771349us870ms1551487us1536ms
Unif 10^9730904us766ms1522778us1479ms
700_000_000Unif925669us1013ms1864540us1796ms
Unif 10^9890647us893ms1840039us1722ms
800_000_000Unif1075767us1124ms2153855us2063ms
Unif 10^91012213us1018ms2121924us1960ms
900_000_000Unif1168676us1290ms2465361us2325ms
Unif 10^91172123us1141ms2427304us2192ms
1_000_000_000Unif1328195us1369ms2798789us2585ms
Unif 10^91316730us1281ms2763619us2422ms
95 | -------------------------------------------------------------------------------- /src/sorts/lsd_sort.rs: -------------------------------------------------------------------------------- 1 | use super::super::algo::k_way_merge::k_way_merge; 2 | use super::super::algo::verge_sort_heuristic::verge_sort_preprocessing; 3 | use super::super::{RadixKey, Radixable}; 4 | use super::counting_sort::counting_sort; 5 | use super::msd_sort::copy_by_histogram; 6 | use super::utils::{ 7 | copy_nonoverlapping, only_one_bucket_filled, prefix_sums, Params, 8 | }; 9 | 10 | pub fn lsd_radixsort_body(arr: &mut [T], p: Params) 11 | where 12 | T: Radixable, 13 | K: RadixKey, 14 | { 15 | if arr.len() <= 128 { 16 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 17 | return; 18 | } 19 | 20 | let size = arr.len(); 21 | let dummy = arr[0]; 22 | let mut buffer: Vec = vec![arr[0]; size]; 23 | let mut index = 0; 24 | 25 | let histograms = dummy.get_full_histograms(arr, &p); 26 | 27 | let mut t1 = arr; 28 | let t2 = &mut buffer; 29 | let mut t2 = t2.as_mut_slice(); 30 | 31 | for level in (p.level..p.max_level).rev() { 32 | if only_one_bucket_filled(&histograms[level]) { 33 | continue; 34 | } 35 | 36 | let (mut source, mut destination) = 37 | if index == 0 { (t1, t2) } else { (t2, t1) }; 38 | let (mask, shift) = dummy.get_mask_and_shift(&p.new_level(level)); 39 | let (_, mut heads, _) = prefix_sums(&histograms[level]); 40 | 41 | copy_by_histogram( 42 | source.len(), 43 | &mut source, 44 | &mut destination, 45 | &mut heads, 46 | mask, 47 | shift, 48 | ); 49 | 50 | index = 1 - index; 51 | 52 | if index == 1 { 53 | t1 = source; 54 | t2 = destination; 55 | } else { 56 | t2 = source; 57 | t1 = destination; 58 | } 59 | } 60 | 61 | if index == 1 { 62 | copy_nonoverlapping(t2, t1, size); 63 | } 64 | } 65 | 66 | pub fn lsd_radixsort_aux, K: RadixKey>( 67 | arr: &mut [T], 68 | radix: usize, 69 | heuristic: bool, 70 | min_cs2: usize, 71 | ) { 72 | if arr.len() <= 128 { 73 | 
arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 74 | return; 75 | } 76 | 77 | let dummy = arr[0]; 78 | let (offset, _) = dummy.compute_offset(arr, radix); 79 | let max_level = dummy.compute_max_level(offset, radix); 80 | 81 | let (offset_reg, _) = dummy.compute_offset(arr, 8); 82 | let max_level_reg = dummy.compute_max_level(offset_reg, 8); 83 | 84 | if max_level == 0 { 85 | return; 86 | } 87 | 88 | let params = Params::new(0, radix, offset, max_level); 89 | 90 | if heuristic { 91 | if max_level_reg == 1 { 92 | counting_sort(arr, 8); 93 | } else if max_level_reg == 2 && arr.len() >= min_cs2 { 94 | counting_sort(arr, 16); 95 | } else { 96 | lsd_radixsort_body(arr, params); 97 | } 98 | } else { 99 | lsd_radixsort_body(arr, params); 100 | } 101 | } 102 | 103 | /// # LSD sort 104 | /// 105 | /// An implementation of the 106 | /// [LSD sort](https://en.wikipedia.org/wiki/Radix_sort) 107 | /// algorithm. 108 | /// 109 | /// Implementation has been deeply optimized: 110 | /// - Small preliminary check to skip prefix zero bits. 111 | /// - Use ping pong copy. 112 | /// - Use vectorization. 113 | /// - Compute histograms in one pass. 114 | /// - Check the number of non-empty buckets, if only one bucket is non-empty, 115 | /// then skip the `copy_by_histogram`. 116 | /// 117 | /// The Verge sort pre-processing heuristic is also added. 118 | /// 119 | /// This LSD sort is an out of place unstable radix sort. The core algorithm is 120 | /// stable, but fallback is unstable. 
121 | pub fn lsd_radixsort(arr: &mut [T], radix: usize) 122 | where 123 | T: Radixable, 124 | K: RadixKey, 125 | { 126 | if arr.len() <= 128 { 127 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 128 | return; 129 | } 130 | 131 | let mut separators = verge_sort_preprocessing(arr, radix, &|arr, radix| { 132 | lsd_radixsort_aux(arr, radix, false, 0) 133 | }); 134 | k_way_merge(arr, &mut separators); 135 | } 136 | 137 | pub fn lsd_radixsort_heu(arr: &mut [T], radix: usize, min_cs2: usize) 138 | where 139 | T: Radixable, 140 | K: RadixKey, 141 | { 142 | if arr.len() <= 128 { 143 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 144 | return; 145 | } 146 | 147 | let mut separators = verge_sort_preprocessing(arr, radix, &|arr, radix| { 148 | lsd_radixsort_aux(arr, radix, true, min_cs2) 149 | }); 150 | k_way_merge(arr, &mut separators); 151 | } 152 | -------------------------------------------------------------------------------- /src/sorts/msd_sort.rs: -------------------------------------------------------------------------------- 1 | use super::super::algo::k_way_merge::k_way_merge; 2 | use super::super::algo::verge_sort_heuristic::verge_sort_preprocessing; 3 | use super::super::{RadixKey, Radixable}; 4 | use super::utils::{get_histogram, prefix_sums, Params}; 5 | 6 | const UNROLL_SIZE: usize = 4; 7 | 8 | #[inline] 9 | pub fn copy_by_histogram( 10 | size: usize, 11 | source: &mut [T], 12 | destination: &mut [T], 13 | heads: &mut Vec, 14 | mask: <>::Key as RadixKey>::Key, 15 | shift: usize, 16 | ) where 17 | T: Radixable, 18 | K: RadixKey, 19 | { 20 | let source = &mut source[0..size]; 21 | let remainder = size % UNROLL_SIZE; 22 | let (source_fst, source_snd) = source.split_at_mut(size - remainder); 23 | 24 | source_fst.chunks_exact(UNROLL_SIZE).for_each(|chunk| unsafe { 25 | let b0 = chunk.get_unchecked(0).extract(mask, shift); 26 | let b1 = chunk.get_unchecked(1).extract(mask, shift); 27 | let b2 = chunk.get_unchecked(2).extract(mask, shift); 28 | let 
b3 = chunk.get_unchecked(3).extract(mask, shift); 29 | 30 | let d0 = *heads.get_unchecked(b0); 31 | heads[b0] += 1; 32 | let d1 = *heads.get_unchecked(b1); 33 | heads[b1] += 1; 34 | let d2 = *heads.get_unchecked(b2); 35 | heads[b2] += 1; 36 | let d3 = *heads.get_unchecked(b3); 37 | heads[b3] += 1; 38 | 39 | destination[d0] = *chunk.get_unchecked(0); 40 | destination[d1] = *chunk.get_unchecked(1); 41 | destination[d2] = *chunk.get_unchecked(2); 42 | destination[d3] = *chunk.get_unchecked(3); 43 | }); 44 | 45 | for item in source_snd.iter() { 46 | let target_bucket = item.extract(mask, shift); 47 | destination[heads[target_bucket]] = *item; 48 | heads[target_bucket] += 1; 49 | } 50 | } 51 | 52 | pub fn msd_radixsort_rec, K: RadixKey>( 53 | arr: &mut [T], 54 | p: Params, 55 | ) { 56 | if arr.len() <= 128 { 57 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 58 | return; 59 | } 60 | 61 | let dummy = arr[0]; 62 | let (mask, shift) = dummy.get_mask_and_shift_from_left(&p); 63 | let histogram = get_histogram(arr, &p, mask, shift); 64 | let (p_sums, mut heads, _) = prefix_sums(&histogram); 65 | 66 | let mut buffer = arr.to_vec(); 67 | 68 | copy_by_histogram(arr.len(), &mut buffer, arr, &mut heads, mask, shift); 69 | 70 | let mut rest = arr; 71 | if p.level < p.max_level - 1 { 72 | for i in 0..(p.radix_range) { 73 | let bucket_end = p_sums[i + 1] - p_sums[i]; 74 | let (first_part, second_part) = rest.split_at_mut(bucket_end); 75 | rest = second_part; 76 | if histogram[i] > 1 { 77 | if first_part.len() <= 128 { 78 | first_part 79 | .sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 80 | } else { 81 | let new_params = p.new_level(p.level + 1); 82 | msd_radixsort_rec(first_part, new_params); 83 | } 84 | } 85 | } 86 | } 87 | } 88 | 89 | fn msd_radixsort_aux, K: RadixKey>( 90 | arr: &mut [T], 91 | radix: usize, 92 | ) { 93 | if arr.len() <= 128 { 94 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 95 | return; 96 | } 97 | 98 | let dummy = arr[0]; 99 | let (_, 
raw_offset) = dummy.compute_offset(arr, radix); 100 | let max_level = dummy.compute_max_level(raw_offset, radix); 101 | 102 | if max_level == 0 { 103 | return; 104 | } 105 | 106 | let params = Params::new(0, radix, raw_offset, max_level); 107 | 108 | msd_radixsort_rec(arr, params); 109 | } 110 | 111 | /// # MSD sort 112 | /// 113 | /// An implementation of the 114 | /// [MSD sort](https://en.wikipedia.org/wiki/Radix_sort) 115 | /// algorithm. 116 | /// 117 | /// Implementation has been deeply optimized: 118 | /// - Small preliminary check to skip prefix zero bits. 119 | /// - Use vectorization. 120 | /// 121 | /// We choose to use an out of place implementation to have a fast radix sort 122 | /// for small input. This sort is used as a fallback for other radix sort from 123 | /// this crate. 124 | /// 125 | /// The Verge sort pre-processing heuristic is also added. 126 | /// 127 | /// This MSD sort is an out of place unstable radix sort. 128 | pub fn msd_radixsort, K: RadixKey>( 129 | arr: &mut [T], 130 | radix: usize, 131 | ) { 132 | if arr.len() <= 128 { 133 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 134 | return; 135 | } 136 | 137 | let mut separators = 138 | verge_sort_preprocessing(arr, radix, &msd_radixsort_aux); 139 | k_way_merge(arr, &mut separators); 140 | } 141 | -------------------------------------------------------------------------------- /src/sorts/ska_sort.rs: -------------------------------------------------------------------------------- 1 | use super::super::{RadixKey, Radixable}; 2 | use super::american_flag_sort::serial_radixsort_rec; 3 | use super::comparative_sort::insertion_sort; 4 | use super::utils::{get_histogram, prefix_sums, Params}; 5 | 6 | const UNROLL_SIZE: usize = 4; 7 | 8 | pub fn ska_swap, K: RadixKey>( 9 | arr: &mut [T], 10 | heads: &mut Vec, 11 | tails: &[usize], 12 | mask: <>::Key as RadixKey>::Key, 13 | shift: usize, 14 | ) { 15 | let mut buckets_size = Vec::new(); 16 | for i in 0..heads.len() { 17 | 
buckets_size.push((i, tails[i] - heads[i])) 18 | } 19 | buckets_size.sort_unstable_by_key(|elt| elt.1); 20 | buckets_size.pop(); 21 | 22 | while !buckets_size.is_empty() { 23 | let mut to_remove = Vec::new(); 24 | for (i, (computed_index, _)) in buckets_size.iter().enumerate().rev() { 25 | let span = tails[*computed_index] - heads[*computed_index]; 26 | 27 | if span > 0 { 28 | let offset = heads[*computed_index]; 29 | let quotient = span / UNROLL_SIZE; 30 | let remainder = span % UNROLL_SIZE; 31 | 32 | for q in 0..quotient { 33 | let o = offset + q * UNROLL_SIZE; 34 | 35 | unsafe { 36 | let tb0 = arr.get_unchecked(o).extract(mask, shift); 37 | let tb1 = arr.get_unchecked(o + 1).extract(mask, shift); 38 | let tb2 = arr.get_unchecked(o + 2).extract(mask, shift); 39 | let tb3 = arr.get_unchecked(o + 3).extract(mask, shift); 40 | 41 | let dest_index_0 = *heads.get_unchecked(tb0); 42 | heads[tb0] += 1; 43 | let dest_index_1 = *heads.get_unchecked(tb1); 44 | heads[tb1] += 1; 45 | let dest_index_2 = *heads.get_unchecked(tb2); 46 | heads[tb2] += 1; 47 | let dest_index_3 = *heads.get_unchecked(tb3); 48 | heads[tb3] += 1; 49 | 50 | arr.swap(o, dest_index_0); 51 | arr.swap(o + 1, dest_index_1); 52 | arr.swap(o + 2, dest_index_2); 53 | arr.swap(o + 3, dest_index_3); 54 | } 55 | } 56 | 57 | let n_o = offset + UNROLL_SIZE * quotient; 58 | 59 | for i in 0..remainder { 60 | unsafe { 61 | let b = arr.get_unchecked(n_o + i).extract(mask, shift); 62 | arr.swap(n_o + i, heads[b]); 63 | heads[b] += 1; 64 | } 65 | } 66 | } else { 67 | to_remove.push(i); 68 | } 69 | } 70 | 71 | for i in to_remove.iter() { 72 | buckets_size.remove(*i); 73 | } 74 | } 75 | } 76 | 77 | pub fn ska_sort_rec, K: RadixKey>(arr: &mut [T], p: Params) { 78 | if arr.len() <= 64 { 79 | insertion_sort(arr); 80 | return; 81 | } 82 | if arr.len() <= 1024 { 83 | serial_radixsort_rec(arr, p); 84 | return; 85 | } 86 | 87 | let dummy = arr[0]; 88 | let (mask, shift) = dummy.get_mask_and_shift_from_left(&p); 89 | let 
histogram = get_histogram(arr, &p, mask, shift); 90 | let (p_sums, mut heads, tails) = prefix_sums(&histogram); 91 | 92 | ska_swap(arr, &mut heads, &tails, mask, shift); 93 | 94 | let mut rest = arr; 95 | if p.level < p.max_level - 1 { 96 | for i in 0..(p.radix_range) { 97 | let bucket_end = p_sums[i + 1] - p_sums[i]; 98 | let (first_part, second_part) = rest.split_at_mut(bucket_end); 99 | rest = second_part; 100 | if histogram[i] > 1 { 101 | let new_params = p.new_level(p.level + 1); 102 | ska_sort_rec(first_part, new_params); 103 | } 104 | } 105 | } 106 | } 107 | 108 | /// # Ska sort 109 | /// 110 | /// An implementation of the 111 | /// [Ska sort](https://probablydance.com/2016/12/27/i-wrote-a-faster-sorting-algorithm/) 112 | /// algorithm. 113 | /// 114 | /// The Ska sort is an in place unstable radix sort. 115 | pub fn ska_sort, K: RadixKey>(arr: &mut [T], radix: usize) { 116 | if arr.len() <= 64 { 117 | insertion_sort(arr); 118 | return; 119 | } 120 | 121 | let dummy = arr[0]; 122 | let (_, raw_offset) = dummy.compute_offset(arr, radix); 123 | let max_level = dummy.compute_max_level(raw_offset, radix); 124 | 125 | if max_level == 0 { 126 | return; 127 | } 128 | 129 | let params = Params::new(0, radix, raw_offset, max_level); 130 | 131 | ska_sort_rec(arr, params); 132 | } 133 | -------------------------------------------------------------------------------- /src/sorts/comparative_sort.rs: -------------------------------------------------------------------------------- 1 | use super::super::{RadixKey, Radixable}; 2 | use super::utils::Params; 3 | 4 | const TRY_THRESHOLD: u8 = 32; 5 | 6 | /// # Insertion sort 7 | /// 8 | /// An implementation of the 9 | /// [Insertion sort](https://en.wikipedia.org/wiki/Insertion_sort) 10 | /// algorithm. 
11 | pub fn insertion_sort(arr: &mut [T]) { 12 | for i in 1..arr.len() { 13 | if arr[i - 1] > arr[i] { 14 | let mut j = i; 15 | while j > 0 && arr[j - 1] > arr[j] { 16 | arr.swap(j - 1, j); 17 | j -= 1; 18 | } 19 | } 20 | } 21 | } 22 | 23 | fn insertion_sort_start_at(arr: &mut [T], start: usize) { 24 | for i in start..arr.len() { 25 | if arr[i - 1] > arr[i] { 26 | let mut j = i; 27 | while j > 0 && arr[j - 1] > arr[j] { 28 | arr.swap(j - 1, j); 29 | j -= 1; 30 | } 31 | } 32 | } 33 | } 34 | 35 | fn find_end_of_bucket(arr: &mut [T], start: usize, p: &Params) -> usize 36 | where 37 | T: Radixable, 38 | K: RadixKey, 39 | { 40 | let dummy = arr[0]; 41 | let mask = dummy.mask_for_high_bits(p.radix, p.offset, p.max_level); 42 | let high_bits = arr[start].into_key_type() & mask; 43 | 44 | let mut jump = 32; 45 | let mut i = start; 46 | let mut j = start + jump; 47 | 48 | if j >= arr.len() { 49 | j = arr.len() - 1; 50 | } 51 | 52 | while high_bits == (arr[j].into_key_type() & mask) { 53 | jump *= 2; 54 | i = j; 55 | j += jump; 56 | if j >= arr.len() { 57 | j = arr.len() - 1; 58 | 59 | if high_bits == arr[j].into_key_type() & mask { 60 | return j + 1; 61 | } 62 | } 63 | } 64 | 65 | loop { 66 | let mid = (i + j) / 2; 67 | let t_high_bits = arr[mid].into_key_type() & mask; 68 | if high_bits == t_high_bits { 69 | if j == i + 1 { 70 | return j; 71 | } 72 | i = mid; 73 | } else if t_high_bits > high_bits { 74 | j = mid; 75 | } 76 | } 77 | } 78 | 79 | fn find_start_of_bucket(arr: &mut [T], start: usize, p: &Params) -> usize 80 | where 81 | T: Radixable, 82 | K: RadixKey, 83 | { 84 | let dummy = arr[0]; 85 | let mask = dummy.mask_for_high_bits(p.radix, p.offset, p.max_level); 86 | let high_bits = arr[start].into_key_type() & mask; 87 | 88 | let mut jump = 32; 89 | let mut i = start; 90 | 91 | let mut j = if jump > start { 0 } else { start - jump }; 92 | 93 | while high_bits == (arr[j].into_key_type() & mask) { 94 | jump *= 2; 95 | i = j; 96 | j = if jump > j { 0 } else { j - jump 
}; 97 | if j == 0 && high_bits == arr[j].into_key_type() & mask { 98 | return 0; 99 | } 100 | } 101 | 102 | loop { 103 | let mid = (i + j) / 2; 104 | let t_high_bits = arr[mid].into_key_type() & mask; 105 | 106 | if high_bits == t_high_bits { 107 | i = mid; 108 | } else if high_bits > t_high_bits { 109 | if j == i - 1 { 110 | return i; 111 | } 112 | j = mid; 113 | } 114 | } 115 | } 116 | 117 | pub fn insertion_sort_try, K: RadixKey>( 118 | arr: &mut [T], 119 | p: &Params, 120 | ) -> Vec<(usize, usize)> { 121 | let dummy = arr[0]; 122 | let mask = dummy.mask_for_high_bits(p.radix, p.offset, p.max_level); 123 | 124 | let mut unsorted_parts = Vec::new(); 125 | 126 | let mut i = 1; 127 | let mut high_bits = arr[0].into_key_type() & mask; 128 | let mut misplaced_count = 0; 129 | loop { 130 | if arr[i - 1] > arr[i] { 131 | let current_high_bits = arr[i].into_key_type() & mask; 132 | if current_high_bits == high_bits { 133 | misplaced_count += 1; 134 | } else { 135 | high_bits = current_high_bits; 136 | misplaced_count = 1; 137 | } 138 | 139 | let mut j = i; 140 | while j > 0 && arr[j - 1] > arr[j] { 141 | arr.swap(j - 1, j); 142 | j -= 1; 143 | } 144 | 145 | if misplaced_count > TRY_THRESHOLD { 146 | let end = find_end_of_bucket(arr, i, p); 147 | 148 | if end - i <= TRY_THRESHOLD as usize { 149 | insertion_sort_start_at(&mut arr[..end], i + 1); 150 | } else { 151 | let start = find_start_of_bucket(arr, i, p); 152 | unsorted_parts.push((start, end)); 153 | } 154 | 155 | i = end; 156 | } 157 | } 158 | 159 | i += 1; 160 | if i >= arr.len() { 161 | break; 162 | } 163 | } 164 | 165 | unsorted_parts 166 | } 167 | -------------------------------------------------------------------------------- /src/tests/utils.rs: -------------------------------------------------------------------------------- 1 | use super::super::sorts::utils::{ 2 | compute_max_level, compute_offset, copy_nonoverlapping, 3 | get_empty_histograms, get_histogram, get_partial_histograms, 4 | offset_from_bits, 
only_one_bucket_filled, prefix_sums, Params, 5 | }; 6 | use super::super::Radixable; 7 | 8 | #[test] 9 | fn test_utils_copy_nonoverlapping() { 10 | let mut arr1 = vec![1, 1, 1, 1]; 11 | let mut arr2 = vec![0, 0, 0, 0]; 12 | copy_nonoverlapping(&mut arr1, &mut arr2, 3); 13 | 14 | assert_eq!(arr1, vec![1, 1, 1, 1]); 15 | assert_eq!(arr2, vec![1, 1, 1, 0]); 16 | } 17 | 18 | #[test] 19 | fn test_utils_prefix_sums() { 20 | let histogram = vec![5, 7, 8, 4]; 21 | let (p_sums, heads, tails) = prefix_sums(&histogram); 22 | 23 | let check_p_sums = vec![0, 5, 12, 20, 24]; 24 | let check_heads = vec![0, 5, 12, 20]; 25 | let check_tails = vec![5, 12, 20, 24]; 26 | 27 | assert_eq!(p_sums, check_p_sums); 28 | assert_eq!(heads, check_heads); 29 | assert_eq!(tails, check_tails); 30 | } 31 | 32 | #[test] 33 | fn test_utils_only_one_bucket_filled() { 34 | let histogram = vec![1, 0, 2, 5, 4, 7, 0, 1]; 35 | assert_eq!(only_one_bucket_filled(&histogram), false); 36 | 37 | let histogram = vec![0, 0, 2, 0, 4, 0, 0, 0]; 38 | assert_eq!(only_one_bucket_filled(&histogram), false); 39 | 40 | let histogram = vec![1, 0, 0, 0, 0, 0]; 41 | assert_eq!(only_one_bucket_filled(&histogram), true); 42 | 43 | let histogram = vec![0, 0, 0, 0, 0, 1]; 44 | assert_eq!(only_one_bucket_filled(&histogram), true); 45 | 46 | let histogram = vec![0, 0, 0, 1, 0, 0]; 47 | assert_eq!(only_one_bucket_filled(&histogram), true); 48 | } 49 | 50 | #[test] 51 | fn test_utils_offset_from_bits() { 52 | let mut arr: Vec = vec![0]; 53 | let max: u32 = 0b0000_0111_0000_0000_0000_0000_0000_0000; 54 | let radix = 8; 55 | let bits = 32; 56 | let zero: u32 = 0; 57 | let one: u32 = 1; 58 | let (offset, raw_offset) = 59 | offset_from_bits(&mut arr, max, radix, bits, zero, one); 60 | assert_eq!(offset, 0); 61 | assert_eq!(raw_offset, 5); 62 | } 63 | 64 | #[test] 65 | fn test_utils_compute_offset() { 66 | let mut arr: Vec = vec![0b0000_0111_0000_0000_0000_0000_0000_0000]; 67 | let (offset, raw_offset) = compute_offset(&mut arr, 8); 68 
| assert_eq!(offset, 0); 69 | assert_eq!(raw_offset, 5); 70 | 71 | let mut arr: Vec = vec!['a']; 72 | // 'a': 0b0000_0000_0000_0000_0000_0000_0110_0001 73 | let (offset, raw_offset) = compute_offset(&mut arr, 8); 74 | assert_eq!(offset, 24); 75 | assert_eq!(raw_offset, 25); 76 | } 77 | 78 | #[test] 79 | fn test_utils_compute_max_level() { 80 | assert_eq!(compute_max_level(32, 0, 8), 4); 81 | assert_eq!(compute_max_level(32, 7, 8), 4); 82 | assert_eq!(compute_max_level(32, 8, 8), 3); 83 | assert_eq!(compute_max_level(32, 9, 8), 3); 84 | assert_eq!(compute_max_level(32, 16, 8), 2); 85 | assert_eq!(compute_max_level(32, 20, 8), 2); 86 | assert_eq!(compute_max_level(32, 24, 8), 1); 87 | assert_eq!(compute_max_level(32, 30, 8), 1); 88 | assert_eq!(compute_max_level(32, 32, 8), 0); 89 | } 90 | 91 | #[test] 92 | fn test_utils_get_empty_histograms() { 93 | let h = get_empty_histograms(2, 256); 94 | assert_eq!(h.len(), 2); 95 | assert_eq!(h[0].len(), 256); 96 | assert_eq!(h[1].len(), 256); 97 | for item in h[0].iter() { 98 | assert_eq!(*item, 0); 99 | } 100 | } 101 | 102 | #[test] 103 | fn test_utils_get_histogram() { 104 | let mut v: Vec = 105 | vec![0, 1, 2, 3, 1, 1, 2, 1, 3, 0, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 1, 0]; 106 | let mut arr = v.as_mut_slice(); 107 | let p = Params::new(0, 2, 62, 1); // level, radix, offset, max_level 108 | let (mask, shift) = arr[0].get_mask_and_shift(&p); 109 | let h = get_histogram(&mut arr, &p, mask, shift); 110 | 111 | let check = vec![5, 9, 4, 4]; 112 | 113 | assert_eq!(h, check); 114 | } 115 | 116 | #[test] 117 | fn test_utils_get_partial_histograms() { 118 | let p = Params::new(0, 4, 16, 4); // level, radix, offset, max_level 119 | let mut arr: Vec = vec![8, 5, 1024, 512, 256, 16_384, 64, 32]; 120 | let histograms = get_partial_histograms(&mut arr, &p, 4); 121 | 122 | assert_eq!(histograms.len(), 4); 123 | assert_eq!(histograms[0], vec![ 124 | 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 125 | ]); 126 | assert_eq!(histograms[1], vec![ 
127 | 5, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 128 | ]); 129 | assert_eq!(histograms[2], vec![ 130 | 6, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 131 | ]); 132 | assert_eq!(histograms[3], vec![ 133 | 6, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 134 | ]); 135 | 136 | let histograms = get_partial_histograms(&mut arr, &p, 2); 137 | 138 | assert_eq!(histograms.len(), 2); 139 | assert_eq!(histograms[0], vec![ 140 | 7, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 141 | ]); 142 | assert_eq!(histograms[1], vec![ 143 | 5, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 144 | ]); 145 | } 146 | -------------------------------------------------------------------------------- /RELEASES.md: -------------------------------------------------------------------------------- 1 | # Version **1.2.0** (March 18th 2023) 2 | 3 | ### Dependency: 4 | 5 | - Use Rayon 1.7.0 instead of 1.5.3. 6 | 7 | ### Bug fix: 8 | 9 | - Peeka sort did not scale correctly because of constants. Now it uses dynanmic 10 | values for blocks size for the parallele algorithm. It now scale for arrays whose 11 | size is bigger than 1_000_000_000. 12 | - Dispatcher trait was bugged for the stable sort. If using stable sort and custom 13 | structs, the stable sort called the unstable rust sort as fallback. It now calls 14 | the stable rust sort for the fallback. 15 | 16 | ### Misc: 17 | 18 | - Add disclaimer about memory consumption in the Readme. 19 | - Add disclaimer about array size in the Readme. 20 | - Add human readable benchmark results. 21 | 22 | # Version **1.1.1** (August 20th 2022) 23 | 24 | ### Dependency: 25 | 26 | - Use Rayon 1.5.3 instead of 1.5.0. 27 | 28 | ### Misc: 29 | 30 | - Update Readme. 31 | - Add Github workflows. 32 | 33 | # Version **1.1.0** (November 7th 2020) 34 | 35 | ### Features: 36 | 37 | Now you can choose only the single thread version, without having `rayon` as 38 | dependency. 
Or the full version, including the multithread version, and with 39 | `rayon` as a dependency so the compilation time is longer. 40 | 41 | You just have to add (or not) the feature flag "`voracious_multithread`". See 42 | the doc. 43 | 44 | Since the compilation time was longer for the multithread version, and not 45 | everyone needs it, it is now possible to skip it. 46 | 47 | Moreover it means the data you sort no longer needs to be `Send + Sync` if 48 | you use the single thread version. 49 | 50 | ### Improvement: 51 | 52 | - Update the fallback constant in the Peeka sort. It is a bit faster now. 53 | Instead of "fallbacking" when the chunk is smaller than 20_000 elements, it now 54 | fallbacks when the chunk is smaller than 128_000 elements. 55 | 56 | ### Bugs fixes: 57 | 58 | - Fix the bug with the vergesort pre-processing heuristic. This improves 59 | performance for a few distributions. 60 | 61 | ### Other: 62 | 63 | - Bump rayon version to 1.5.0. 64 | - Fix typo in doc. 65 | - Update doc. 66 | - Add more benchmark results. 67 | - Add more distributions. 68 | 69 | # Version **1.0.0** (September 9th 2020) 70 | 71 | ### New single thread sort: 72 | 73 | - Rollercoaster sort (MSD radix sort). 74 | - LSD stable sort (LSD radix sort). 75 | 76 | ### New multi thread sort: 77 | 78 | - Peeka sort (MSD radix sort). An improvement on the MIT researchers' Regions sort. 79 | 80 | ### New dedicated single thread sorts: 81 | 82 | - LSD u32 sort (LSD radix sort for u32). 83 | - Counting sort for u16. 84 | 85 | ### New dependency: 86 | 87 | - Rayon 1.4.0 88 | 89 | ### Improvements: 90 | 91 | - DLSD sort fallback. 92 | - Use Rollercoaster sort for `f32`, `f64`, `i32` and `i64`, which significantly improves performance. 93 | 94 | ### Bugs fixes: 95 | 96 | - Fix the case where a vector (or slice) has only zeros. 97 | - Fix "left shift" for MSD radix sort (American flag sort, MSD sort, MSD stable sort, Ska sort, Voracious sort). 98 | - Fix stable sort in trait.
99 | 100 | ### Other: 101 | 102 | - Add support for **isize** and **usize** types. 103 | - Add multithreading to generate random vectors for tests. 104 | - Use Rayon multi thread sort to check if an array is well sorted in tests. 105 | - Add Pareto distribution for f32 and f64 random vectors for tests. 106 | - Add more patterns for vectors for tests. 107 | - Add tests for dedicated sorts. 108 | - Add tests for new sorts. 109 | - Add missing tests for helper functions. 110 | - Replace a lot of unsafe code by using `chunks_exact` method. 111 | - Remove useless trait constraints. 112 | - Update documentation and [README.md](https://github.com/lakwet/voracious_sort/blob/master/README.md). 113 | - Add [RELEASES.md](https://github.com/lakwet/voracious_sort/blob/master/RELEASES.md). 114 | - Add [PROFILING.md](https://github.com/lakwet/voracious_sort/blob/master/PROFILING.md). 115 | - Replace obsolete benchmark by new benchmark. 116 | 117 | ### Profiling: 118 | 119 | - Start the profiling for `bool`, `char`, `f32`, `f64`, `u8` on a Ryzen 9 3950x. 120 | - See more in [PROFILING.md](https://github.com/lakwet/voracious_sort/blob/master/PROFILING.md). 121 | 122 | # Version **0.1.0** (March 16th 2020) 123 | 124 | Initial release. 125 | 126 | ### Traits: 127 | 128 | - Dispatcher trait: Which sort is used and how for a given type. 129 | - RadixKey trait: Useful methods for each type. 130 | - Radixable trait: Main trait, where all the logic is to make sorts generic. 131 | - RadixSort trait: Add sort methods for vector and slice. 132 | 133 | ### Generic single thread sorts: 134 | 135 | - American flag sort (MSD radix sort). 136 | - Boolean sort (other). 137 | - Insertion sort (comparative sort). 138 | - Counting sort (radix sort). 139 | - DLSD sort - Diverting LSD sort - (LSD radix sort). 140 | - LSD sort (radix sort). 141 | - MSD sort (MSD radix sort). 142 | - MSD stable sort (MSD radix sort). 143 | - Ska sort (MSD radix sort). 144 | - Thiel sort (LSD radix sort).
145 | - Voracious sort (MSD radix sort). 146 | 147 | ### Dedicated single thread sorts: (it works only on one type) 148 | 149 | - LSD f32 sort (LSD radix sort for f32). 150 | 151 | ### Benchmarks: 152 | 153 | - First benchmark (in result folder). 154 | -------------------------------------------------------------------------------- /src/sorts/voracious_sort.rs: -------------------------------------------------------------------------------- 1 | use super::super::algo::k_way_merge::k_way_merge; 2 | use super::super::algo::verge_sort_heuristic::{ 3 | explore_simple_forward, verge_sort_preprocessing, Orientation, 4 | }; 5 | use super::super::{RadixKey, Radixable}; 6 | use super::counting_sort::counting_sort; 7 | use super::msd_sort::msd_radixsort_rec; 8 | use super::ska_sort::ska_swap; 9 | use super::utils::{get_histogram, prefix_sums, Params}; 10 | 11 | pub fn voracious_sort_rec, K: RadixKey>( 12 | arr: &mut [T], 13 | p: Params, 14 | zipf_heuristic_count: usize, 15 | ) { 16 | // Small optimization, use PDQ sort (sort implemented in Std Rust Unstable) 17 | // instead of insertion sort for small size array. 18 | if arr.len() <= 128 { 19 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 20 | return; 21 | } 22 | // Main optimization is here: better diversion handling. 23 | // Maybe it seems not important, but, really, it is very important. 24 | // What is important is to have a sort that is as fast as possible 25 | // for small size array. msd_radixsort has been designed that way. 26 | // It is unusual to have an out of place msd radix sort (usually msd radix 27 | // sort are in place). 28 | // The threshold has been found by experimental tests. 
29 | if arr.len() <= 30_000 { 30 | msd_radixsort_rec(arr, p); 31 | return; 32 | } 33 | 34 | let dummy = arr[0]; 35 | let (mask, shift) = dummy.get_mask_and_shift_from_left(&p); 36 | let histogram = get_histogram(arr, &p, mask, shift); 37 | let (p_sums, mut heads, tails) = prefix_sums(&histogram); 38 | 39 | ska_swap(arr, &mut heads, &tails, mask, shift); 40 | 41 | let mut rest = arr; 42 | if p.level < p.max_level - 1 { 43 | for i in 0..(p.radix_range) { 44 | let bucket_end = p_sums[i + 1] - p_sums[i]; 45 | let (first_part, second_part) = rest.split_at_mut(bucket_end); 46 | rest = second_part; 47 | if histogram[i] > 1 { 48 | // skip slice with only 1 items (already sorted) 49 | let new_params = p.new_level(p.level + 1); 50 | // Other optimization, it costs almost nothing to perform this 51 | // check, and it allows to gain time on some data distributions. 52 | if zipf_heuristic_count > 0 { 53 | match explore_simple_forward(first_part) { 54 | Orientation::IsAsc => (), 55 | Orientation::IsDesc => { 56 | first_part.reverse(); 57 | }, 58 | Orientation::IsPlateau => (), 59 | Orientation::IsNone => { 60 | voracious_sort_rec( 61 | first_part, 62 | new_params, 63 | zipf_heuristic_count - 1, 64 | ); 65 | }, 66 | } 67 | } else { 68 | voracious_sort_rec(first_part, new_params, 0); 69 | } 70 | } 71 | } 72 | } 73 | } 74 | 75 | fn voracious_sort_aux, K: RadixKey>( 76 | arr: &mut [T], 77 | radix: usize, 78 | heuristic: bool, 79 | min_cs2: usize, 80 | ) { 81 | let size = arr.len(); 82 | if size <= 128 { 83 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 84 | return; 85 | } 86 | 87 | let dummy = arr[0]; 88 | let (_, raw_offset) = dummy.compute_offset(arr, radix); 89 | let max_level = dummy.compute_max_level(raw_offset, radix); 90 | 91 | if max_level == 0 { 92 | return; 93 | } 94 | 95 | let params = Params::new(0, radix, raw_offset, max_level); 96 | 97 | if heuristic { 98 | // we could add more heuristics, but the idea is to keep an MSD radix 99 | // sort, so there is no 
additional memory requirement 100 | if max_level == 1 { 101 | counting_sort(arr, params.radix); 102 | } else if max_level == 2 && size >= min_cs2 { 103 | counting_sort(arr, 2 * params.radix); 104 | } else { 105 | voracious_sort_rec(arr, params, 2); 106 | } 107 | } else { 108 | voracious_sort_rec(arr, params, 2); 109 | } 110 | } 111 | 112 | /// # Voracious sort 113 | /// 114 | /// It is an improvement of the 115 | /// [Ska sort](https://probablydance.com/2016/12/27/i-wrote-a-faster-sorting-algorithm/) 116 | /// algorithm. 117 | /// 118 | /// Insertion sort has been replaced by the PDQ sort as a fallback for very 119 | /// small input. 120 | /// 121 | /// We added a second fallback for array smaller or equal to 30_000 elements. 122 | /// For this purpose, we implemented the MSD sort which is the fastest algorithm 123 | /// from this crate for "small" input. 124 | /// 125 | /// Other heuristics have been added. 126 | /// 127 | /// The Verge sort pre-processing heuristic is also added. 128 | /// 129 | /// The Voracious sort is an in place unstable radix sort. For array smaller 130 | /// than 30_000 elements it fallbacks on MSD sort which is out of place, but 131 | /// since the threshold is a constant, this algorithm is in place. 
132 | pub fn voracious_sort(arr: &mut [T], radix: usize) 133 | where 134 | T: Radixable, 135 | K: RadixKey, 136 | { 137 | if arr.len() <= 128 { 138 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 139 | return; 140 | } 141 | 142 | let mut separators = verge_sort_preprocessing(arr, radix, &|arr, radix| { 143 | voracious_sort_aux(arr, radix, false, 0) 144 | }); 145 | k_way_merge(arr, &mut separators); 146 | } 147 | 148 | pub fn voracious_sort_heu(arr: &mut [T], radix: usize, min_cs2: usize) 149 | where 150 | T: Radixable, 151 | K: RadixKey, 152 | { 153 | if arr.len() <= 128 { 154 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 155 | return; 156 | } 157 | 158 | let mut separators = verge_sort_preprocessing(arr, radix, &|arr, radix| { 159 | voracious_sort_aux(arr, radix, true, min_cs2) 160 | }); 161 | k_way_merge(arr, &mut separators); 162 | } 163 | -------------------------------------------------------------------------------- /results/benchmark_1_2_0/benchmark_results_trait_mt_u32: -------------------------------------------------------------------------------- 1 | running 1 test 2 | Number of iterations: 1 3 | Number of threads: 16 4 | With check: false 5 | === Test u32 === Trait Vora MT Rayon pll uns 6 | Array size: 20000000 7 | -- Unif :29729us 0ns (1.49ns) 91908us 0ns (4.60ns) 8 | -- Unif 10^9 :37430us 0ns (1.87ns) 98921us 0ns (4.95ns) 9 | -- Small1 :32552us 0ns (1.63ns) 70818us 0ns (3.54ns) 10 | -- Small2 :31477us 0ns (1.57ns) 89719us 0ns (4.49ns) 11 | -- Small3 :28982us 0ns (1.45ns) 93817us 0ns (4.69ns) 12 | -- Normale 8 :25516us 0ns (1.28ns) 76217us 0ns (3.81ns) 13 | -- Normale 10 :24339us 0ns (1.22ns) 68898us 0ns (3.44ns) 14 | -- Normale 13 :26336us 0ns (1.32ns) 87632us 0ns (4.38ns) 15 | -- Normale 16 :29257us 0ns (1.46ns) 111070us 0ns (5.55ns) 16 | -- Normale 20 :26413us 0ns (1.32ns) 100131us 0ns (5.01ns) 17 | -- Normale 24 :29844us 0ns (1.49ns) 96586us 0ns (4.83ns) 18 | -- Normale 30 :28697us 0ns (1.43ns) 81096us 0ns (4.05ns) 19 | Array 
size: 50000000 20 | -- Unif :82698us 0ns (1.65ns) 229793us 0ns (4.60ns) 21 | -- Unif 10^9 :81657us 0ns (1.63ns) 259177us 0ns (5.18ns) 22 | -- Small1 :63786us 0ns (1.28ns) 184083us 0ns (3.68ns) 23 | -- Small2 :67328us 0ns (1.35ns) 205490us 0ns (4.11ns) 24 | -- Small3 :82445us 0ns (1.65ns) 276759us 0ns (5.54ns) 25 | -- Normale 8 :61372us 0ns (1.23ns) 204098us 0ns (4.08ns) 26 | -- Normale 10 :60395us 0ns (1.21ns) 201651us 0ns (4.03ns) 27 | -- Normale 13 :65272us 0ns (1.31ns) 196475us 0ns (3.93ns) 28 | -- Normale 16 :70031us 0ns (1.40ns) 198852us 0ns (3.98ns) 29 | -- Normale 20 :66784us 0ns (1.34ns) 236231us 0ns (4.72ns) 30 | -- Normale 24 :71772us 0ns (1.44ns) 232750us 0ns (4.66ns) 31 | -- Normale 30 :66290us 0ns (1.33ns) 234006us 0ns (4.68ns) 32 | Array size: 100000000 33 | -- Unif :140449us 0ns (1.40ns) 464414us 0ns (4.64ns) 34 | -- Unif 10^9 :129651us 0ns (1.30ns) 476254us 0ns (4.76ns) 35 | -- Small1 :111718us 0ns (1.12ns) 455334us 0ns (4.55ns) 36 | -- Small2 :126151us 0ns (1.26ns) 420657us 0ns (4.21ns) 37 | -- Small3 :131173us 0ns (1.31ns) 462890us 0ns (4.63ns) 38 | -- Normale 8 :125218us 0ns (1.25ns) 487710us 0ns (4.88ns) 39 | -- Normale 10 :127304us 0ns (1.27ns) 441531us 0ns (4.42ns) 40 | -- Normale 13 :136551us 0ns (1.37ns) 438418us 0ns (4.38ns) 41 | -- Normale 16 :148157us 0ns (1.48ns) 423325us 0ns (4.23ns) 42 | -- Normale 20 :140102us 0ns (1.40ns) 408919us 0ns (4.09ns) 43 | -- Normale 24 :141393us 0ns (1.41ns) 462922us 0ns (4.63ns) 44 | -- Normale 30 :144584us 0ns (1.45ns) 465112us 0ns (4.65ns) 45 | Array size: 1000000000 46 | -- Unif :1694811us 0ns (1.69ns) 5608677us 0ns (5.61ns) 47 | -- Unif 10^9 :1674353us 0ns (1.67ns) 5873873us 0ns (5.87ns) 48 | -- Small1 :1139877us 0ns (1.14ns) 4260558us 0ns (4.26ns) 49 | -- Small2 :1579917us 0ns (1.58ns) 5222188us 0ns (5.22ns) 50 | -- Small3 :1666842us 0ns (1.67ns) 5466647us 0ns (5.47ns) 51 | -- Normale 8 :1467924us 0ns (1.47ns) 4756631us 0ns (4.76ns) 52 | -- Normale 10 :1560231us 0ns (1.56ns) 5167883us 0ns (5.17ns) 53 
| -- Normale 13 :1696100us 0ns (1.70ns) 5236385us 0ns (5.24ns) 54 | -- Normale 16 :1906649us 0ns (1.91ns) 5415666us 0ns (5.42ns) 55 | -- Normale 20 :1900733us 0ns (1.90ns) 5313473us 0ns (5.31ns) 56 | -- Normale 24 :1895846us 0ns (1.90ns) 5758310us 0ns (5.76ns) 57 | -- Normale 30 :1819696us 0ns (1.82ns) 5360408us 0ns (5.36ns) 58 | Array size: 2000000000 59 | -- Unif :3827888us 0ns (1.91ns) 11480506us 0ns (5.74ns) 60 | -- Unif 10^9 :3731662us 0ns (1.87ns) 11611292us 0ns (5.81ns) 61 | -- Small1 :2574989us 0ns (1.29ns) 9113257us 0ns (4.56ns) 62 | -- Small2 :3561719us 0ns (1.78ns) 11836194us 0ns (5.92ns) 63 | -- Small3 :3741928us 0ns (1.87ns) 11877244us 0ns (5.94ns) 64 | -- Normale 8 :3170079us 0ns (1.59ns) 9576992us 0ns (4.79ns) 65 | -- Normale 10 :3267081us 0ns (1.63ns) 10694281us 0ns (5.35ns) 66 | -- Normale 13 :3571614us 0ns (1.79ns) 11691010us 0ns (5.85ns) 67 | -- Normale 16 :3953549us 0ns (1.98ns) 11455444us 0ns (5.73ns) 68 | -- Normale 20 :3926551us 0ns (1.96ns) 11291890us 0ns (5.65ns) 69 | -- Normale 24 :3983412us 0ns (1.99ns) 11983647us 0ns (5.99ns) 70 | -- Normale 30 :4164327us 0ns (2.08ns) 11385310us 0ns (5.69ns) 71 | Array size: 5000000000 72 | -- Unif :11566097us 0ns (2.31ns) 31528046us 0ns (6.31ns) 73 | -- Unif 10^9 :11257106us 0ns (2.25ns) 31314217us 0ns (6.26ns) 74 | -- Small1 :8751310us 0ns (1.75ns) 23971452us 0ns (4.79ns) 75 | -- Small2 :10751355us 0ns (2.15ns) 30698469us 0ns (6.14ns) 76 | -- Small3 :11587489us 0ns (2.32ns) 31005025us 0ns (6.20ns) 77 | -- Normale 8 :9572731us 0ns (1.91ns) 27781022us 0ns (5.56ns) 78 | -- Normale 10 :9728168us 0ns (1.95ns) 27342272us 0ns (5.47ns) 79 | -- Normale 13 :10237506us 0ns (2.05ns) 30123893us 0ns (6.02ns) 80 | -- Normale 16 :10847442us 0ns (2.17ns) 30655997us 0ns (6.13ns) 81 | -- Normale 20 :10779306us 0ns (2.16ns) 30136275us 0ns (6.03ns) 82 | -- Normale 24 :10991099us 0ns (2.20ns) 30596142us 0ns (6.12ns) 83 | -- Normale 30 :13188646us 0ns (2.64ns) 34239741us 0ns (6.85ns) 84 | Array size: 7000000000 85 | -- Unif 
:16199459us 0ns (2.31ns) 45632075us 0ns (6.52ns) 86 | -- Unif 10^9 :16031448us 0ns (2.29ns) 44165060us 0ns (6.31ns) 87 | -- Small1 :11068432us 0ns (1.58ns) 30825665us 0ns (4.40ns) 88 | -- Small2 :14918492us 0ns (2.13ns) 43824040us 0ns (6.26ns) 89 | -- Small3 :16215028us 0ns (2.32ns) 47503420us 0ns (6.79ns) 90 | -- Normale 8 :13206236us 0ns (1.89ns) 34339170us 0ns (4.91ns) 91 | -- Normale 10 :13484666us 0ns (1.93ns) 38363423us 0ns (5.48ns) 92 | -- Normale 13 :14204818us 0ns (2.03ns) 43080656us 0ns (6.15ns) 93 | -- Normale 16 :15421691us 0ns (2.20ns) 43961107us 0ns (6.28ns) 94 | -- Normale 20 :15343383us 0ns (2.19ns) 43905453us 0ns (6.27ns) 95 | -- Normale 24 :16593304us 0ns (2.37ns) 43775873us 0ns (6.25ns) 96 | -- Normale 30 :17378189us 0ns (2.48ns) 43925828us 0ns (6.28ns) 97 | Array size: 10000000000 98 | -- Unif :24862120us 0ns (2.49ns) 66108146us 0ns (6.61ns) 99 | -- Unif 10^9 :24670670us 0ns (2.47ns) 65288788us 0ns (6.53ns) 100 | -- Small1 :14701331us 0ns (1.47ns) 44643396us 0ns (4.46ns) 101 | -- Small2 :21981390us 0ns (2.20ns) 64797984us 0ns (6.48ns) 102 | -- Small3 :23952212us 0ns (2.40ns) 64639553us 0ns (6.46ns) 103 | -- Normale 8 :19402169us 0ns (1.94ns) 50756912us 0ns (5.08ns) 104 | -- Normale 10 :19803898us 0ns (1.98ns) 55096120us 0ns (5.51ns) 105 | -- Normale 13 :20908190us 0ns (2.09ns) 63885756us 0ns (6.39ns) 106 | -- Normale 16 :23427883us 0ns (2.34ns) 63000206us 0ns (6.30ns) 107 | -- Normale 20 :23329669us 0ns (2.33ns) 65234543us 0ns (6.52ns) 108 | -- Normale 24 :25173363us 0ns (2.52ns) 66176679us 0ns (6.62ns) 109 | -- Normale 30 :24417648us 0ns (2.44ns) 66356399us 0ns (6.64ns) 110 | -------------------------------------------------------------------------------- /results/benchmark_1_2_0/benchmark_results_trait_mt_u64: -------------------------------------------------------------------------------- 1 | Number of iterations: 1 2 | Number of threads: 16 3 | With check: false 4 | === Test u64 === Trait Vora MT Rayon pll uns 5 | Array size: 20000000 6 | -- 
Unif :56649us 0ns (2.83ns) 108306us 0ns (5.42ns) 7 | -- Unif 10^9 :57815us 0ns (2.89ns) 109463us 0ns (5.47ns) 8 | -- Small 4 :57319us 0ns (2.87ns) 102977us 0ns (5.15ns) 9 | -- Small 5 :54608us 0ns (2.73ns) 134971us 0ns (6.75ns) 10 | -- Small 6 :55460us 0ns (2.77ns) 115309us 0ns (5.77ns) 11 | -- Small 7 :56013us 0ns (2.80ns) 124137us 0ns (6.21ns) 12 | -- Normale 10 :53019us 0ns (2.65ns) 95649us 0ns (4.78ns) 13 | -- Normale 20 :56191us 0ns (2.81ns) 125451us 0ns (6.27ns) 14 | -- Normale 30 :73553us 0ns (3.68ns) 106359us 0ns (5.32ns) 15 | -- Normale 40 :70935us 0ns (3.55ns) 111450us 0ns (5.57ns) 16 | -- Normale 51 :74400us 0ns (3.72ns) 109835us 0ns (5.49ns) 17 | -- Normale 63 :58562us 0ns (2.93ns) 108113us 0ns (5.41ns) 18 | Array size: 50000000 19 | -- Unif :142904us 0ns (2.86ns) 327798us 0ns (6.56ns) 20 | -- Unif 10^9 :146952us 0ns (2.94ns) 318820us 0ns (6.38ns) 21 | -- Small 4 :132385us 0ns (2.65ns) 309114us 0ns (6.18ns) 22 | -- Small 5 :130999us 0ns (2.62ns) 336955us 0ns (6.74ns) 23 | -- Small 6 :135588us 0ns (2.71ns) 312589us 0ns (6.25ns) 24 | -- Small 7 :129108us 0ns (2.58ns) 293029us 0ns (5.86ns) 25 | -- Normale 10 :121328us 0ns (2.43ns) 305707us 0ns (6.11ns) 26 | -- Normale 20 :137004us 0ns (2.74ns) 289951us 0ns (5.80ns) 27 | -- Normale 30 :164169us 0ns (3.28ns) 302088us 0ns (6.04ns) 28 | -- Normale 40 :148315us 0ns (2.97ns) 298711us 0ns (5.97ns) 29 | -- Normale 51 :152047us 0ns (3.04ns) 317073us 0ns (6.34ns) 30 | -- Normale 63 :137800us 0ns (2.76ns) 299735us 0ns (5.99ns) 31 | Array size: 100000000 32 | -- Unif :255559us 0ns (2.56ns) 663216us 0ns (6.63ns) 33 | -- Unif 10^9 :244847us 0ns (2.45ns) 666756us 0ns (6.67ns) 34 | -- Small 4 :251521us 0ns (2.52ns) 667105us 0ns (6.67ns) 35 | -- Small 5 :256195us 0ns (2.56ns) 685900us 0ns (6.86ns) 36 | -- Small 6 :261571us 0ns (2.62ns) 671619us 0ns (6.72ns) 37 | -- Small 7 :253397us 0ns (2.53ns) 681442us 0ns (6.81ns) 38 | -- Normale 10 :259827us 0ns (2.60ns) 690037us 0ns (6.90ns) 39 | -- Normale 20 :302256us 0ns (3.02ns) 
660953us 0ns (6.61ns) 40 | -- Normale 30 :314571us 0ns (3.15ns) 675203us 0ns (6.75ns) 41 | -- Normale 40 :302193us 0ns (3.02ns) 671479us 0ns (6.71ns) 42 | -- Normale 51 :294274us 0ns (2.94ns) 690333us 0ns (6.90ns) 43 | -- Normale 63 :313707us 0ns (3.14ns) 693244us 0ns (6.93ns) 44 | Array size: 1000000000 45 | -- Unif :3727723us 0ns (3.73ns) 9096140us 0ns (9.10ns) 46 | -- Unif 10^9 :3708252us 0ns (3.71ns) 9224008us 0ns (9.22ns) 47 | -- Small 4 :3713801us 0ns (3.71ns) 9103911us 0ns (9.10ns) 48 | -- Small 5 :3765813us 0ns (3.77ns) 9110270us 0ns (9.11ns) 49 | -- Small 6 :3744118us 0ns (3.74ns) 8993026us 0ns (8.99ns) 50 | -- Small 7 :3729128us 0ns (3.73ns) 9147403us 0ns (9.15ns) 51 | -- Normale 10 :3360389us 0ns (3.36ns) 8388091us 0ns (8.39ns) 52 | -- Normale 20 :4151019us 0ns (4.15ns) 9100961us 0ns (9.10ns) 53 | -- Normale 30 :4181739us 0ns (4.18ns) 9192653us 0ns (9.19ns) 54 | -- Normale 40 :4206328us 0ns (4.21ns) 9186113us 0ns (9.19ns) 55 | -- Normale 51 :4170241us 0ns (4.17ns) 9099056us 0ns (9.10ns) 56 | -- Normale 63 :4185455us 0ns (4.19ns) 9287599us 0ns (9.29ns) 57 | Array size: 2000000000 58 | -- Unif :8329972us 0ns (4.16ns) 19807813us 0ns (9.90ns) 59 | -- Unif 10^9 :8309000us 0ns (4.15ns) 19736719us 0ns (9.87ns) 60 | -- Small 4 :8382469us 0ns (4.19ns) 19491446us 0ns (9.75ns) 61 | -- Small 5 :8346728us 0ns (4.17ns) 19496043us 0ns (9.75ns) 62 | -- Small 6 :8352254us 0ns (4.18ns) 21251437us 0ns (10.63ns) 63 | -- Small 7 :8380480us 0ns (4.19ns) 19688922us 0ns (9.84ns) 64 | -- Normale 10 :6913997us 0ns (3.46ns) 17027970us 0ns (8.51ns) 65 | -- Normale 20 :8335745us 0ns (4.17ns) 19263043us 0ns (9.63ns) 66 | -- Normale 30 :8609927us 0ns (4.30ns) 19543697us 0ns (9.77ns) 67 | -- Normale 40 :8612230us 0ns (4.31ns) 19596353us 0ns (9.80ns) 68 | -- Normale 51 :8688785us 0ns (4.34ns) 19825087us 0ns (9.91ns) 69 | -- Normale 63 :8901033us 0ns (4.45ns) 19630567us 0ns (9.82ns) 70 | Array size: 5000000000 71 | -- Unif :23777234us 0ns (4.76ns) 54101019us 0ns (10.82ns) 72 | -- Unif 
10^9 :23590486us 0ns (4.72ns) 55170125us 0ns (11.03ns) 73 | -- Small 4 :23707568us 0ns (4.74ns) 54407903us 0ns (10.88ns) 74 | -- Small 5 :23738543us 0ns (4.75ns) 53653733us 0ns (10.73ns) 75 | -- Small 6 :23805812us 0ns (4.76ns) 53697996us 0ns (10.74ns) 76 | -- Small 7 :23791342us 0ns (4.76ns) 54215738us 0ns (10.84ns) 77 | -- Normale 10 :19509215us 0ns (3.90ns) 43451917us 0ns (8.69ns) 78 | -- Normale 20 :23040398us 0ns (4.61ns) 54736001us 0ns (10.95ns) 79 | -- Normale 30 :24561635us 0ns (4.91ns) 53641769us 0ns (10.73ns) 80 | -- Normale 40 :25049735us 0ns (5.01ns) 53761060us 0ns (10.75ns) 81 | -- Normale 51 :23545256us 0ns (4.71ns) 53330158us 0ns (10.67ns) 82 | -- Normale 63 :24700959us 0ns (4.94ns) 53730818us 0ns (10.75ns) 83 | Array size: 7000000000 84 | -- Unif :34858289us 0ns (4.98ns) 77902733us 0ns (11.13ns) 85 | -- Unif 10^9 :34648391us 0ns (4.95ns) 78522195us 0ns (11.22ns) 86 | -- Small 4 :34627855us 0ns (4.95ns) 83247619us 0ns (11.89ns) 87 | -- Small 5 :34764971us 0ns (4.97ns) 78955622us 0ns (11.28ns) 88 | -- Small 6 :34767707us 0ns (4.97ns) 78287871us 0ns (11.18ns) 89 | -- Small 7 :34767923us 0ns (4.97ns) 77402362us 0ns (11.06ns) 90 | -- Normale 10 :28322838us 0ns (4.05ns) 60087827us 0ns (8.58ns) 91 | -- Normale 20 :34090362us 0ns (4.87ns) 77715298us 0ns (11.10ns) 92 | -- Normale 30 :35911522us 0ns (5.13ns) 78203899us 0ns (11.17ns) 93 | -- Normale 40 :36764353us 0ns (5.25ns) 77023908us 0ns (11.00ns) 94 | -- Normale 51 :36035886us 0ns (5.15ns) 78764569us 0ns (11.25ns) 95 | -- Normale 63 :34916909us 0ns (4.99ns) 77340244us 0ns (11.05ns) 96 | Array size: 10000000000 97 | -- Unif :52202525us 0ns (5.22ns) 115314917us 0ns (11.53ns) 98 | -- Unif 10^9 :51320204us 0ns (5.13ns) 114967105us 0ns (11.50ns) 99 | -- Small 4 :51273717us 0ns (5.13ns) 114365629us 0ns (11.44ns) 100 | -- Small 5 :52651409us 0ns (5.27ns) 118878145us 0ns (11.89ns) 101 | -- Small 6 :54030514us 0ns (5.40ns) 114343261us 0ns (11.43ns) 102 | -- Small 7 :51988105us 0ns (5.20ns) 116605855us 0ns 
(11.66ns) 103 | -- Normale 10 :41733271us 0ns (4.17ns) 87405309us 0ns (8.74ns) 104 | -- Normale 20 :51356683us 0ns (5.14ns) 113100041us 0ns (11.31ns) 105 | -- Normale 30 :57894808us 0ns (5.79ns) 114626679us 0ns (11.46ns) 106 | -- Normale 40 :58777690us 0ns (5.88ns) 115124931us 0ns (11.51ns) 107 | -- Normale 51 :57205916us 0ns (5.72ns) 114703518us 0ns (11.47ns) 108 | -- Normale 63 :51775659us 0ns (5.18ns) 114348869us 0ns (11.43ns) 109 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "autocfg" 7 | version = "1.1.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 10 | 11 | [[package]] 12 | name = "cfg-if" 13 | version = "1.0.0" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 16 | 17 | [[package]] 18 | name = "crossbeam-channel" 19 | version = "0.5.7" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "cf2b3e8478797446514c91ef04bafcb59faba183e621ad488df88983cc14128c" 22 | dependencies = [ 23 | "cfg-if", 24 | "crossbeam-utils", 25 | ] 26 | 27 | [[package]] 28 | name = "crossbeam-deque" 29 | version = "0.8.3" 30 | source = "registry+https://github.com/rust-lang/crates.io-index" 31 | checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" 32 | dependencies = [ 33 | "cfg-if", 34 | "crossbeam-epoch", 35 | "crossbeam-utils", 36 | ] 37 | 38 | [[package]] 39 | name = "crossbeam-epoch" 40 | version = "0.9.14" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = 
"46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" 43 | dependencies = [ 44 | "autocfg", 45 | "cfg-if", 46 | "crossbeam-utils", 47 | "memoffset", 48 | "scopeguard", 49 | ] 50 | 51 | [[package]] 52 | name = "crossbeam-utils" 53 | version = "0.8.15" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" 56 | dependencies = [ 57 | "cfg-if", 58 | ] 59 | 60 | [[package]] 61 | name = "either" 62 | version = "1.8.1" 63 | source = "registry+https://github.com/rust-lang/crates.io-index" 64 | checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" 65 | 66 | [[package]] 67 | name = "getrandom" 68 | version = "0.1.16" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" 71 | dependencies = [ 72 | "cfg-if", 73 | "libc", 74 | "wasi", 75 | ] 76 | 77 | [[package]] 78 | name = "hermit-abi" 79 | version = "0.2.6" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" 82 | dependencies = [ 83 | "libc", 84 | ] 85 | 86 | [[package]] 87 | name = "libc" 88 | version = "0.2.140" 89 | source = "registry+https://github.com/rust-lang/crates.io-index" 90 | checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" 91 | 92 | [[package]] 93 | name = "libm" 94 | version = "0.2.6" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" 97 | 98 | [[package]] 99 | name = "memoffset" 100 | version = "0.8.0" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" 103 | dependencies = [ 104 | "autocfg", 105 | ] 106 | 107 | [[package]] 108 | name = "num-traits" 
109 | version = "0.2.15" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 112 | dependencies = [ 113 | "autocfg", 114 | "libm", 115 | ] 116 | 117 | [[package]] 118 | name = "num_cpus" 119 | version = "1.15.0" 120 | source = "registry+https://github.com/rust-lang/crates.io-index" 121 | checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" 122 | dependencies = [ 123 | "hermit-abi", 124 | "libc", 125 | ] 126 | 127 | [[package]] 128 | name = "ppv-lite86" 129 | version = "0.2.17" 130 | source = "registry+https://github.com/rust-lang/crates.io-index" 131 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 132 | 133 | [[package]] 134 | name = "rand" 135 | version = "0.7.3" 136 | source = "registry+https://github.com/rust-lang/crates.io-index" 137 | checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" 138 | dependencies = [ 139 | "getrandom", 140 | "libc", 141 | "rand_chacha", 142 | "rand_core", 143 | "rand_hc", 144 | ] 145 | 146 | [[package]] 147 | name = "rand_chacha" 148 | version = "0.2.2" 149 | source = "registry+https://github.com/rust-lang/crates.io-index" 150 | checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" 151 | dependencies = [ 152 | "ppv-lite86", 153 | "rand_core", 154 | ] 155 | 156 | [[package]] 157 | name = "rand_core" 158 | version = "0.5.1" 159 | source = "registry+https://github.com/rust-lang/crates.io-index" 160 | checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" 161 | dependencies = [ 162 | "getrandom", 163 | ] 164 | 165 | [[package]] 166 | name = "rand_distr" 167 | version = "0.3.0" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | checksum = "c9e9532ada3929fb8b2e9dbe28d1e06c9b2cc65813f074fcb6bd5fbefeff9d56" 170 | dependencies = [ 171 | "num-traits", 172 | "rand", 173 | ] 174 | 175 | 
[[package]] 176 | name = "rand_hc" 177 | version = "0.2.0" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" 180 | dependencies = [ 181 | "rand_core", 182 | ] 183 | 184 | [[package]] 185 | name = "rayon" 186 | version = "1.7.0" 187 | source = "registry+https://github.com/rust-lang/crates.io-index" 188 | checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" 189 | dependencies = [ 190 | "either", 191 | "rayon-core", 192 | ] 193 | 194 | [[package]] 195 | name = "rayon-core" 196 | version = "1.11.0" 197 | source = "registry+https://github.com/rust-lang/crates.io-index" 198 | checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" 199 | dependencies = [ 200 | "crossbeam-channel", 201 | "crossbeam-deque", 202 | "crossbeam-utils", 203 | "num_cpus", 204 | ] 205 | 206 | [[package]] 207 | name = "scopeguard" 208 | version = "1.1.0" 209 | source = "registry+https://github.com/rust-lang/crates.io-index" 210 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 211 | 212 | [[package]] 213 | name = "voracious_radix_sort" 214 | version = "1.2.0" 215 | dependencies = [ 216 | "rand", 217 | "rand_distr", 218 | "rayon", 219 | ] 220 | 221 | [[package]] 222 | name = "wasi" 223 | version = "0.9.0+wasi-snapshot-preview1" 224 | source = "registry+https://github.com/rust-lang/crates.io-index" 225 | checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" 226 | -------------------------------------------------------------------------------- /src/tests/regions_graph.rs: -------------------------------------------------------------------------------- 1 | use super::super::algo::regions_graph::RegionsGraph; 2 | 3 | #[test] 4 | fn test_regions_graph() { 5 | let h1 = vec![3, 5, 4, 4]; 6 | let h2 = vec![2, 1, 6, 8]; 7 | let h3 = vec![2, 9, 0, 3]; 8 | let histograms = vec![h1, h2, h3]; 9 | 10 | // 11 
1111 11 1 122 222 2222233 3 33 333334444 444 11 | // 012 3456 7 8901 2345 67 8 901 234 5678901 2 34 567890123 456 12 | // | | | | blocks 13 | // 000 1111 1 2222 3333 00 1 222 222 3333333 3 00 111111111 333 14 | // | | | | | | | | | | | | | | | regions 15 | // | | | | | countries 16 | 17 | let mut g = RegionsGraph::new(4); 18 | g.build_regions_graph(&histograms); 19 | 20 | let mut check = RegionsGraph::new(4); 21 | check.add(0, 1, (4, 3)); 22 | check.add(1, 2, (4, 8)); 23 | check.add(1, 3, (4, 12)); 24 | check.add(1, 0, (2, 16)); 25 | check.add(1, 2, (3, 19)); 26 | check.add(2, 3, (7, 25)); 27 | check.add(3, 0, (2, 33)); 28 | check.add(3, 1, (9, 35)); 29 | // RegionsGraph { countries: [ 30 | // ([(1, 16, 2), (3, 33, 2)], [(1, 3, 4)]), 31 | // ([(0, 3, 4), (3, 35, 9)], [(2, 8, 4), (3, 12, 4), (0, 16, 2), (2, 19, 32 | // 3)]), ([(1, 8, 4), (1, 19, 3)], [(3, 25, 7)]), 33 | // ([(1, 12, 4), (2, 25, 7)], [(0, 33, 2), (1, 35, 9)]) 34 | // ] } 35 | 36 | assert_eq!(g, check); 37 | } 38 | 39 | #[test] 40 | fn test_regions_graph_bis() { 41 | let h1 = vec![0, 1, 0, 2]; 42 | let h2 = vec![0, 2, 1, 0]; 43 | let h3 = vec![0, 1, 0, 0]; 44 | let histograms = vec![h1, h2, h3]; 45 | 46 | let mut g = RegionsGraph::new(4); 47 | g.build_regions_graph(&histograms); 48 | 49 | let mut check = RegionsGraph::new(4); 50 | check.add(1, 3, (2, 1)); 51 | check.add(2, 1, (1, 4)); 52 | check.add(3, 2, (1, 5)); 53 | check.add(3, 1, (1, 6)); 54 | // RegionsGraph { countries: [ 55 | // ([], []), 56 | // ([(2, 4, 1), (3, 6, 1)], [(3, 1, 2)]), 57 | // ([(3, 5, 1)], [(1, 4, 1)]), 58 | // ([(1, 1, 2)], [(2, 5, 1), (1, 6, 1)]) 59 | // ] } 60 | 61 | assert_eq!(g, check); 62 | } 63 | 64 | #[test] 65 | fn test_regions_graph_two_cycle_1() { 66 | // let arr = vec![1, 3, 3, 1, 1, 2, 1]; 67 | let h1 = vec![0, 1, 0, 2]; 68 | let h2 = vec![0, 2, 1, 0]; 69 | let h3 = vec![0, 1, 0, 0]; 70 | let histograms = vec![h1, h2, h3]; 71 | 72 | let mut g = RegionsGraph::new(4); 73 | g.build_regions_graph(&histograms); 74 | // 
RegionsGraph { countries: [ 75 | // ([], []), 76 | // ([(2, 4, 1), (3, 6, 1)], [(3, 1, 2)]), 77 | // ([(3, 5, 1)], [(1, 4, 1)]), 78 | // ([(1, 1, 2)], [(2, 5, 1), (1, 6, 1)]) 79 | // ] } 80 | 81 | let swaps = g.two_cycle(1); 82 | assert_eq!(swaps, vec![(1, (3, 1), (3, 6))]); 83 | 84 | let mut check = RegionsGraph::new(4); 85 | check.add(1, 3, (1, 2)); 86 | check.add(2, 1, (1, 4)); 87 | check.add(3, 2, (1, 5)); 88 | // RegionsGraph { countries: [ 89 | // [[], []], 90 | // [[(2, 4, 1)], [(3, 2, 1)]], 91 | // [[(3, 5, 1)], [(1, 4, 1)]], 92 | // [[(1, 2, 1)], [(2, 5, 1)]], 93 | // ] } 94 | assert_eq!(g, check); 95 | } 96 | 97 | #[test] 98 | fn test_regions_graph_two_cycle_2() { 99 | // let mut arr = vec![1, 3, 3, 1, 1, 2, 1]; 100 | let h1 = vec![0, 1, 0, 2]; 101 | let h2 = vec![0, 2, 1, 0]; 102 | let h3 = vec![0, 1, 0, 0]; 103 | let histograms = vec![h1, h2, h3]; 104 | 105 | let mut g = RegionsGraph::new(4); 106 | g.build_regions_graph(&histograms); 107 | // RegionsGraph { countries: [ 108 | // ([], []), 109 | // ([(2, 4, 1), (3, 6, 1)], [(3, 1, 2)]), 110 | // ([(3, 5, 1)], [(1, 4, 1)]), 111 | // ([(1, 1, 2)], [(2, 5, 1), (1, 6, 1)]) 112 | // ] } 113 | 114 | let swaps = g.two_cycle(3); 115 | assert_eq!(swaps, vec![(1, (1, 6), (1, 1))]); 116 | 117 | let mut check = RegionsGraph::new(4); 118 | check.add(1, 3, (1, 2)); 119 | check.add(2, 1, (1, 4)); 120 | check.add(3, 2, (1, 5)); 121 | // RegionsGraph { countries: [ 122 | // ([], []), 123 | // ([(2, 4, 1)], [(3, 2, 1)]), 124 | // ([(3, 5, 1)], [(1, 4, 1)]), 125 | // ([(1, 2, 1)], [(2, 5, 1)]) 126 | // ] } 127 | assert_eq!(g, check); 128 | } 129 | 130 | #[test] 131 | fn test_regions_graph_two_cycle_3() { 132 | // let mut arr = vec![1, 1, 3, 1, 1, 2, 1]; 133 | let h1 = vec![0, 2, 0, 1]; 134 | let h2 = vec![0, 2, 1, 0]; 135 | let h3 = vec![0, 1, 0, 0]; 136 | let histograms = vec![h1, h2, h3]; 137 | 138 | let mut g = RegionsGraph::new(4); 139 | g.build_regions_graph(&histograms); 140 | // RegionsGraph { countries: [ 141 | // 
([], []), 142 | // ([(3, 6, 1)], [(3, 2, 1)]), 143 | // ([], []), 144 | // ([(1, 2, 1)], [(1, 6, 1)]) 145 | // ] } 146 | 147 | let swaps = g.two_cycle(1); 148 | assert_eq!(swaps, vec![(1, (3, 2), (3, 6))]); 149 | 150 | let check = RegionsGraph::new(4); 151 | // RegionsGraph { countries: [ 152 | // ([], []), 153 | // ([], []), 154 | // ([], []), 155 | // ([], []), 156 | // ] } 157 | assert_eq!(g, check); 158 | } 159 | 160 | #[test] 161 | fn test_regions_graph_two_path_1() { 162 | // let mut arr = vec![1, 3, 3, 1, 1, 2, 1]; 163 | let h1 = vec![0, 1, 0, 2]; 164 | let h2 = vec![0, 2, 1, 0]; 165 | let h3 = vec![0, 1, 0, 0]; 166 | let histograms = vec![h1, h2, h3]; 167 | 168 | let mut g = RegionsGraph::new(4); 169 | g.build_regions_graph(&histograms); 170 | // RegionsGraph { countries: [ 171 | // ([], []), 172 | // ([(2, 4, 1), (3, 6, 1)], [(3, 1, 2)]), 173 | // ([(3, 5, 1)], [(1, 4, 1)]), 174 | // ([(1, 1, 2)], [(2, 5, 1), (1, 6, 1)]) 175 | // ] } 176 | 177 | let swaps = g.two_path(2); 178 | assert_eq!(swaps, vec![(1, (1, 4), (3, 5))]); 179 | 180 | let mut check = RegionsGraph::new(4); 181 | check.add(1, 3, (2, 1)); 182 | check.add(3, 1, (1, 6)); 183 | check.add(3, 1, (1, 5)); 184 | // RegionsGraph { countries: [ 185 | // [[], []], 186 | // [[(3, 6, 1), (3, 5, 1)], [(3, 1, 2)]], 187 | // [[], []], 188 | // [[(1, 1, 2)], [(1, 6, 1), (1, 5, 1)]] 189 | // ] } 190 | assert_eq!(g, check); 191 | } 192 | 193 | #[test] 194 | fn test_regions_graph_two_path_2() { 195 | // let mut arr = vec![1, 3, 3, 1, 1, 2, 1]; 196 | let h1 = vec![0, 1, 0, 2]; 197 | let h2 = vec![0, 2, 1, 0]; 198 | let h3 = vec![0, 1, 0, 0]; 199 | let histograms = vec![h1, h2, h3]; 200 | 201 | let mut g = RegionsGraph::new(4); 202 | g.build_regions_graph(&histograms); 203 | // RegionsGraph { countries: [ 204 | // ([], []), 205 | // ([(2, 4, 1), (3, 6, 1)], [(3, 1, 2)]), 206 | // ([(3, 5, 1)], [(1, 4, 1)]), 207 | // ([(1, 1, 2)], [(2, 5, 1), (1, 6, 1)]) 208 | // ] } 209 | 210 | let swaps = g.two_path(1); 211 | 
assert_eq!(swaps, vec![(1, (3, 1), (2, 4)), (1, (3, 2), (3, 6))]); 212 | 213 | let mut check = RegionsGraph::new(4); 214 | check.add(2, 3, (1, 4)); 215 | check.add(3, 2, (1, 5)); 216 | // RegionsGraph { countries: [ 217 | // ([], []), 218 | // ([], []), 219 | // ([(3, 5, 1)], [(3, 4, 1)]), 220 | // ([(2, 4, 1)], [(2, 5, 1)]) 221 | // ] } 222 | assert_eq!(g, check); 223 | } 224 | -------------------------------------------------------------------------------- /src/sorts/rollercoaster_sort.rs: -------------------------------------------------------------------------------- 1 | use super::super::algo::k_way_merge::k_way_merge; 2 | use super::super::algo::verge_sort_heuristic::{ 3 | explore_simple_forward, verge_sort_preprocessing, Orientation, 4 | }; 5 | use super::super::{RadixKey, RadixSort, Radixable}; 6 | use super::counting_sort::counting_sort; 7 | use super::dlsd_sort::dlsd_radixsort_body; 8 | use super::lsd_sort::lsd_radixsort_body; 9 | use super::ska_sort::ska_swap; 10 | use super::utils::{get_histogram, prefix_sums, Params}; 11 | 12 | pub fn fallback, K: RadixKey>(arr: &mut [T], p: Params) { 13 | let size = arr.len(); 14 | // This is a fallback; we don't want to sort big arrays.
15 | assert!(size <= 128_000); 16 | 17 | if size <= 256 { 18 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 19 | return; 20 | } 21 | 22 | let remaining_level = p.max_level - p.level; 23 | if remaining_level < 4 { 24 | let new_offset = ((p.offset / p.radix) * p.radix) + (p.level * p.radix); 25 | let p = Params::new(0, p.radix, new_offset, remaining_level); 26 | lsd_radixsort_body(arr, p); 27 | } else { 28 | let new_offset = p.level * p.radix + p.offset; 29 | let new_max_level = if size <= 65_536 { 2 } else { 3 }; 30 | let new_params = Params::new(0, p.radix, new_offset, new_max_level); 31 | dlsd_radixsort_body(arr, new_params, new_max_level, true); 32 | } 33 | } 34 | 35 | fn rollercoaster_sort_rec, K: RadixKey>( 36 | arr: &mut [T], 37 | p: Params, 38 | zhc: usize, // zipf heuristic count 39 | first_pass: bool, 40 | ) { 41 | if !first_pass && arr.len() <= 128_000 { 42 | fallback(arr, p); 43 | return; 44 | } 45 | 46 | let dummy = arr[0]; 47 | let (mask, shift) = dummy.get_mask_and_shift_from_left(&p); 48 | let histogram = get_histogram(arr, &p, mask, shift); 49 | let (p_sums, mut heads, tails) = prefix_sums(&histogram); 50 | 51 | ska_swap(arr, &mut heads, &tails, mask, shift); 52 | 53 | let mut rest = arr; 54 | if p.level < p.max_level - 1 { 55 | for i in 0..(p.radix_range) { 56 | let bucket_end = p_sums[i + 1] - p_sums[i]; 57 | let (first_part, second_part) = rest.split_at_mut(bucket_end); 58 | rest = second_part; 59 | if histogram[i] > 1 { 60 | // Heuristic for signed integers (the branches below handle 61 | // i32, i64 and i128). 62 | // The idea is to skip all the unused bits after the sign 63 | // bit.
64 | if first_pass && p.radix == 8 && i == 128 && dummy.is_i32() { 65 | unsafe { 66 | let arr_u32 = 67 | &mut *(first_part as *mut [T] as *mut [u32]); 68 | arr_u32.voracious_sort(); 69 | } 70 | } else if first_pass 71 | && p.radix == 8 72 | && i == 128 73 | && dummy.is_i64() 74 | { 75 | unsafe { 76 | let arr_u64 = 77 | &mut *(first_part as *mut [T] as *mut [u64]); 78 | arr_u64.voracious_sort(); 79 | } 80 | } else if first_pass 81 | && p.radix == 8 82 | && i == 128 83 | && dummy.is_i128() 84 | { 85 | unsafe { 86 | let arr_u128 = 87 | &mut *(first_part as *mut [T] as *mut [u128]); 88 | arr_u128.voracious_sort(); 89 | } 90 | } else { 91 | let new_params = p.new_level(p.level + 1); 92 | if zhc > 0 { 93 | match explore_simple_forward(first_part) { 94 | Orientation::IsAsc => (), 95 | Orientation::IsDesc => { 96 | first_part.reverse(); 97 | }, 98 | Orientation::IsPlateau => (), 99 | Orientation::IsNone => { 100 | rollercoaster_sort_rec( 101 | first_part, 102 | new_params, 103 | zhc - 1, 104 | false, 105 | ); 106 | }, 107 | } 108 | } else { 109 | rollercoaster_sort_rec( 110 | first_part, new_params, 0, false, 111 | ); 112 | } 113 | } 114 | } 115 | } 116 | } 117 | } 118 | 119 | fn rollercoaster_sort_aux, K: RadixKey>( 120 | arr: &mut [T], 121 | radix: usize, 122 | heuristic: bool, 123 | min_cs2: usize, 124 | ) { 125 | let size = arr.len(); 126 | if size <= 128 { 127 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 128 | return; 129 | } 130 | 131 | let dummy = arr[0]; 132 | let (_, raw_offset) = dummy.compute_offset(arr, radix); 133 | let max_level = dummy.compute_max_level(raw_offset, radix); 134 | 135 | let (offset_reg, _) = dummy.compute_offset(arr, 8); 136 | let max_level_reg = dummy.compute_max_level(offset_reg, 8); 137 | 138 | if max_level == 0 { 139 | return; 140 | } 141 | 142 | let params = Params::new(0, radix, raw_offset, max_level); 143 | 144 | if heuristic { 145 | if max_level_reg == 1 { 146 | counting_sort(arr, 8); 147 | } else if max_level_reg == 2 && 
arr.len() >= min_cs2 { 148 | counting_sort(arr, 16); 149 | } else { 150 | rollercoaster_sort_rec(arr, params, 2, true); 151 | } 152 | } else { 153 | rollercoaster_sort_rec(arr, params, 2, true); 154 | } 155 | } 156 | 157 | /// # Rollercoaster sort 158 | /// 159 | /// This sort is an invention of this crate's author. This is a Voracious sort (in its 160 | /// single thread version) which immediately falls back on a DLSD sort. A new 161 | /// fallback strategy for small chunks has been found. 162 | /// 163 | /// The name is because this sort can switch between an LSD and an MSD strategy. 164 | /// 165 | /// The Verge sort pre-processing heuristic is also added. 166 | /// 167 | /// This Rollercoaster sort is an out of place unstable radix sort. 168 | pub fn rollercoaster_sort(arr: &mut [T], radix: usize) 169 | where 170 | T: Radixable, 171 | K: RadixKey, 172 | { 173 | if arr.len() <= 128 { 174 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 175 | return; 176 | } 177 | 178 | let mut separators = verge_sort_preprocessing(arr, radix, &|arr, radix| { 179 | rollercoaster_sort_aux(arr, radix, false, 0) 180 | }); 181 | k_way_merge(arr, &mut separators); 182 | } 183 | 184 | pub fn rollercoaster_sort_heu(arr: &mut [T], radix: usize, min_cs2: usize) 185 | where 186 | T: Radixable, 187 | K: RadixKey, 188 | { 189 | if arr.len() <= 128 { 190 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 191 | return; 192 | } 193 | 194 | let mut separators = verge_sort_preprocessing(arr, radix, &|arr, radix| { 195 | rollercoaster_sort_aux(arr, radix, true, min_cs2) 196 | }); 197 | k_way_merge(arr, &mut separators); 198 | } 199 | -------------------------------------------------------------------------------- /src/sorts/peeka_sort.rs: -------------------------------------------------------------------------------- 1 | use rayon::{ThreadPool, ThreadPoolBuilder}; 2 | 3 | use std::sync::mpsc::channel; 4 | 5 | use super::super::algo::k_way_merge::k_way_merge; 6 | use
super::super::algo::regions_graph::{swap_countries, RegionsGraph}; 7 | use super::super::algo::verge_sort_heuristic::verge_sort_preprocessing; 8 | use super::super::{RadixKey, RadixSort, Radixable}; 9 | use super::rollercoaster_sort::fallback; 10 | use super::ska_sort::ska_swap; 11 | use super::utils::{get_histogram, prefix_sums, Params}; 12 | 13 | const FALLBACK_THRESHOLD: usize = 128_000; 14 | 15 | fn local_sorting( 16 | arr: &mut [T], 17 | p: &Params, 18 | block_size: usize, 19 | pool: &ThreadPool, 20 | ) -> Vec> 21 | where 22 | T: Radixable, 23 | K: RadixKey, 24 | { 25 | let dummy = arr[0]; 26 | let (mask, shift) = dummy.get_mask_and_shift_from_left(&p); 27 | let mut histograms: Vec> = Vec::new(); 28 | let mut receivers = Vec::new(); 29 | 30 | pool.scope(|s| { 31 | let mut rest = arr; 32 | while !rest.is_empty() { 33 | let (mut fst, snd) = if block_size < rest.len() { 34 | rest.split_at_mut(block_size) 35 | } else { 36 | (rest, &mut [] as &mut [T]) 37 | }; 38 | rest = snd; 39 | 40 | let (sender, receiver) = channel(); 41 | receivers.push(receiver); 42 | s.spawn(move |_| { 43 | let h = get_histogram(fst, p, mask, shift); 44 | let (_, mut heads, tails) = prefix_sums(&h); 45 | 46 | ska_swap(&mut fst, &mut heads, &tails, mask, shift); 47 | 48 | sender.send(h).unwrap(); 49 | }); 50 | } 51 | }); 52 | 53 | for receiver in receivers.iter() { 54 | histograms.push(receiver.recv().unwrap()); 55 | } 56 | 57 | histograms 58 | } 59 | 60 | fn peeka_sort_rec, K: RadixKey>( 61 | arr: &mut [T], 62 | p: Params, 63 | pool: &ThreadPool, 64 | previous_block_count: usize, 65 | init_size: usize, 66 | ) { 67 | let mut block_count = if previous_block_count * arr.len() / init_size == 0 { 68 | 1 69 | } else { 70 | previous_block_count * arr.len() / init_size 71 | }; 72 | let mut block_size = if arr.len() / block_count < FALLBACK_THRESHOLD { 73 | FALLBACK_THRESHOLD 74 | } else { 75 | arr.len() / block_count 76 | }; 77 | if init_size < 5_000_000_000 { 78 | block_size = previous_block_count; 
79 | block_count = previous_block_count; 80 | } 81 | 82 | if arr.len() <= FALLBACK_THRESHOLD { 83 | fallback(arr, p); 84 | return; 85 | } 86 | 87 | let dummy = arr[0]; 88 | 89 | // Local Sorting Phase for each block 90 | let histograms = if arr.len() <= block_size { 91 | let (mask, shift) = dummy.get_mask_and_shift_from_left(&p); 92 | let histogram = get_histogram(arr, &p, mask, shift); 93 | let (_, mut heads, tails) = prefix_sums(&histogram); 94 | 95 | ska_swap(arr, &mut heads, &tails, mask, shift); 96 | 97 | vec![histogram] 98 | } else { 99 | local_sorting(arr, &p, block_size, pool) 100 | }; 101 | 102 | // Graph Construction Phase 103 | let mut regions_graph = RegionsGraph::new(p.radix_range); 104 | let global_histogram = regions_graph.build_regions_graph(&histograms); 105 | 106 | // let sorted_countries = sort_countries(&global_histogram); 107 | let (p_sums, _, _) = prefix_sums(&global_histogram); 108 | 109 | // Global Sorting Phase and early recursion 110 | let mut countries = Vec::new(); 111 | let mut rest = arr; 112 | let mut country_map = vec![0; p.radix_range]; 113 | 114 | for country_id in 0..p.radix_range { 115 | let end = p_sums[country_id + 1] - p_sums[country_id]; 116 | let (country, snd) = rest.split_at_mut(end); 117 | countries.push((country_id, country, p_sums[country_id])); 118 | rest = snd; 119 | } 120 | countries.sort_unstable_by(|(_, a, _), (_, b, _)| { 121 | a.len() 122 | .partial_cmp(&b.len()) 123 | .expect("[Regions sort -> countries sorting] Bad implementation.") 124 | }); 125 | countries.iter().enumerate().for_each(|(i, &(country_id, _, _))| { 126 | country_map[country_id] = i; 127 | }); 128 | 129 | pool.scope(|s| { 130 | let mut smalls = Vec::new(); 131 | for _ in 0..p.radix_range { 132 | let (bro_id, mut broker, bro_offset) = countries 133 | .pop() 134 | .expect("[Regions sort -> swapping] Bad implementation."); 135 | 136 | let swaps = regions_graph.two_cycle(bro_id); 137 | swap_countries( 138 | swaps, 139 | &mut broker, 140 | &mut 
countries, 141 | &country_map, 142 | bro_offset, 143 | ); 144 | 145 | let swaps = regions_graph.two_path(bro_id); 146 | swap_countries( 147 | swaps, 148 | &mut broker, 149 | &mut countries, 150 | &country_map, 151 | bro_offset, 152 | ); 153 | 154 | if p.level < p.max_level - 1 { 155 | if broker.len() > 3000 { 156 | s.spawn(move |_| { 157 | let new_params = p.new_level(p.level + 1); 158 | peeka_sort_rec( 159 | &mut broker, 160 | new_params, 161 | pool, 162 | block_count, 163 | init_size, 164 | ); 165 | }); 166 | } else { 167 | smalls.push(broker); 168 | } 169 | } 170 | } 171 | 172 | for mut small_array in smalls.into_iter() { 173 | fallback(&mut small_array, p.new_level(p.level + 1)); 174 | } 175 | }); 176 | } 177 | 178 | /// # Peek Regions sort (Peekasort) 179 | /// 180 | /// This is an improvement of the 181 | /// [Regions sort](https://github.com/omarobeya/parallel-inplace-radixsort) and 182 | /// the [research article](https://people.csail.mit.edu/jshun/RegionsSort.pdf). 183 | /// 184 | /// The Verge sort pre-processing heuristic has been added. 185 | /// 186 | /// This sort is an inplace unstable radix sort. 187 | /// 188 | /// For "small" arrays, this sort fallbacks on the single thread Voracious sort. 189 | /// In the trait implementation, there is a first fallback on the Rayon 190 | /// parallel quicksort. 
191 | pub fn peeka_sort( 192 | arr: &mut [T], 193 | radix: usize, 194 | blocks_info: usize, 195 | thread_n: usize, 196 | ) where 197 | T: Radixable, 198 | K: RadixKey, 199 | { 200 | let size = arr.len(); 201 | if size <= FALLBACK_THRESHOLD { 202 | arr.voracious_sort(); 203 | return; 204 | } 205 | 206 | let pool = ThreadPoolBuilder::new().num_threads(thread_n).build().unwrap(); 207 | 208 | let dummy = arr[0]; 209 | let mut separators = verge_sort_preprocessing(arr, radix, &|array, rdx| { 210 | let (_offset, raw_offset) = dummy.compute_offset_mt(array, rdx); 211 | let max_level = dummy.compute_max_level(raw_offset, rdx); 212 | 213 | if max_level > 0 { 214 | let params = Params::new(0, rdx, raw_offset, max_level); 215 | 216 | peeka_sort_rec(array, params, &pool, blocks_info, size); 217 | } 218 | }); 219 | 220 | k_way_merge(arr, &mut separators); 221 | } 222 | -------------------------------------------------------------------------------- /src/generators/float_32.rs: -------------------------------------------------------------------------------- 1 | use rand::{thread_rng, Rng}; 2 | use rand_distr::{Distribution, Normal, Pareto}; 3 | use rayon::prelude::*; 4 | 5 | // Uniform 6 | pub fn helper_random_array_uniform_f32(size: usize) -> Vec { 7 | (0..size) 8 | .into_par_iter() 9 | .map(|_| thread_rng().gen::()) 10 | .collect::>() 11 | } 12 | 13 | // Small 14 | pub fn helper_random_array_small_f32(size: usize) -> Vec { 15 | (0..size) 16 | .into_par_iter() 17 | .map(|_| thread_rng().gen_range(-1_000_000.0, 1_000_000.0)) 18 | .collect::>() 19 | } 20 | 21 | // Asc 22 | pub fn helper_random_array_asc_f32(size: usize) -> Vec { 23 | (0..size).into_par_iter().map(|i| i as f32 + 0.5).collect::>() 24 | } 25 | 26 | // Desc 27 | pub fn helper_random_array_desc_f32(size: usize) -> Vec { 28 | (0..size).into_par_iter().map(|i| -(i as f32 + 0.5)).collect::>() 29 | } 30 | 31 | // Equal 32 | pub fn helper_random_array_equal_f32(size: usize) -> Vec { 33 | vec![thread_rng().gen(); size] 34 | 
} 35 | 36 | fn helper_pareto(size: usize, arg: f32) -> Vec { 37 | let pareto = Pareto::new(0.1, arg).unwrap(); 38 | (0..size) 39 | .into_par_iter() 40 | .map(|_| pareto.sample(&mut thread_rng())) 41 | .collect::>() 42 | } 43 | 44 | // Pareto 45 | pub fn helper_random_array_pareto075_f32(size: usize) -> Vec { 46 | helper_pareto(size, 0.75) 47 | } 48 | 49 | // Pareto 50 | pub fn helper_random_array_pareto100_f32(size: usize) -> Vec { 51 | helper_pareto(size, 1.0) 52 | } 53 | 54 | // Pareto 55 | pub fn helper_random_array_pareto200_f32(size: usize) -> Vec { 56 | helper_pareto(size, 2.0) 57 | } 58 | 59 | fn helper_normal(size: usize, standard_deviation: f32) -> Vec { 60 | let normal = Normal::new(0.0, standard_deviation).unwrap(); 61 | (0..size) 62 | .into_par_iter() 63 | .map(|_| normal.sample(&mut thread_rng())) 64 | .collect::>() 65 | } 66 | 67 | // Normale(0, 2^10) 68 | pub fn helper_random_array_normale_10_f32(size: usize) -> Vec { 69 | helper_normal(size, 1024.0) 70 | } 71 | 72 | // Normale(0, 2^20) 73 | pub fn helper_random_array_normale_20_f32(size: usize) -> Vec { 74 | helper_normal(size, 1_000_000.0) 75 | } 76 | 77 | // Normale(0, 2^30) 78 | pub fn helper_random_array_normale_30_f32(size: usize) -> Vec { 79 | helper_normal(size, 1_000_000_000.0) 80 | } 81 | 82 | // Sqrt 83 | pub fn helper_random_array_sqrt_f32(size: usize) -> Vec { 84 | let mut rng = thread_rng(); 85 | let sqrt = (size as f64).sqrt() as usize; 86 | let mut array: Vec = Vec::with_capacity(size); 87 | 88 | let mut i = 0; 89 | let mut value: f32 = 0.0; 90 | for _ in 0..sqrt { 91 | value = rng.gen(); 92 | for _ in 0..sqrt { 93 | array.push(value); 94 | i += 1; 95 | } 96 | } 97 | while i < size { 98 | array.push(value); 99 | i += 1; 100 | } 101 | 102 | array 103 | } 104 | 105 | // Almost sorted ascending 106 | pub fn helper_random_array_almost_asc_f32(size: usize) -> Vec { 107 | if size == 0 { 108 | return Vec::new(); 109 | } 110 | if size < 4 { 111 | return helper_random_array_uniform_f32(size); 
112 | } 113 | if size == 0 { 114 | return Vec::new(); 115 | } 116 | if size < 4 { 117 | return helper_random_array_uniform_f32(size); 118 | } 119 | let mut array = helper_random_array_asc_f32(size); 120 | 121 | for _ in 0..((size as f64).log2() as usize) { 122 | let i = thread_rng().gen_range(0, size); 123 | let j = thread_rng().gen_range(0, size); 124 | array.swap(i, j); 125 | } 126 | 127 | array 128 | } 129 | 130 | // Almost sorted descending 131 | pub fn helper_random_array_almost_desc_f32(size: usize) -> Vec { 132 | if size == 0 { 133 | return Vec::new(); 134 | } 135 | if size < 4 { 136 | return helper_random_array_uniform_f32(size); 137 | } 138 | 139 | let mut array = helper_random_array_desc_f32(size); 140 | 141 | for _ in 0..((size as f64).log2() as usize) { 142 | let i = thread_rng().gen_range(0, size); 143 | let j = thread_rng().gen_range(0, size); 144 | array.swap(i, j); 145 | } 146 | 147 | array 148 | } 149 | 150 | // Ascending sawtooth 151 | pub fn helper_random_array_asc_sawtooth_f32(size: usize) -> Vec { 152 | if size == 0 { 153 | return Vec::new(); 154 | } 155 | if size < 4 { 156 | return helper_random_array_uniform_f32(size); 157 | } 158 | 159 | let limit = (size as f64 / ((size as f64).log2() * 0.9)) as f32; 160 | (0..size).into_par_iter().map(|i| i as f32 % limit).collect::>() 161 | } 162 | 163 | // Descending sawtooth 164 | pub fn helper_random_array_desc_sawtooth_f32(size: usize) -> Vec { 165 | if size == 0 { 166 | return Vec::new(); 167 | } 168 | if size < 4 { 169 | return helper_random_array_uniform_f32(size); 170 | } 171 | 172 | let limit = (size as f64 / ((size as f64).log2() * 0.9)) as f32; 173 | (0..size) 174 | .into_par_iter() 175 | .map(|i| (size - 1 - i) as f32 % limit) 176 | .collect::>() 177 | } 178 | 179 | // Pipe Organ 180 | pub fn helper_random_array_pipe_organ_f32(size: usize) -> Vec { 181 | let middle = size / 2; 182 | (0..size) 183 | .into_par_iter() 184 | .map(|i| if i < middle { i as f32 } else { (size - i) as f32 }) 185 | 
.collect::>() 186 | } 187 | 188 | // Push Front 189 | pub fn helper_random_array_push_front_f32(size: usize) -> Vec { 190 | let mut array = 191 | (0..size).into_par_iter().map(|i| i as f32).collect::>(); 192 | 193 | if size > 0 { 194 | array[size - 1] = 0.0; 195 | } 196 | 197 | array 198 | } 199 | 200 | // Push middle 201 | pub fn helper_random_array_push_middle_f32(size: usize) -> Vec { 202 | let mut array = 203 | (0..size).into_par_iter().map(|i| i as f32).collect::>(); 204 | 205 | if size > 0 { 206 | array[size - 1] = (size / 2) as f32; 207 | } 208 | 209 | array 210 | } 211 | 212 | pub fn generators_f32( 213 | ) -> Vec<(&'static dyn Fn(usize) -> Vec, &'static str)> { 214 | vec![ 215 | (&helper_random_array_uniform_f32, "-- Unif :"), 216 | (&helper_random_array_small_f32, "-- Small :"), 217 | (&helper_random_array_asc_f32, "-- Asc :"), 218 | (&helper_random_array_desc_f32, "-- Desc :"), 219 | (&helper_random_array_equal_f32, "-- Equal :"), 220 | (&helper_random_array_almost_asc_f32, "-- Almost Asc :"), 221 | (&helper_random_array_almost_desc_f32, "-- Almost Desc:"), 222 | (&helper_random_array_asc_sawtooth_f32, "-- Asc Saw :"), 223 | (&helper_random_array_desc_sawtooth_f32, "-- Desc Saw :"), 224 | (&helper_random_array_sqrt_f32, "-- Sqrt :"), 225 | (&helper_random_array_pipe_organ_f32, "-- Pipe Organ :"), 226 | (&helper_random_array_push_front_f32, "-- Front :"), 227 | (&helper_random_array_push_middle_f32, "-- Middle :"), 228 | (&helper_random_array_pareto075_f32, "-- Pareto 0.75:"), 229 | (&helper_random_array_pareto100_f32, "-- Pareto 1.00:"), 230 | (&helper_random_array_pareto200_f32, "-- Pareto 2.00:"), 231 | (&helper_random_array_normale_10_f32, "-- Normale 10 :"), 232 | (&helper_random_array_normale_20_f32, "-- Normale 20 :"), 233 | (&helper_random_array_normale_30_f32, "-- Normale 30 :"), 234 | ] 235 | } 236 | -------------------------------------------------------------------------------- /src/sorts/dlsd_sort.rs: 
const EFST: f64 = 0.1; // Estimated Final Size Threshold
const NRT: f64 = 0.35; // Next Radix Threshold

/// Chooses the radix and the number of passes for a slice of `size` elements.
///
/// For each candidate radix `r` in 7, 8 and 9 bits, the number of passes is
/// first estimated as `ceil(log2(size / 2^r) / r)`, and bumped by one while
/// the estimated bucket size `size / 2^(r * passes)` is still above 1. The
/// candidates are then ordered by (passes, estimated bucket size, radix) and
/// one of them is picked using the `EFST` and `NRT` thresholds.
///
/// Returns `(radix, passes)`.
fn get_best_radix_size_and_runs(size: usize) -> (usize, usize) {
    let size = size as f64;

    // 2^(radix * passes), computed in floating point: the integer form
    // `2usize.pow(radix).pow(passes)` overflows `usize` for very large sizes,
    // while powers of two in this range are represented exactly by an f64.
    let buckets_after = |radix: usize, passes: usize| 2f64.powi((radix * passes) as i32);

    let mut results: Vec<(usize, f64, usize)> = (7usize..10)
        .map(|r| {
            let diversion_threshold = buckets_after(r, 1);
            // A negative or infinite estimate saturates to 0 passes here.
            let mut required_bytes =
                ((size / diversion_threshold).log2() / r as f64).ceil() as usize;
            let mut estimated_final_size = size / buckets_after(r, required_bytes);
            if estimated_final_size > 1.0 {
                required_bytes += 1;
                estimated_final_size = size / buckets_after(r, required_bytes);
            }
            (required_bytes, estimated_final_size, r)
        })
        .collect();

    // Fewest passes first, then smallest estimated bucket size.
    results.sort_by(|a, b| a.partial_cmp(b).unwrap());

    if results[0].0 == results[2].0 && results[2].1 <= EFST {
        // Every radix needs the same number of passes and even the last
        // candidate ends below the threshold.
        (results[2].2, results[2].0)
    } else if (results[0].0 == results[1].0 && results[1].1 <= EFST)
        || results[0].1 > NRT
    {
        (results[1].2, results[1].0)
    } else {
        (results[0].2, results[0].0)
    }
}
arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 57 | return; 58 | } 59 | 60 | let dummy = arr[0]; 61 | let mut index = 0; 62 | 63 | let mut buffer: Vec = vec![arr[0]; size]; 64 | 65 | let histograms = if diversion { 66 | get_partial_histograms(arr, &p, rbd) 67 | } else { 68 | dummy.get_full_histograms(arr, &p) 69 | }; 70 | 71 | let mut t1 = arr; 72 | let t2 = &mut buffer; 73 | let mut t2 = t2.as_mut_slice(); 74 | 75 | // Swap elements the right amount of time to reach diversion threshold 76 | for level in (p.level..p.max_level).rev() { 77 | if only_one_bucket_filled(&histograms[level]) { 78 | continue; 79 | } 80 | 81 | let (mut source, mut destination) = 82 | if index == 0 { (t1, t2) } else { (t2, t1) }; 83 | let (mask, shift) = if diversion { 84 | dummy.get_mask_and_shift_from_left(&p.new_level(level)) 85 | } else { 86 | dummy.get_mask_and_shift(&p.new_level(level)) 87 | }; 88 | let (_, mut heads, _) = prefix_sums(&histograms[level]); 89 | 90 | copy_by_histogram( 91 | size, 92 | &mut source, 93 | &mut destination, 94 | &mut heads, 95 | mask, 96 | shift, 97 | ); 98 | 99 | index = 1 - index; 100 | 101 | if index == 1 { 102 | t1 = source; 103 | t2 = destination; 104 | } else { 105 | t2 = source; 106 | t1 = destination; 107 | } 108 | } 109 | 110 | // Ensure data is at the right place 111 | if index == 1 { 112 | copy_nonoverlapping(t2, t1, size); 113 | } 114 | 115 | if diversion && dummy.type_size() - p.offset >= p.radix * p.max_level { 116 | let unsorted_parts = insertion_sort_try(&mut t1, &p); 117 | 118 | let radix = 8; 119 | let raw_offset = p.max_level * p.radix + p.offset; 120 | let new_max_level = dummy.compute_max_level(raw_offset, radix); 121 | 122 | let new_params_msd = Params::new(0, radix, raw_offset, new_max_level); 123 | 124 | let offset_lsd = dummy.type_size() - new_max_level * radix; 125 | let new_params_lsd = Params::new(0, radix, offset_lsd, new_max_level); 126 | 127 | unsorted_parts.iter().for_each(|(i, j)| { 128 | if j - i <= 250 { 129 | 
t1[*i..*j].sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 130 | } else if j - i > 3000 && new_max_level <= 4 { 131 | lsd_radixsort_body(&mut t1[*i..*j], new_params_lsd); 132 | } else { 133 | voracious_sort_rec(&mut t1[*i..*j], new_params_msd, 0); 134 | } 135 | }); 136 | } 137 | } 138 | 139 | fn dlsd_radixsort_aux(arr: &mut [T], radix: usize) 140 | where 141 | T: Radixable, 142 | K: RadixKey, 143 | { 144 | if arr.len() <= 128 { 145 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 146 | return; 147 | } 148 | 149 | let dummy = arr[0]; 150 | 151 | let (sugg_radix, required_bytes) = get_best_radix_size_and_runs(arr.len()); 152 | 153 | let max_key = dummy.get_max_key(arr); 154 | let bits = dummy.type_size(); 155 | let zero = dummy.default_key(); 156 | let one = dummy.one(); 157 | 158 | let (_, sugg_raw_offset) = 159 | offset_from_bits(arr, max_key, sugg_radix, bits, zero, one); 160 | let (offset, _) = offset_from_bits(arr, max_key, radix, bits, zero, one); 161 | let max_level = dummy.compute_max_level(offset, radix); 162 | 163 | if max_level == 0 { 164 | return; 165 | } 166 | 167 | let sugg_max_level = dummy.compute_max_level(sugg_raw_offset, sugg_radix); 168 | 169 | let (params, diversion, rbd) = if required_bytes < sugg_max_level { 170 | ( 171 | Params::new(0, sugg_radix, sugg_raw_offset, required_bytes), 172 | true, 173 | required_bytes, 174 | ) 175 | } else if sugg_radix > radix { 176 | ( 177 | Params::new(0, sugg_radix, sugg_raw_offset, sugg_max_level), 178 | false, 179 | sugg_max_level, 180 | ) 181 | } else { 182 | (Params::new(0, radix, offset, max_level), false, max_level) 183 | }; 184 | 185 | dlsd_radixsort_body(arr, params, rbd, diversion); 186 | } 187 | 188 | /// # DLSD sort: Diverting LSD sort 189 | /// 190 | /// A simpler version of the 191 | /// [DFR sort](https://github.com/ramou/dfr) 192 | /// algorithm. 193 | /// 194 | /// Several changes have been made. 
Diversion is different, and only one out of 195 | /// the three ideas from the DFR sort is implemented. So it is less dependent on 196 | /// the uniformly distributed input hypothesis. Moreover a variable radix is 197 | /// added. 198 | /// 199 | /// The core idea of this algorithm is, actually, an heuristic. An estimation 200 | /// of the number of required passes is computed, and then diversion occurs. 201 | /// Which is unusual for a LSD sort algorithm. 202 | /// 203 | /// The Verge sort pre-processing heuristic is also added. 204 | /// 205 | /// The DLSD sort is an out of place unstable radix sort. The core algorithm 206 | /// is stable but fallback and diversion are unstable. 207 | pub fn dlsd_radixsort(arr: &mut [T], radix: usize) 208 | where 209 | T: Radixable, 210 | K: RadixKey, 211 | { 212 | if arr.len() <= 128 { 213 | arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap()); 214 | return; 215 | } 216 | 217 | let mut separators = verge_sort_preprocessing(arr, radix, &|arr, radix| { 218 | dlsd_radixsort_aux(arr, radix) 219 | }); 220 | k_way_merge(arr, &mut separators); 221 | } 222 | -------------------------------------------------------------------------------- /results/benchmark_1_0_0/human_readable/benchmark_1_0_0_ryzen_9_3950x_cmp_structf32: -------------------------------------------------------------------------------- 1 | running 1 test 2 | Number of iterations: 20 3 | === Test struct{isize, f32} === Rust Uns Voracious v1.0.0 Voracious v0.1.0 4 | Array size: 500 5 | -- Unif :35us 869ns (70.57ns) 29us 670ns (58.53ns) 20us 802ns (40.48ns) 6 | -- +/-10^9 :20us 190ns (41.33ns) 18us 237ns (36.74ns) 29us 1921ns (59.52ns) 7 | -- Pareto 0.75:42us 1499ns (85.25ns) 18us 58ns (36.79ns) 18us 227ns (37.04ns) 8 | -- Pareto 1.00:20us 177ns (41.46ns) 18us 204ns (36.94ns) 19us 105ns (38.24ns) 9 | -- Pareto 2.00:20us 96ns (41.14ns) 18us 66ns (37.52ns) 18us 82ns (37.73ns) 10 | -- Normale 10 :20us 70ns (41.08ns) 18us 151ns (37.88ns) 18us 64ns (37.37ns) 11 | -- Normale 20 
:20us 107ns (41.04ns) 19us 120ns (38.61ns) 18us 46ns (37.79ns) 12 | -- Normale 30 :20us 88ns (41.35ns) 18us 49ns (37.19ns) 18us 79ns (36.89ns) 13 | Array size: 1000 14 | -- Unif :42us 194ns (42.01ns) 31us 459ns (31.33ns) 28us 111ns (28.48ns) 15 | -- +/-10^9 :41us 315ns (41.88ns) 27us 244ns (27.11ns) 27us 86ns (27.82ns) 16 | -- Pareto 0.75:42us 156ns (42.03ns) 29us 155ns (29.06ns) 28us 111ns (28.09ns) 17 | -- Pareto 1.00:42us 164ns (42.45ns) 29us 165ns (29.16ns) 28us 74ns (28.44ns) 18 | -- Pareto 2.00:42us 91ns (42.44ns) 28us 78ns (28.80ns) 28us 51ns (28.70ns) 19 | -- Normale 10 :42us 94ns (42.66ns) 28us 70ns (28.37ns) 28us 105ns (28.35ns) 20 | -- Normale 20 :43us 194ns (43.22ns) 28us 49ns (28.58ns) 28us 74ns (28.19ns) 21 | -- Normale 30 :42us 89ns (42.59ns) 28us 160ns (28.68ns) 28us 114ns (28.56ns) 22 | Array size: 2000 23 | -- Unif :84us 278ns (42.36ns) 46us 180ns (23.03ns) 45us 147ns (22.84ns) 24 | -- +/-10^9 :84us 203ns (42.02ns) 43us 135ns (21.98ns) 44us 155ns (22.27ns) 25 | -- Pareto 0.75:83us 172ns (41.99ns) 44us 158ns (22.47ns) 43us 262ns (21.80ns) 26 | -- Pareto 1.00:84us 159ns (42.14ns) 43us 237ns (21.57ns) 43us 151ns (21.77ns) 27 | -- Pareto 2.00:83us 198ns (41.99ns) 43us 349ns (21.66ns) 44us 321ns (22.23ns) 28 | -- Normale 10 :84us 139ns (42.30ns) 42us 300ns (21.22ns) 42us 246ns (21.18ns) 29 | -- Normale 20 :84us 204ns (42.10ns) 42us 232ns (21.05ns) 42us 203ns (21.44ns) 30 | -- Normale 30 :84us 166ns (42.23ns) 41us 273ns (20.89ns) 42us 248ns (21.26ns) 31 | Array size: 5000 32 | -- Unif :224us 882ns (44.82ns) 118us 1300ns (23.61ns) 99us 1470ns (19.95ns) 33 | -- +/-10^9 :286us 2895ns (57.30ns) 114us 289ns (23.00ns) 118us 441ns (23.67ns) 34 | -- Pareto 0.75:270us 4254ns (54.10ns) 119us 287ns (23.91ns) 82us 225ns (16.56ns) 35 | -- Pareto 1.00:218us 388ns (43.67ns) 84us 143ns (16.93ns) 83us 240ns (16.76ns) 36 | -- Pareto 2.00:218us 439ns (43.71ns) 87us 458ns (17.52ns) 86us 209ns (17.34ns) 37 | -- Normale 10 :221us 485ns (44.22ns) 85us 467ns (17.04ns) 82us 
219ns (16.48ns) 38 | -- Normale 20 :221us 613ns (44.39ns) 81us 222ns (16.37ns) 82us 167ns (16.58ns) 39 | -- Normale 30 :219us 309ns (43.85ns) 98us 1084ns (19.75ns) 115us 283ns (23.16ns) 40 | Array size: 10000 41 | -- Unif :534us 5355ns (53.46ns) 153us 127ns (15.32ns) 153us 194ns (15.38ns) 42 | -- +/-10^9 :449us 982ns (44.95ns) 150us 139ns (15.05ns) 148us 130ns (14.88ns) 43 | -- Pareto 0.75:443us 175ns (44.31ns) 153us 149ns (15.36ns) 154us 241ns (15.48ns) 44 | -- Pareto 1.00:438us 217ns (43.89ns) 150us 114ns (15.01ns) 150us 109ns (15.01ns) 45 | -- Pareto 2.00:445us 1287ns (44.54ns) 154us 196ns (15.45ns) 157us 202ns (15.77ns) 46 | -- Normale 10 :450us 986ns (45.08ns) 198us 1271ns (19.80ns) 151us 150ns (15.16ns) 47 | -- Normale 20 :446us 325ns (44.62ns) 231us 1234ns (23.16ns) 172us 1688ns (17.25ns) 48 | -- Normale 30 :636us 5958ns (63.70ns) 202us 1474ns (20.24ns) 218us 883ns (21.87ns) 49 | Array size: 50000 50 | -- Unif :3224us 20445ns (64.50ns) 888us 2807ns (17.76ns) 671us 988ns (13.44ns) 51 | -- +/-10^9 :2671us 3685ns (53.44ns) 813us 3260ns (16.27ns) 638us 125ns (12.76ns) 52 | -- Pareto 0.75:3204us 22960ns (64.08ns) 831us 2613ns (16.63ns) 877us 2413ns (17.55ns) 53 | -- Pareto 1.00:2878us 18330ns (57.57ns) 657us 249ns (13.15ns) 655us 221ns (13.11ns) 54 | -- Pareto 2.00:2765us 10514ns (55.32ns) 772us 2375ns (15.46ns) 880us 2577ns (17.61ns) 55 | -- Normale 10 :2794us 14377ns (55.89ns) 752us 2840ns (15.05ns) 683us 1897ns (13.67ns) 56 | -- Normale 20 :2806us 13943ns (56.13ns) 643us 266ns (12.88ns) 646us 221ns (12.94ns) 57 | -- Normale 30 :3117us 21270ns (62.36ns) 805us 4909ns (16.11ns) 640us 128ns (12.80ns) 58 | Array size: 100000 59 | -- Unif :5604us 19583ns (56.05ns) 1453us 4371ns (14.54ns) 1247us 202ns (12.47ns) 60 | -- +/-10^9 :6183us 25686ns (61.84ns) 1215us 250ns (12.15ns) 1778us 4308ns (17.79ns) 61 | -- Pareto 0.75:5759us 24940ns (57.59ns) 1242us 227ns (12.42ns) 1547us 6574ns (15.48ns) 62 | -- Pareto 1.00:5854us 16882ns (58.54ns) 1405us 2955ns (14.06ns) 1667us 
4200ns (16.67ns) 63 | -- Pareto 2.00:6094us 23257ns (60.94ns) 1540us 5809ns (15.41ns) 1442us 4343ns (14.42ns) 64 | -- Normale 10 :6223us 23155ns (62.23ns) 1203us 277ns (12.03ns) 1420us 3629ns (14.20ns) 65 | -- Normale 20 :5549us 12682ns (55.50ns) 1603us 3976ns (16.04ns) 1620us 4199ns (16.20ns) 66 | -- Normale 30 :6202us 15938ns (62.03ns) 1318us 2843ns (13.19ns) 1535us 3979ns (15.35ns) 67 | Array size: 500000 68 | -- Unif :30948us 12966ns (61.90ns) 6759us 355ns (13.52ns) 6040us 5390ns (12.08ns) 69 | -- +/-10^9 :30346us 11391ns (60.69ns) 6186us 10798ns (12.37ns) 6195us 6329ns (12.39ns) 70 | -- Pareto 0.75:31096us 13491ns (62.19ns) 7056us 9299ns (14.11ns) 6077us 3868ns (12.15ns) 71 | -- Pareto 1.00:31282us 15752ns (62.56ns) 6359us 497ns (12.72ns) 6098us 4248ns (12.20ns) 72 | -- Pareto 2.00:31009us 12318ns (62.02ns) 6572us 2127ns (13.14ns) 6383us 5546ns (12.77ns) 73 | -- Normale 10 :31773us 12048ns (63.55ns) 7408us 676ns (14.82ns) 5782us 1373ns (11.57ns) 74 | -- Normale 20 :31012us 11688ns (62.03ns) 7412us 479ns (14.82ns) 6065us 5909ns (12.13ns) 75 | -- Normale 30 :30679us 15419ns (61.36ns) 7906us 6438ns (15.81ns) 6171us 5903ns (12.34ns) 76 | Array size: 1000000 77 | -- Unif :64155us 9271ns (64.16ns) 12248us 3663ns (12.25ns) 16961us 7510ns (16.96ns) 78 | -- +/-10^9 :65388us 12107ns (65.39ns) 13735us 5754ns (13.74ns) 16120us 6139ns (16.12ns) 79 | -- Pareto 0.75:63620us 9382ns (63.62ns) 13534us 6837ns (13.53ns) 16057us 5510ns (16.06ns) 80 | -- Pareto 1.00:66186us 14491ns (66.19ns) 14482us 8883ns (14.48ns) 15944us 6444ns (15.94ns) 81 | -- Pareto 2.00:66491us 15251ns (66.49ns) 12954us 8298ns (12.95ns) 16111us 5636ns (16.11ns) 82 | -- Normale 10 :64384us 12764ns (64.38ns) 15510us 7470ns (15.51ns) 16043us 5182ns (16.04ns) 83 | -- Normale 20 :66579us 9922ns (66.58ns) 15858us 6554ns (15.86ns) 16436us 5814ns (16.44ns) 84 | -- Normale 30 :65273us 8786ns (65.27ns) 15183us 3937ns (15.18ns) 16627us 4348ns (16.63ns) 85 | Array size: 5000000 86 | -- Unif :390059us 19515ns (78.01ns) 
66006us 3318ns (13.20ns) 92286us 2698ns (18.46ns) 87 | -- +/-10^9 :383332us 15467ns (76.67ns) 64817us 2347ns (12.96ns) 96614us 6732ns (19.32ns) 88 | -- Pareto 0.75:382296us 15299ns (76.46ns) 64466us 4939ns (12.89ns) 92882us 4973ns (18.58ns) 89 | -- Pareto 1.00:395776us 21417ns (79.16ns) 65014us 3927ns (13.00ns) 93213us 3752ns (18.64ns) 90 | -- Pareto 2.00:390470us 20253ns (78.09ns) 64195us 3063ns (12.84ns) 91272us 2188ns (18.25ns) 91 | -- Normale 10 :377191us 14125ns (75.44ns) 63863us 2637ns (12.77ns) 91178us 3931ns (18.24ns) 92 | -- Normale 20 :377491us 17031ns (75.50ns) 64814us 2529ns (12.96ns) 93513us 8369ns (18.70ns) 93 | -- Normale 30 :380730us 19450ns (76.15ns) 63901us 2410ns (12.78ns) 91008us 3375ns (18.20ns) 94 | Array size: 10000000 95 | -- Unif :821728us 20805ns (82.17ns) 133571us 1986ns (13.36ns) 180987us 3492ns (18.10ns) 96 | -- +/-10^9 :812332us 19028ns (81.23ns) 131795us 1098ns (13.18ns) 185853us 3032ns (18.59ns) 97 | -- Pareto 0.75:811605us 20763ns (81.16ns) 126969us 694ns (12.70ns) 182246us 3536ns (18.22ns) 98 | -- Pareto 1.00:824444us 18975ns (82.44ns) 127052us 1346ns (12.71ns) 183136us 2698ns (18.31ns) 99 | -- Pareto 2.00:806578us 11807ns (80.66ns) 130228us 1042ns (13.02ns) 185861us 1768ns (18.59ns) 100 | -- Normale 10 :815621us 16496ns (81.56ns) 128106us 2204ns (12.81ns) 180114us 2169ns (18.01ns) 101 | -- Normale 20 :822014us 20514ns (82.20ns) 128298us 2797ns (12.83ns) 183010us 4580ns (18.30ns) 102 | -- Normale 30 :823920us 17771ns (82.39ns) 131555us 4812ns (13.16ns) 187392us 5552ns (18.74ns) 103 | test tests::speed_sort::speed_test_structf32 ... ok 104 | 105 | test result: ok. 
/// `compute_offset` returns (offset aligned on the radix, raw offset).
#[test]
fn test_types_compute_offset() {
    let mut arr: Vec<u64> = vec![350];
    let dummy = arr[0];

    // 350: 0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0001_0101_1110
    // 55 leading zero bits, 9 significant bits.

    let (offset, raw_offset) = dummy.compute_offset(&mut arr, 8);
    assert_eq!(offset, 48);
    assert_eq!(raw_offset, 55);

    let (offset, raw_offset) = dummy.compute_offset(&mut arr, 2);
    assert_eq!(offset, 54);
    assert_eq!(raw_offset, 55);

    let (offset, raw_offset) = dummy.compute_offset(&mut arr, 3);
    assert_eq!(offset, 55);
    assert_eq!(raw_offset, 55);
}

/// Same expectations as `test_types_compute_offset`, through
/// `compute_offset_mt`.
#[test]
fn test_types_compute_offset_mt() {
    let mut arr: Vec<u64> = vec![350];
    let dummy = arr[0];

    // 350: 0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0001_0101_1110
    // 55 leading zero bits, 9 significant bits.

    let (offset, raw_offset) = dummy.compute_offset_mt(&mut arr, 8);
    assert_eq!(offset, 48);
    assert_eq!(raw_offset, 55);

    let (offset, raw_offset) = dummy.compute_offset_mt(&mut arr, 2);
    assert_eq!(offset, 54);
    assert_eq!(raw_offset, 55);

    let (offset, raw_offset) = dummy.compute_offset_mt(&mut arr, 3);
    assert_eq!(offset, 55);
    assert_eq!(raw_offset, 55);
}

/// `extract` isolates the digit selected by `get_mask_and_shift`.
#[test]
fn test_types_extract() {
    let p = Params::new(6, 8, 0, 8); // level, radix, offset, max_level
    let number: u64 = 0b1010_0010_0100_1000_0000;
    //                       ^^^^ ^^^^
    let (mask, shift) = number.get_mask_and_shift(&p);

    assert_eq!(number.extract(mask, shift), 0b0010_0100);
}

/// `mask_for_high_bits` keeps the `radix * max_level` bits that follow the
/// first `offset` bits.
#[test]
fn test_types_mask_for_high_bits() {
    let p = Params::new(0, 8, 24, 3); // level, radix, offset, max_level
    let number: u64 = 0b1111_0001_0010_1011_1011_0101_0010_0100_1110_0101;
    //                  ^^^^ ^^^^ ^^^^ ^^^^ ^^^^ ^^^^

    let high_mask = number.mask_for_high_bits(p.radix, p.offset, p.max_level);

    assert_eq!(
        high_mask,
        0b1111_1111_1111_1111_1111_1111_0000_0000_0000_0000u64
    );

    assert_eq!(
        high_mask & number,
        0b1111_0001_0010_1011_1011_0101_0000_0000_0000_0000u64,
    );

    let p = Params::new(0, 9, 24, 2); // level, radix, offset, max_level
    let number: u64 = 0b11_1111_0001_0010_1011_1011_0101_0010_0100_1110_0101;
    //                     ^^^^ ^^^^ ^^^^ ^^^^ ^^

    let high_mask = number.mask_for_high_bits(p.radix, p.offset, p.max_level);

    assert_eq!(
        high_mask,
        0b00_1111_1111_1111_1111_1100_0000_0000_0000_0000_0000u64
    );

    assert_eq!(
        high_mask & number,
        0b00_1111_0001_0010_1011_1000_0000_0000_0000_0000_0000,
    );
}

/// `compute_max_level(offset, radix)` on 32-bit wide types.
#[test]
fn test_types_compute_max_level() {
    let arr: Vec<char> = vec!['a'];
    assert_eq!(arr[0].compute_max_level(0, 8), 4);
    assert_eq!(arr[0].compute_max_level(0, 7), 5);
    assert_eq!(arr[0].compute_max_level(0, 6), 6);
    assert_eq!(arr[0].compute_max_level(6, 8), 4);
    assert_eq!(arr[0].compute_max_level(9, 8), 3);

    // NOTE(review): the element type is assumed to be `u32`; the expected
    // values only establish a 32-bit integer type - confirm.
    let arr: Vec<u32> = vec![10];
    assert_eq!(arr[0].compute_max_level(0, 8), 4);
    assert_eq!(arr[0].compute_max_level(0, 7), 5);
    assert_eq!(arr[0].compute_max_level(0, 6), 6);
    assert_eq!(arr[0].compute_max_level(6, 8), 4);
    assert_eq!(arr[0].compute_max_level(8, 8), 3);
    assert_eq!(arr[0].compute_max_level(9, 8), 3);
}
111 | */ 112 | let arr: Vec = vec![10]; 113 | let dummy = arr[0]; 114 | 115 | let p = Params::new(0, 7, 3, 2); // level, radix, offset, max_level 116 | let (mask, shift) = dummy.get_mask_and_shift_from_left(&p); 117 | // 10 : 0000_0000_0000_0000_0000_0000_0000_1010 118 | // levels: ^0 ^1 ^2 ^3 ^4 119 | // mask : ^ ^^^^ ^^ 120 | // shift : ^^ ^^^^ ^^^^ ^^^^ ^^^^ ^^^^ = 22 121 | assert_eq!(mask, 0b0001_1111_1100_0000_0000_0000_0000_0000u32); 122 | assert_eq!(shift, 22); 123 | 124 | let p = Params::new(1, 7, 3, 2); // level, radix, offset, max_level 125 | let (mask, shift) = dummy.get_mask_and_shift_from_left(&p); 126 | // 10 : 0000_0000_0000_0000_0000_0000_0000_1010 127 | // levels: ^0 ^1 ^2 ^3 ^4 128 | // mask : ^^ ^^^^ ^ 129 | // shift : ^^^ ^^^^ ^^^^ ^^^^ = 15 130 | assert_eq!(mask, 0b0000_0000_0011_1111_1000_0000_0000_0000u32); 131 | assert_eq!(shift, 15); 132 | 133 | let p = Params::new(2, 7, 3, 2); // level, radix, offset, max_level 134 | let (mask, shift) = dummy.get_mask_and_shift_from_left(&p); 135 | // 10 : 0000_0000_0000_0000_0000_0000_0000_1010 136 | // levels: ^0 ^1 ^2 ^3 ^4 137 | // mask : ^^^ ^^^^ 138 | // shift : ^^^^ ^^^^ = 8 139 | assert_eq!(mask, 0b0000_0000_0000_0000_0111_1111_0000_0000u32); 140 | assert_eq!(shift, 8); 141 | 142 | let p = Params::new(3, 7, 3, 2); // level, radix, offset, max_level 143 | let (mask, shift) = dummy.get_mask_and_shift_from_left(&p); 144 | // 10 : 0000_0000_0000_0000_0000_0000_0000_1010 145 | // levels: ^0 ^1 ^2 ^3 ^4 146 | // mask : ^^^^ ^^^ 147 | // shift : ^ = 1 148 | assert_eq!(mask, 0b0000_0000_0000_0000_0000_0000_1111_1110u32); 149 | assert_eq!(shift, 1); 150 | 151 | let p = Params::new(4, 7, 3, 2); // level, radix, offset, max_level 152 | let (mask, shift) = dummy.get_mask_and_shift_from_left(&p); 153 | // 10 : 0000_0000_0000_0000_0000_0000_0000_1010 154 | // levels: ^0 ^1 ^2 ^3 ^4 155 | // mask : ^ 156 | // shift : = 0 157 | assert_eq!(mask, 0b0000_0000_0000_0000_0000_0000_0000_0001u32); 158 | assert_eq!(shift, 
0); 159 | } 160 | 161 | #[test] 162 | fn test_types_get_mask_and_shift() { 163 | let arr: Vec = vec![10]; 164 | let dummy = arr[0]; 165 | 166 | let p = Params::new(3, 7, 3, 4); // level, radix, offset, max_level 167 | let (mask, shift) = dummy.get_mask_and_shift(&p); 168 | // 10 : 0000_0000_0000_0000_0000_0000_0000_1010 169 | // levels: ^0 ^1 ^2 ^3 170 | // mask : ^^^ ^^^^ 171 | // shift : = 0 172 | assert_eq!(mask, 0b0000_0000_0000_0000_0000_0000_0111_1111u32); 173 | assert_eq!(shift, 0); 174 | 175 | let p = Params::new(2, 7, 3, 4); // level, radix, offset, max_level 176 | let (mask, shift) = dummy.get_mask_and_shift(&p); 177 | // 10 : 0000_0000_0000_0000_0000_0000_0000_1010 178 | // levels: ^0 ^1 ^2 ^3 179 | // mask : ^^ ^^^^ ^ 180 | // shift : ^^^ ^^^^ = 7 181 | assert_eq!(mask, 0b0000_0000_0000_0000_0011_1111_1000_0000u32); 182 | assert_eq!(shift, 7); 183 | 184 | let p = Params::new(1, 7, 3, 4); // level, radix, offset, max_level 185 | let (mask, shift) = dummy.get_mask_and_shift(&p); 186 | // 10 : 0000_0000_0000_0000_0000_0000_0000_1010 187 | // levels: ^0 ^1 ^2 ^3 188 | // mask : ^ ^^^^ ^^ 189 | // shift : ^^ ^^^^ ^^^^ ^^^^ = 14 190 | assert_eq!(mask, 0b0000_0000_0001_1111_1100_0000_0000_0000u32); 191 | assert_eq!(shift, 14); 192 | 193 | let p = Params::new(0, 7, 3, 4); // level, radix, offset, max_level 194 | let (mask, shift) = dummy.get_mask_and_shift(&p); 195 | // 10 : 0000_0000_0000_0000_0000_0000_0000_1010 196 | // levels: ^0 ^1 ^2 ^3 197 | // mask : ^^^^ ^^^ 198 | // shift : ^ ^^^^ ^^^^ ^^^^ ^^^^ ^^^^ = 21 199 | assert_eq!(mask, 0b0000_1111_1110_0000_0000_0000_0000_0000u32); 200 | assert_eq!(shift, 21); 201 | } 202 | -------------------------------------------------------------------------------- /results/benchmark_1_2_0/benchmark_results_trait_char: -------------------------------------------------------------------------------- 1 | running 1 test 2 | Number of iterations: 5 3 | Number of threads: 16 4 | With check: true 5 | === Test char === Trait 
Vora Uns or stable Trait Vora MT Rust Uns Rust Std Rayon pll uns 6 | Array size: 100 7 | -- Unif :2us 155ns (25.07ns) 2us 133ns (25.25ns) 2us 147ns (22.61ns) 2us 134ns (27.37ns) 2us 118ns (22.04ns) 8 | -- Equal :0us 8ns (1.05ns) 0us 8ns (1.15ns) 0us 6ns (0.97ns) 0us 10ns (1.42ns) 0us 6ns (1.05ns) 9 | -- Charset :2us 99ns (21.26ns) 2us 104ns (23.21ns) 2us 99ns (21.41ns) 2us 118ns (25.64ns) 2us 106ns (22.49ns) 10 | -- Charset Den:2us 94ns (20.74ns) 2us 102ns (22.09ns) 2us 100ns (21.66ns) 2us 114ns (25.17ns) 2us 95ns (21.01ns) 11 | -- Charset VDe:2us 98ns (21.31ns) 2us 99ns (21.96ns) 2us 92ns (20.33ns) 2us 112ns (24.72ns) 1us 89ns (19.58ns) 12 | Array size: 1000 13 | -- Unif :16us 346ns (16.17ns) 22us 323ns (22.76ns) 21us 308ns (21.78ns) 33us 474ns (33.44ns) 21us 310ns (21.88ns) 14 | -- Equal :0us 11ns (0.60ns) 0us 6ns (0.42ns) 0us 6ns (0.41ns) 0us 9ns (0.62ns) 0us 6ns (0.42ns) 15 | -- Charset :2us 49ns (2.86ns) 14us 204ns (14.39ns) 14us 204ns (14.28ns) 32us 467ns (32.98ns) 14us 206ns (14.47ns) 16 | -- Charset Den:13us 203ns (13.32ns) 14us 207ns (14.63ns) 14us 204ns (14.37ns) 33us 467ns (33.01ns) 14us 201ns (14.07ns) 17 | -- Charset VDe:17us 281ns (17.95ns) 15us 220ns (15.53ns) 15us 221ns (15.58ns) 32us 463ns (32.74ns) 15us 213ns (15.01ns) 18 | Array size: 5000 19 | -- Unif :48us 316ns (9.62ns) 106us 1387ns (21.26ns) 117us 744ns (23.51ns) 191us 1212ns (38.31ns) 78us 535ns (15.64ns) 20 | -- Equal :1us 12ns (0.36ns) 2us 13ns (0.42ns) 2us 13ns (0.42ns) 2us 15ns (0.46ns) 2us 13ns (0.42ns) 21 | -- Charset :6us 41ns (1.29ns) 59us 467ns (11.84ns) 52us 331ns (10.43ns) 190us 1207ns (38.16ns) 50us 379ns (10.04ns) 22 | -- Charset Den:41us 269ns (8.27ns) 53us 454ns (10.64ns) 52us 332ns (10.42ns) 191us 1209ns (38.21ns) 61us 551ns (12.22ns) 23 | -- Charset VDe:58us 374ns (11.73ns) 63us 446ns (12.77ns) 53us 336ns (10.61ns) 190us 1208ns (38.18ns) 57us 540ns (11.41ns) 24 | Array size: 10_000 25 | -- Unif :91us 442ns (9.12ns) 142us 699ns (14.24ns) 244us 1096ns (24.46ns) 411us 1841ns 
(41.17ns) 152us 790ns (15.27ns) 26 | -- Equal :3us 15ns (0.34ns) 4us 19ns (0.42ns) 4us 19ns (0.42ns) 4us 20ns (0.45ns) 4us 19ns (0.42ns) 27 | -- Charset :9us 43ns (0.96ns) 85us 406ns (8.57ns) 91us 413ns (9.18ns) 411us 1846ns (41.18ns) 86us 406ns (8.62ns) 28 | -- Charset Den:93us 638ns (9.37ns) 95us 433ns (9.56ns) 155us 702ns (15.58ns) 703us 3146ns (70.35ns) 103us 498ns (10.38ns) 29 | -- Charset VDe:175us 785ns (17.54ns) 103us 548ns (10.33ns) 165us 741ns (16.51ns) 703us 3147ns (70.36ns) 92us 419ns (9.21ns) 30 | Array size: 20_000 31 | -- Unif :310us 1014ns (15.53ns) 236us 876ns (11.83ns) 873us 2762ns (43.66ns) 1501us 4749ns (75.07ns) 282us 918ns (14.14ns) 32 | -- Equal :11us 35ns (0.55ns) 13us 43ns (0.68ns) 13us 43ns (0.69ns) 16us 60ns (0.83ns) 13us 43ns (0.68ns) 33 | -- Charset :24us 78ns (1.22ns) 157us 512ns (7.88ns) 306us 977ns (15.31ns) 1496us 4732ns (74.81ns) 150us 491ns (7.50ns) 34 | -- Charset Den:107us 554ns (5.35ns) 143us 463ns (7.19ns) 302us 964ns (15.14ns) 1492us 4720ns (74.62ns) 155us 529ns (7.75ns) 35 | -- Charset VDe:255us 808ns (12.77ns) 155us 542ns (7.76ns) 309us 988ns (15.47ns) 1598us 5314ns (79.92ns) 162us 546ns (8.13ns) 36 | Array size: 50_000 37 | -- Unif :615us 1236ns (12.31ns) 450us 925ns (9.02ns) 2362us 4802ns (47.25ns) 4217us 8436ns (84.35ns) 505us 1050ns (10.11ns) 38 | -- Equal :26us 53ns (0.53ns) 32us 64ns (0.64ns) 32us 67ns (0.66ns) 37us 76ns (0.76ns) 34us 71ns (0.68ns) 39 | -- Charset :55us 112ns (1.12ns) 338us 718ns (6.76ns) 750us 1543ns (15.01ns) 4311us 8860ns (86.24ns) 381us 853ns (7.62ns) 40 | -- Charset Den:177us 367ns (3.56ns) 307us 630ns (6.14ns) 724us 1450ns (14.49ns) 4170us 8342ns (83.41ns) 324us 689ns (6.49ns) 41 | -- Charset VDe:630us 1263ns (12.61ns) 313us 631ns (6.27ns) 814us 1652ns (16.28ns) 3679us 7417ns (73.59ns) 298us 613ns (5.98ns) 42 | Array size: 100_000 43 | -- Unif :1039us 1484ns (10.40ns) 817us 1170ns (8.18ns) 4161us 5940ns (41.61ns) 5601us 10013ns (56.02ns) 786us 1167ns (7.86ns) 44 | -- Equal :30us 43ns (0.30ns) 
35us 51ns (0.36ns) 39us 68ns (0.39ns) 47us 76ns (0.47ns) 35us 51ns (0.36ns) 45 | -- Charset :65us 93ns (0.66ns) 579us 985ns (5.80ns) 841us 1204ns (8.42ns) 5075us 7179ns (50.76ns) 545us 806ns (5.45ns) 46 | -- Charset Den:143us 204ns (1.43ns) 598us 1010ns (5.98ns) 834us 1185ns (8.34ns) 5075us 7177ns (50.75ns) 533us 827ns (5.33ns) 47 | -- Charset VDe:723us 1025ns (7.24ns) 485us 717ns (4.85ns) 888us 1290ns (8.89ns) 5118us 7240ns (51.18ns) 583us 831ns (5.83ns) 48 | Array size: 200_000 49 | -- Unif :1373us 1380ns (6.87ns) 1429us 1613ns (7.15ns) 5640us 5641ns (28.20ns) 10742us 10745ns (53.71ns) 1368us 1455ns (6.84ns) 50 | -- Equal :59us 60ns (0.30ns) 73us 75ns (0.37ns) 71us 72ns (0.36ns) 88us 88ns (0.44ns) 73us 74ns (0.37ns) 51 | -- Charset :131us 131ns (0.66ns) 950us 980ns (4.75ns) 1742us 1987ns (8.71ns) 10739us 10740ns (53.70ns) 1015us 1094ns (5.08ns) 52 | -- Charset Den:214us 216ns (1.07ns) 959us 1096ns (4.80ns) 1739us 1842ns (8.70ns) 10763us 10764ns (53.82ns) 950us 986ns (4.75ns) 53 | -- Charset VDe:1426us 1445ns (7.13ns) 955us 1074ns (4.78ns) 1696us 1728ns (8.48ns) 10747us 10749ns (53.74ns) 1097us 1324ns (5.49ns) 54 | Array size: 500_000 55 | -- Unif :3308us 2108ns (6.62ns) 3426us 2237ns (6.85ns) 15867us 11214ns (31.74ns) 28672us 18138ns (57.35ns) 3377us 2160ns (6.76ns) 56 | -- Equal :158us 105ns (0.32ns) 176us 112ns (0.35ns) 177us 112ns (0.36ns) 220us 141ns (0.44ns) 179us 114ns (0.36ns) 57 | -- Charset :296us 188ns (0.59ns) 2489us 1632ns (4.98ns) 6866us 5373ns (13.73ns) 28330us 17918ns (56.66ns) 2477us 1618ns (4.95ns) 58 | -- Charset Den:386us 245ns (0.77ns) 2316us 1705ns (4.63ns) 6351us 4939ns (12.70ns) 28396us 17961ns (56.79ns) 2511us 1674ns (5.02ns) 59 | -- Charset VDe:3411us 2159ns (6.82ns) 2378us 1671ns (4.76ns) 6457us 4202ns (12.91ns) 29086us 18725ns (58.17ns) 2397us 1857ns (4.80ns) 60 | Array size: 1_000_000 61 | -- Unif :6739us 3026ns (6.74ns) 2849us 1554ns (2.85ns) 30942us 14361ns (30.94ns) 57613us 25770ns (57.61ns) 5303us 2443ns (5.30ns) 62 | -- Equal 
:295us 133ns (0.30ns) 622us 281ns (0.62ns) 356us 161ns (0.36ns) 423us 190ns (0.42ns) 349us 157ns (0.35ns) 63 | -- Charset :594us 266ns (0.59ns) 2398us 1160ns (2.40ns) 9243us 4409ns (9.24ns) 59603us 26656ns (59.60ns) 5146us 2723ns (5.15ns) 64 | -- Charset Den:915us 514ns (0.92ns) 2604us 1254ns (2.60ns) 9763us 4447ns (9.76ns) 59980us 26826ns (59.98ns) 4563us 2238ns (4.56ns) 65 | -- Charset VDe:7084us 3172ns (7.08ns) 4566us 2220ns (4.57ns) 8652us 3908ns (8.65ns) 58343us 26096ns (58.34ns) 4396us 2201ns (4.40ns) 66 | Array size: 2_000_000 67 | -- Unif :15143us 4800ns (7.57ns) 4413us 1414ns (2.21ns) 70850us 23581ns (35.43ns) 124197us 39277ns (62.10ns) 13581us 4344ns (6.79ns) 68 | -- Equal :663us 211ns (0.33ns) 1139us 369ns (0.57ns) 789us 250ns (0.39ns) 921us 291ns (0.46ns) 806us 263ns (0.40ns) 69 | -- Charset :1415us 514ns (0.71ns) 3790us 1221ns (1.90ns) 20904us 6952ns (10.45ns) 121875us 38549ns (60.94ns) 9736us 3326ns (4.87ns) 70 | -- Charset Den:2979us 1019ns (1.49ns) 4163us 1330ns (2.08ns) 21695us 9447ns (10.85ns) 122220us 38653ns (61.11ns) 10853us 3484ns (5.43ns) 71 | -- Charset VDe:19960us 8062ns (9.98ns) 5895us 1934ns (2.95ns) 25552us 12692ns (12.78ns) 125172us 39584ns (62.59ns) 8180us 2864ns (4.09ns) 72 | Array size: 5_000_000 73 | -- Unif :53830us 12023ns (10.77ns) 9090us 1854ns (1.82ns) 188512us 38022ns (37.70ns) 311811us 62436ns (62.36ns) 32364us 6740ns (6.47ns) 74 | -- Equal :2897us 730ns (0.58ns) 2479us 504ns (0.50ns) 2537us 510ns (0.51ns) 2385us 477ns (0.48ns) 2719us 545ns (0.54ns) 75 | -- Charset :6756us 1358ns (1.35ns) 7365us 1500ns (1.47ns) 56548us 11367ns (11.31ns) 313353us 62719ns (62.67ns) 28128us 6864ns (5.63ns) 76 | -- Charset Den:9348us 1891ns (1.87ns) 9005us 1824ns (1.80ns) 58845us 11898ns (11.77ns) 317389us 63494ns (63.48ns) 25431us 6239ns (5.09ns) 77 | -- Charset VDe:42847us 9037ns (8.57ns) 13184us 2718ns (2.64ns) 60010us 12778ns (12.00ns) 312144us 62441ns (62.43ns) 27999us 7914ns (5.60ns) 78 | Array size: 10_000_000 79 | -- Unif :99392us 14195ns 
(9.94ns) 15812us 2256ns (1.58ns) 343465us 49328ns (34.35ns) 651511us 92157ns (65.15ns) 57966us 8637ns (5.80ns) 80 | -- Equal :5594us 797ns (0.56ns) 5947us 842ns (0.59ns) 5690us 817ns (0.57ns) 5192us 747ns (0.52ns) 5910us 836ns (0.59ns) 81 | -- Charset :15793us 2260ns (1.58ns) 13739us 1978ns (1.37ns) 119933us 17398ns (11.99ns) 649607us 91871ns (64.96ns) 44606us 6622ns (4.46ns) 82 | -- Charset Den:15912us 2275ns (1.59ns) 17864us 2581ns (1.79ns) 115609us 16742ns (11.56ns) 674221us 95350ns (67.42ns) 51625us 8533ns (5.16ns) 83 | -- Charset VDe:86476us 12372ns (8.65ns) 25060us 3737ns (2.51ns) 123484us 17591ns (12.35ns) 673078us 95414ns (67.31ns) 48100us 8193ns (4.81ns) 84 | test tests::speed_sort::speed_test_char ... ok 85 | 86 | test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 1 filtered out; finished in 44.51s 87 | -------------------------------------------------------------------------------- /src/generators/signed_i32.rs: -------------------------------------------------------------------------------- 1 | use rand::seq::SliceRandom; 2 | use rand::{thread_rng, Rng}; 3 | use rand_distr::{Distribution, Normal}; 4 | use rayon::prelude::*; 5 | 6 | // Uniform 7 | pub fn helper_random_array_uniform_i32(size: usize) -> Vec { 8 | (0..size) 9 | .into_par_iter() 10 | .map(|_| thread_rng().gen::()) 11 | .collect::>() 12 | } 13 | 14 | // 10^9 values 15 | pub fn helper_random_array_109_i32(size: usize) -> Vec { 16 | (0..size) 17 | .into_par_iter() 18 | .map(|_| thread_rng().gen_range(-1_000_000_000, 1_000_000_000)) 19 | .collect::>() 20 | } 21 | 22 | // Small values 23 | pub fn helper_random_array_small_i32(size: usize) -> Vec { 24 | (0..size) 25 | .into_par_iter() 26 | .map(|_| thread_rng().gen_range(-64_000, 64_000)) 27 | .collect::>() 28 | } 29 | 30 | // Ascending 31 | pub fn helper_random_array_ascending_i32(size: usize) -> Vec { 32 | (0..(size as i32)).into_par_iter().collect::>() 33 | } 34 | 35 | // Descending 36 | pub fn helper_random_array_descending_i32(size: 
usize) -> Vec { 37 | (0..size).into_par_iter().map(|i| -(i as i32)).collect::>() 38 | } 39 | 40 | // All equals 41 | pub fn helper_random_array_allequals_i32(size: usize) -> Vec { 42 | vec![thread_rng().gen(); size] 43 | } 44 | 45 | // Alternating 16 values 46 | pub fn helper_random_array_alternating16_i32(size: usize) -> Vec { 47 | (0..size) 48 | .into_par_iter() 49 | .map(|i| { 50 | if i % 2 == 0 { 51 | thread_rng().gen_range(0, 16) as i32 52 | } else { 53 | -(thread_rng().gen_range(0, 16) as i32) 54 | } 55 | }) 56 | .collect::>() 57 | } 58 | 59 | // Zipf 60 | pub fn helper_random_array_zipf_i32(size: usize) -> Vec { 61 | let mut array: Vec = Vec::with_capacity(size); 62 | let mut rng = thread_rng(); 63 | let mut quantity = size / 2; 64 | let mut i = 0; 65 | let mut value: i32 = 0; 66 | while quantity > 2 { 67 | value = rng.gen(); 68 | 69 | for _ in 0..quantity { 70 | array.push(value); 71 | i += 1; 72 | } 73 | 74 | quantity = quantity / 2; 75 | } 76 | while i < size { 77 | array.push(value); 78 | i += 1; 79 | } 80 | 81 | array.as_mut_slice().shuffle(&mut rng); 82 | 83 | array 84 | } 85 | 86 | // Sqrt 87 | pub fn helper_random_array_sqrt_i32(size: usize) -> Vec { 88 | let mut rng = thread_rng(); 89 | let sqrt = (size as f64).sqrt() as usize; 90 | let mut array: Vec = Vec::with_capacity(size); 91 | 92 | let mut i = 0; 93 | let mut value: i32 = 0; 94 | for _ in 0..sqrt { 95 | value = rng.gen(); 96 | for _ in 0..sqrt { 97 | array.push(value); 98 | i += 1; 99 | } 100 | } 101 | while i < size { 102 | array.push(value); 103 | i += 1; 104 | } 105 | 106 | array 107 | } 108 | 109 | // Almost sorted ascending 110 | pub fn helper_random_array_almost_asc_i32(size: usize) -> Vec { 111 | if size == 0 { 112 | return Vec::new(); 113 | } 114 | if size < 4 { 115 | return helper_random_array_uniform_i32(size); 116 | } 117 | 118 | let mut array = helper_random_array_ascending_i32(size); 119 | 120 | for _ in 0..((size as f64).sqrt() as usize) { 121 | let i = thread_rng().gen_range(0, 
size); 122 | let j = thread_rng().gen_range(0, size); 123 | array.swap(i, j); 124 | } 125 | 126 | array 127 | } 128 | 129 | // Almost sorted descending 130 | pub fn helper_random_array_almost_desc_i32(size: usize) -> Vec { 131 | if size == 0 { 132 | return Vec::new(); 133 | } 134 | if size < 4 { 135 | return helper_random_array_uniform_i32(size); 136 | } 137 | 138 | let mut array = helper_random_array_descending_i32(size); 139 | 140 | for _ in 0..((size as f64).sqrt() as usize) { 141 | let i = thread_rng().gen_range(0, size); 142 | let j = thread_rng().gen_range(0, size); 143 | array.swap(i, j); 144 | } 145 | 146 | array 147 | } 148 | 149 | // Ascending sawtooth 150 | pub fn helper_random_array_asc_sawtooth_i32(size: usize) -> Vec { 151 | if size == 0 { 152 | return Vec::new(); 153 | } 154 | if size < 4 { 155 | return helper_random_array_uniform_i32(size); 156 | } 157 | 158 | let limit = (size as f64 / ((size as f64).log2() * 0.9)) as i32; 159 | (0..size).into_par_iter().map(|i| i as i32 % limit).collect::>() 160 | } 161 | 162 | // Descending sawtooth 163 | pub fn helper_random_array_desc_sawtooth_i32(size: usize) -> Vec { 164 | if size == 0 { 165 | return Vec::new(); 166 | } 167 | if size < 4 { 168 | return helper_random_array_uniform_i32(size); 169 | } 170 | 171 | let limit = (size as f64 / ((size as f64).log2() * 0.9)) as i32; 172 | (0..size) 173 | .into_par_iter() 174 | .map(|i| (size - 1 - i) as i32 % limit) 175 | .collect::>() 176 | } 177 | 178 | // Pipe Organ 179 | pub fn helper_random_array_pipe_organ_i32(size: usize) -> Vec { 180 | let middle = size / 2; 181 | (0..size) 182 | .into_par_iter() 183 | .map(|i| if i < middle { i as i32 } else { (size - i) as i32 }) 184 | .collect::>() 185 | } 186 | 187 | // Push Front 188 | pub fn helper_random_array_push_front_i32(size: usize) -> Vec { 189 | let mut array = 190 | (0..size).into_par_iter().map(|i| i as i32).collect::>(); 191 | 192 | if size > 0 { 193 | array[size - 1] = 0; 194 | } 195 | 196 | array 197 | } 198 
| 199 | // Push middle 200 | pub fn helper_random_array_push_middle_i32(size: usize) -> Vec { 201 | let mut array = 202 | (0..size).into_par_iter().map(|i| i as i32).collect::>(); 203 | 204 | if size > 0 { 205 | array[size - 1] = (size / 2) as i32; 206 | } 207 | 208 | array 209 | } 210 | 211 | fn helper_normal(size: usize, range: f32) -> Vec { 212 | let normal = Normal::new(0.0, range).unwrap(); 213 | (0..size) 214 | .into_par_iter() 215 | .map(|_| normal.sample(&mut thread_rng()) as i32) 216 | .collect::>() 217 | } 218 | 219 | // Normale(0, 2^10) 220 | pub fn helper_random_array_normale_10_i32(size: usize) -> Vec { 221 | helper_normal(size, 1024.0) 222 | } 223 | 224 | // Normale(0, 2^20) 225 | pub fn helper_random_array_normale_20_i32(size: usize) -> Vec { 226 | helper_normal(size, 1_000_000.0) 227 | } 228 | 229 | // Normale(0, 2^30) 230 | pub fn helper_random_array_normale_30_i32(size: usize) -> Vec { 231 | helper_normal(size, 1_000_000_000.0) 232 | } 233 | 234 | pub fn generators_i32( 235 | ) -> Vec<(&'static dyn Fn(usize) -> Vec, &'static str)> { 236 | vec![ 237 | (&helper_random_array_uniform_i32, "-- Unif :"), 238 | (&helper_random_array_109_i32, "-- +-10^9 :"), 239 | (&helper_random_array_small_i32, "-- Small :"), 240 | (&helper_random_array_ascending_i32, "-- Asc :"), 241 | (&helper_random_array_descending_i32, "-- Desc :"), 242 | (&helper_random_array_allequals_i32, "-- Equal :"), 243 | (&helper_random_array_alternating16_i32, "-- Alt16 :"), 244 | (&helper_random_array_zipf_i32, "-- Zipf :"), 245 | (&helper_random_array_almost_asc_i32, "-- Almost Asc :"), 246 | (&helper_random_array_almost_desc_i32, "-- Almost Desc:"), 247 | (&helper_random_array_asc_sawtooth_i32, "-- Asc Saw :"), 248 | (&helper_random_array_desc_sawtooth_i32, "-- Desc Saw :"), 249 | (&helper_random_array_sqrt_i32, "-- Sqrt :"), 250 | (&helper_random_array_pipe_organ_i32, "-- Pipe Organ :"), 251 | (&helper_random_array_push_front_i32, "-- Front :"), 252 | 
(&helper_random_array_push_middle_i32, "-- Middle :"), 253 | (&helper_random_array_normale_10_i32, "-- Normale 10 :"), 254 | (&helper_random_array_normale_20_i32, "-- Normale 20 :"), 255 | (&helper_random_array_normale_30_i32, "-- Normale 30 :"), 256 | ] 257 | } 258 | 259 | #[cfg(target_pointer_width = "32")] 260 | pub fn generators_isize( 261 | ) -> Vec<(&'static dyn Fn(usize) -> Vec, &'static str)> { 262 | generators_i32() 263 | .into_iter() 264 | .map(|(gen, title)| { 265 | let new_gen = move |size: usize| -> Vec { 266 | unsafe { 267 | let arr = gen(size); 268 | std::mem::transmute::, Vec>(arr) 269 | } 270 | }; 271 | 272 | ( 273 | Box::leak(Box::new(new_gen)) 274 | as &'static dyn Fn(usize) -> Vec, 275 | title, 276 | ) 277 | }) 278 | .collect() 279 | } 280 | --------------------------------------------------------------------------------