├── .gitignore ├── Cargo.toml ├── LICENCE ├── examples ├── sort.rs └── sort_indirect.rs ├── benches └── sort.rs ├── README.md ├── src ├── utils.rs ├── radix_sort.wgsl └── lib.rs ├── tests └── sort.rs └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .vscode/ -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "wgpu_sort" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["Simon Niedermayr", "Josef Stumpfegger"] 6 | license = "BSD-2-Clause" 7 | description = " WebGPU/wgpu Radix Key-Value Sort " 8 | repository = "https://github.com/KeKsBoTer/wgpu_sort" 9 | homepage = "https://github.com/KeKsBoTer/wgpu_sort" 10 | documentation = "https://docs.rs/wgpu_sort" 11 | keywords = ["wgpu", "gpu", "sort","radix","wgpu"] 12 | categories = ["rendering","algorithms"] 13 | readme = "README.md" 14 | 15 | 16 | [package.metadata.docs.rs] 17 | all-features = true 18 | 19 | [dependencies] 20 | wgpu = { version = "27.0.1" } 21 | # wgpu = { git = "https://github.com/gfx-rs/wgpu.git" } #, features = ["webgl"] } 22 | bytemuck = { version = "1.13.0", features = ["derive"] } 23 | futures-intrusive = "0.5.0" 24 | 25 | log = "0.4" 26 | env_logger = "0.11" 27 | 28 | 29 | [dev-dependencies] 30 | rand = "0.8.5" 31 | pollster = { version = "0.3.0", features = ["macro"] } 32 | float-ord = "0.3.2" 33 | criterion = { version = "0.4", features = ["html_reports"] } 34 | 35 | 36 | [[bench]] 37 | name = "sort" 38 | harness = false 39 | 40 | 41 | [[example]] 42 | name = "sort" 43 | 44 | [[example]] 45 | name = "sort_indirect" 46 | 47 | [[test]] 48 | name = "sort" 49 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright 
(c) 2024, Simon Niedermayr, Josef Stumpfegger 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | -------------------------------------------------------------------------------- /examples/sort.rs: -------------------------------------------------------------------------------- 1 | // this example creates an array with 10 key-value (u32,f32) pairs and sorts them on the gpu 2 | use std::num::NonZeroU32; 3 | 4 | use wgpu_sort::{ 5 | utils::{download_buffer, download_buffer2, guess_workgroup_size, upload_to_buffer}, 6 | GPUSorter, 7 | }; 8 | 9 | #[pollster::main] 10 | async fn main() { 11 | let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::default()); 12 | 13 | let adapter = wgpu::util::initialize_adapter_from_env_or_default(&instance, None) 14 | .await 15 | .unwrap(); 16 | 17 | let (device, queue) = adapter 18 | .request_device(&wgpu::DeviceDescriptor { 19 | label: None, 20 | required_features: wgpu::Features::empty(), 21 | required_limits: wgpu::Limits::default(), 22 | experimental_features: wgpu::ExperimentalFeatures::disabled(), 23 | memory_hints: wgpu::MemoryHints::Performance, 24 | trace: wgpu::Trace::Off, 25 | }) 26 | .await 27 | .unwrap(); 28 | let subgroup_size = guess_workgroup_size(&device, &queue) 29 | .await 30 | .expect("could not find a valid subgroup size"); 31 | println!("using subgroup size {subgroup_size}"); 32 | let sorter = GPUSorter::new(&device, subgroup_size); 33 | 34 | let n = 36; 35 | let sort_buffers = sorter.create_sort_buffers(&device, NonZeroU32::new(n).unwrap()); 36 | 37 | let keys_scrambled: Vec = vec![ 38 | 65085, 130621, 196157, 261693, 327229, 392765, 458301, 523837, 589373, 65337, 130873, 39 | 196409, 261945, 327481, 393017, 458553, 524089, 589625, 130944, 196480, 327552, 393088, 40 | 65442, 130978, 196514, 262050, 327586, 393122, 458658, 524194, 589730, 589824, 589824, 41 | 589824, 589824, 589824, 42 | ]; 43 | 44 | let values_scrambled: Vec = keys_scrambled.iter().map(|v| 5).collect(); 45 | 46 | loop { 47 | let mut encoder = 48 | device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None 
}); 49 | 50 | upload_to_buffer( 51 | &mut encoder, 52 | &sort_buffers.keys(), 53 | &device, 54 | keys_scrambled.as_slice(), 55 | ); 56 | upload_to_buffer( 57 | &mut encoder, 58 | &sort_buffers.values(), 59 | &device, 60 | values_scrambled.as_slice(), 61 | ); 62 | 63 | println!( 64 | "before: {:?}", 65 | keys_scrambled 66 | .iter() 67 | .zip(values_scrambled.iter()) 68 | .collect::>() 69 | ); 70 | 71 | // sorter.sort(&mut encoder, &sort_buffers); 72 | sorter.sort(&mut encoder, &queue, &sort_buffers, None); 73 | 74 | // wait for sorter to finish 75 | let idx = queue.submit([encoder.finish()]); 76 | /* 77 | device 78 | .poll(wgpu::PollType::Wait { 79 | submission_index: Some(idx), 80 | timeout: None, 81 | }) 82 | .unwrap(); 83 | */ 84 | 85 | // keys buffer has padding at the end 86 | // so we only download the "valid" data 87 | download_buffer2(&sort_buffers.keys(), &device, &queue); 88 | download_buffer2(&sort_buffers.values(), &device, &queue); 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /benches/sort.rs: -------------------------------------------------------------------------------- 1 | use std::{num::NonZeroU32, time::Duration}; 2 | 3 | use wgpu_sort::{utils::{download_buffer, guess_workgroup_size}, GPUSorter, SortBuffers}; 4 | 5 | struct SortStuff{ 6 | device:wgpu::Device, 7 | queue:wgpu::Queue, 8 | query_set:wgpu::QuerySet, 9 | query_buffer:wgpu::Buffer, 10 | } 11 | 12 | async fn setup()-> SortStuff{ 13 | let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::default()); 14 | 15 | let adapter = wgpu::util::initialize_adapter_from_env_or_default(&instance, None) 16 | .await 17 | .unwrap(); 18 | 19 | let (device, queue) = adapter 20 | .request_device( 21 | &wgpu::DeviceDescriptor { 22 | required_features: wgpu::Features::TIMESTAMP_QUERY, 23 | required_limits: wgpu::Limits{ 24 | max_buffer_size:1<<30, 25 | max_storage_buffer_binding_size:1<<30, 26 | ..Default::default() 27 | }, 28 | label: None, 29 | }, 
30 | None, 31 | ) 32 | .await 33 | .unwrap(); 34 | 35 | let capacity = 2; 36 | let query_set = device.create_query_set(&wgpu::QuerySetDescriptor { 37 | label: Some("time stamp query set"), 38 | ty: wgpu::QueryType::Timestamp, 39 | count: capacity, 40 | }); 41 | 42 | 43 | let query_buffer = device.create_buffer(&wgpu::BufferDescriptor { 44 | label: Some("query set buffer"), 45 | size: capacity as u64 * std::mem::size_of::() as u64, 46 | usage: wgpu::BufferUsages::QUERY_RESOLVE | wgpu::BufferUsages::COPY_SRC, 47 | mapped_at_creation: false, 48 | }); 49 | 50 | return SortStuff{device,queue,query_set,query_buffer} 51 | 52 | } 53 | 54 | async fn sort(context:&SortStuff,sorter:&GPUSorter,buffers:&SortBuffers,n:u32,iters:u32) -> Duration { 55 | 56 | let mut encoder = context.device.create_command_encoder(&wgpu::CommandEncoderDescriptor { 57 | label: None, 58 | }); 59 | 60 | encoder.write_timestamp(&context.query_set, 0); 61 | 62 | for _ in 0..iters{ 63 | sorter.sort(&mut encoder,&context.queue,buffers,Some(n)); 64 | } 65 | 66 | encoder.write_timestamp(&context.query_set, 1); 67 | encoder.resolve_query_set( 68 | &context.query_set, 69 | 0..2, 70 | &context.query_buffer, 71 | 0, 72 | ); 73 | let idx = context.queue.submit([encoder.finish()]); 74 | context.device.poll(wgpu::Maintain::WaitForSubmissionIndex(idx)); 75 | 76 | let timestamps : Vec = pollster::block_on(download_buffer(&context.query_buffer, &context.device, &context.queue, ..)); 77 | let diff_ticks = timestamps[1] - timestamps[0]; 78 | let period = context.queue.get_timestamp_period(); 79 | let diff_time = Duration::from_nanos((diff_ticks as f32 * period / iters as f32) as u64); 80 | 81 | return diff_time; 82 | } 83 | 84 | 85 | 86 | #[pollster::main] 87 | async fn main() { 88 | 89 | let context = setup().await; 90 | 91 | let subgroup_size = guess_workgroup_size(&context.device, &context.queue).await.expect("could not find a valid subgroup size"); 92 | 93 | let sorter = GPUSorter::new(&context.device, 
subgroup_size); 94 | 95 | 96 | for n in [10_000,100_000,1_000_000,8_000_000,20_000_000]{ 97 | let buffers = sorter.create_sort_buffers(&context.device, NonZeroU32::new(n).unwrap()); 98 | let d = sort(&context,&sorter, &buffers,n,10000).await; 99 | println!("{n}: {d:?}"); 100 | } 101 | } 102 | 103 | -------------------------------------------------------------------------------- /examples/sort_indirect.rs: -------------------------------------------------------------------------------- 1 | // this example creates an array with 10 key-value (f32,u32) pairs and sorts them on the gpu 2 | // Important: sorting by f32 keys only works for non negative key values. Also NaN and inf values give unexpected results 3 | use std::num::NonZeroU32; 4 | 5 | use bytemuck::bytes_of; 6 | use wgpu::util::DeviceExt; 7 | use wgpu_sort::{utils::{download_buffer, guess_workgroup_size, upload_to_buffer}, GPUSorter, HISTO_BLOCK_KVS}; 8 | 9 | 10 | #[pollster::main] 11 | async fn main(){ 12 | let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::default()); 13 | 14 | let adapter = wgpu::util::initialize_adapter_from_env_or_default(&instance, None) 15 | .await 16 | .unwrap(); 17 | 18 | let (device, queue) = adapter 19 | .request_device( 20 | &wgpu::DeviceDescriptor { 21 | required_features: wgpu::Features::empty(), 22 | required_limits: wgpu::Limits::default(), 23 | label: None, 24 | }, 25 | None, 26 | ) 27 | .await 28 | .unwrap(); 29 | let subgroup_size = guess_workgroup_size(&device, &queue).await.expect("could not find a valid subgroup size"); 30 | println!("using subgroup size {subgroup_size}"); 31 | let sorter = GPUSorter::new(&device, subgroup_size); 32 | 33 | let n = 10; 34 | let sort_buffers = sorter.create_sort_buffers(&device, NonZeroU32::new(n).unwrap()); 35 | 36 | 37 | let keys_scrambled: Vec = (1..=n).map(|v| 1./v as f32).collect(); 38 | let values_scrambled:Vec = (1..=n).collect(); 39 | 40 | 41 | let mut encoder = 
device.create_command_encoder(&wgpu::CommandEncoderDescriptor { 42 | label: Some("GPURSSorter test_sort"), 43 | }); 44 | 45 | upload_to_buffer( 46 | &mut encoder, 47 | &sort_buffers.keys(), 48 | &device, 49 | keys_scrambled.as_slice(), 50 | ); 51 | upload_to_buffer( 52 | &mut encoder, 53 | &sort_buffers.values(), 54 | &device, 55 | values_scrambled.as_slice(), 56 | ); 57 | 58 | println!("before: {:?}",keys_scrambled.iter().zip(values_scrambled.iter()).collect::>()); 59 | 60 | // round to next larger multiple of HISTO_BLOCK_KVS 61 | let num_wg = (n + HISTO_BLOCK_KVS- 1)/HISTO_BLOCK_KVS; 62 | 63 | let dispatch_indirect = wgpu::util::DispatchIndirectArgs{ 64 | x: num_wg, 65 | y: 1, 66 | z: 1 67 | }; 68 | 69 | queue.write_buffer(sort_buffers.state_buffer(), 0, bytes_of(&n)); 70 | 71 | let dispatch_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor{ 72 | label: Some("dispatch indirect buffer"), 73 | contents: dispatch_indirect.as_bytes(), 74 | usage: wgpu::BufferUsages::INDIRECT, 75 | }); 76 | 77 | sorter.sort_indirect(&mut encoder, &sort_buffers,&dispatch_buffer); 78 | 79 | // wait for sorter to fininsh 80 | let idx = queue.submit([encoder.finish()]); 81 | device.poll(wgpu::Maintain::WaitForSubmissionIndex(idx)); 82 | 83 | // keys buffer has padding at the end 84 | // so we only download the "valid" data 85 | let keys_sorted = download_buffer::( 86 | &sort_buffers.keys(), 87 | &device, 88 | &queue, 89 | 0..sort_buffers.keys_valid_size() 90 | ) 91 | .await; 92 | let value_sorted = download_buffer::( 93 | &sort_buffers.values(), 94 | &device, 95 | &queue, 96 | .. 
97 | ) 98 | .await; 99 | 100 | println!("after: {:?}",keys_sorted.iter().zip(value_sorted.iter()).collect::>()); 101 | } 102 | 103 | 104 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WebGPU Radix Key-Value Sort 2 | 3 | [![crates.io version](https://img.shields.io/crates/v/wgpu_sort.svg)](https://crates.io/crates/wgpu_sort) [![Documentation](https://docs.rs/wgpu_sort/badge.svg)](https://docs.rs/wgpu_sort/) 4 | 5 | 6 | This package implements a GPU version of radix sort. A good introduction to general purpose radix sort can be found here: 7 | 8 | The GPU radix sort implemented here is a re-implementation of the Vulkan radix sort found in the fuchsia repos: . 9 | 10 | Currently only the sorting for 32-bit key-value pairs is implemented. 11 | It can be used to sort unsigned integers and non negative float numbers. See [Limitations](#limitations) for more details. 12 | The keys are sorted in ascending order. 13 | 14 | It was originally implemented for [our 3D Gaussian Splatting Renderer](https://github.com/KeKsBoTer/web-splat) to sort splats according to their depth in real time. It can be seen in action in this [web demo](https://keksboter.github.io/web-splat/demo.html). 
15 | 16 | ## Example 17 | 18 | ```rust,ignore 19 | // find best subgroup size 20 | let subgroup_size = guess_workgroup_size(&device, &queue).await.unwrap(); 21 | let sorter = GPUSorter::new(&device, subgroup_size); 22 | 23 | // setup buffers to sort 100 key-value pairs 24 | let n = 100; 25 | let sort_buffers = sorter.create_sort_buffers(&device, NonZeroU32::new(n).unwrap()); 26 | 27 | let keys_scrambled: Vec = (0..n).rev().collect(); 28 | let values_scrambled:Vec = keys_scrambled.clone(); 29 | 30 | let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {label: None}); 31 | 32 | upload_to_buffer( 33 | &mut encoder, 34 | &sort_buffers.keys(), 35 | &device, 36 | keys_scrambled.as_slice(), 37 | ); 38 | upload_to_buffer( 39 | &mut encoder, 40 | &sort_buffers.values(), 41 | &device, 42 | values_scrambled.as_slice(), 43 | ); 44 | 45 | // sorter.sort(&mut encoder, &sort_buffers); 46 | sorter.sort(&mut encoder,&queue,&sort_buffers,None); 47 | queue.submit([encoder.finish()]); 48 | 49 | // the key and value buffers are now sorted. 50 | ``` 51 | Indirect dispatching is also supported. See [examples/sort_indirect.rs](examples/sort_indirect.rs). 52 | 53 | ## Benchmarks 54 | 55 | To measure the performance we sort the key-value pairs 1000 times and report the average duration per run. 56 | Measurements were performed for different numbers of pairs. 57 | Take a look at [benches/sort.rs](benches/sort.rs) for more details. 
58 | 59 | | Device | 10k | 100k | 1 Million | 8 Million | 20 Million | 60 | |------------------------|-----------|-----------|-----------|------------|------------| 61 | | NVIDIA RTX A5000 | 108.277µs | 110.179µs | 317.191µs | 1.641699ms | 3.980834ms | 62 | | AMD Radeon R9 380 | 803.527µs | 829.003µs | 2.76469ms | 18.81558ms | 46.12854ms | 63 | | Intel HD Graphics 4600 | 790.382µs | 4.12287ms | 38.7421ms | 295.2937ms | 732.3900ms | 64 | 65 | ## Limitations 66 | 67 | This sorter comes with a number of limitations that are explained in the following. 68 | 69 | **Subgroups** 70 | 71 | This sorter makes use of [subgroups](https://docs.vulkan.org/guide/latest/subgroups.html) to reduce synchronization and increase performance. 72 | Unfortunately subgroup operations are not supported by WebGPU/wgpu right now. 73 | 74 | To overcome this issue we "guess" the subgroup size by trying out different subgroup sizes and picking the largest one that works (see [utils::guess_workgroup_size](src/utils.rs)). 75 | This works in almost all cases but can fail because the subgroup size can change over time. 76 | Once subgroups are supported this will be fixed. 77 | Status can be found [here](https://github.com/gpuweb/gpuweb/issues/4306). 78 | 79 | **Floating Point Numbers** 80 | 81 | The sorting algorithm interprets the values as integers and sorts the keys in ascending order. 82 | Non-negative float values can be interpreted as unsigned integers without affecting the ordering. 83 | Therefore this sorter can be used to sort 32-bit float keys. 84 | Note that NaN and Inf values lead to unexpected results as these are interpreted as integers as well. 85 | An example for sorting float values can be found [here](examples/sort_indirect.rs). 
86 | 87 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | num::NonZeroU32, 3 | ops::{Deref, RangeBounds}, 4 | }; 5 | 6 | use wgpu::util::DeviceExt; 7 | 8 | use crate::GPUSorter; 9 | 10 | #[doc(hidden)] 11 | /// only used for testing 12 | /// temporally used for guessing subgroup size 13 | pub fn upload_to_buffer( 14 | encoder: &mut wgpu::CommandEncoder, 15 | buffer: &wgpu::Buffer, 16 | device: &wgpu::Device, 17 | values: &[T], 18 | ) { 19 | let staging_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { 20 | label: Some("Staging buffer"), 21 | contents: bytemuck::cast_slice(values), 22 | usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, 23 | }); 24 | encoder.copy_buffer_to_buffer(&staging_buffer, 0, buffer, 0, staging_buffer.size()); 25 | } 26 | 27 | #[doc(hidden)] 28 | /// only used for testing 29 | /// temporally used for guessing subgroup size 30 | pub async fn download_buffer( 31 | buffer: &wgpu::Buffer, 32 | device: &wgpu::Device, 33 | queue: &wgpu::Queue, 34 | range: impl RangeBounds, 35 | ) -> Vec { 36 | // copy buffer data 37 | let download_buffer = device.create_buffer(&wgpu::BufferDescriptor { 38 | label: Some("Download buffer"), 39 | size: buffer.size(), 40 | usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, 41 | mapped_at_creation: false, 42 | }); 43 | let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { 44 | label: Some("Copy encoder"), 45 | }); 46 | encoder.copy_buffer_to_buffer(buffer, 0, &download_buffer, 0, buffer.size()); 47 | queue.submit([encoder.finish()]); 48 | 49 | // download buffer 50 | let buffer_slice = download_buffer.slice(range); 51 | let (tx, rx) = futures_intrusive::channel::shared::oneshot_channel(); 52 | buffer_slice.map_async(wgpu::MapMode::Read, move |result| tx.send(result).unwrap()); 53 | device 54 | 
.poll(wgpu::PollType::Wait { 55 | submission_index: None, 56 | timeout: None, 57 | }) 58 | .unwrap(); 59 | rx.receive().await.unwrap().unwrap(); 60 | 61 | let data = buffer_slice.get_mapped_range(); 62 | return bytemuck::cast_slice(data.deref()).to_vec(); 63 | } 64 | 65 | pub fn download_buffer2(buffer: &wgpu::Buffer, device: &wgpu::Device, queue: &wgpu::Queue) { 66 | // copy buffer data 67 | let download_buffer = device.create_buffer(&wgpu::BufferDescriptor { 68 | label: Some("Download buffer"), 69 | size: buffer.size(), 70 | usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, 71 | mapped_at_creation: false, 72 | }); 73 | let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { 74 | label: Some("Copy encoder"), 75 | }); 76 | encoder.copy_buffer_to_buffer(buffer, 0, &download_buffer, 0, buffer.size()); 77 | queue.submit([encoder.finish()]); 78 | 79 | // Download buffer 80 | download_buffer 81 | .clone() 82 | .map_async(wgpu::MapMode::Read, .., move |result| { 83 | result.unwrap(); 84 | let data: Vec = 85 | bytemuck::cast_slice(&*download_buffer.get_mapped_range(..)).to_vec(); 86 | let data = data.iter().take(3 * 3 * 4).collect::>(); 87 | println!("{:?}", &data); 88 | if data.contains(&&0) { 89 | panic!(); 90 | } 91 | }); 92 | // device.poll(wgpu::Maintain::Wait); 93 | } 94 | 95 | async fn test_sort(sorter: &GPUSorter, device: &wgpu::Device, queue: &wgpu::Queue) -> bool { 96 | // simply runs a small sort and check if the sorting result is correct 97 | let n = 8192; // means that 2 workgroups are needed for sorting 98 | let scrambled_data: Vec = (0..n).rev().map(|x| x as f32).collect(); 99 | let sorted_data: Vec = (0..n).map(|x| x as f32).collect(); 100 | 101 | let sort_buffers = sorter.create_sort_buffers(device, NonZeroU32::new(n).unwrap()); 102 | 103 | let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { 104 | label: Some("GPURSSorter test_sort"), 105 | }); 106 | upload_to_buffer( 107 | &mut 
encoder, 108 | &sort_buffers.keys(), 109 | device, 110 | scrambled_data.as_slice(), 111 | ); 112 | 113 | sorter.sort(&mut encoder, queue, &sort_buffers, None); 114 | let idx = queue.submit([encoder.finish()]); 115 | device 116 | .poll(wgpu::PollType::Wait { 117 | submission_index: Some(idx), 118 | timeout: None, 119 | }) 120 | .unwrap(); 121 | 122 | let sorted = download_buffer::( 123 | &sort_buffers.keys(), 124 | device, 125 | queue, 126 | 0..sort_buffers.keys_valid_size(), 127 | ) 128 | .await; 129 | return sorted 130 | .into_iter() 131 | .zip(sorted_data.into_iter()) 132 | .all(|(a, b)| a == b); 133 | } 134 | 135 | /// Function guesses the best subgroup size by testing the sorter with 136 | /// subgroup sizes 1,8,16,32,64,128 and returning the largest subgroup size that worked. 137 | pub async fn guess_workgroup_size(device: &wgpu::Device, queue: &wgpu::Queue) -> Option { 138 | let mut cur_sorter: GPUSorter; 139 | 140 | log::debug!("Searching for the maximum subgroup size (wgpu currently does not allow to query subgroup sizes)"); 141 | 142 | let mut best = None; 143 | for subgroup_size in [1, 8, 16, 32, 64, 128] { 144 | log::debug!("Checking sorting with subgroupsize {}", subgroup_size); 145 | 146 | cur_sorter = GPUSorter::new(device, subgroup_size); 147 | let sort_success = test_sort(&cur_sorter, device, queue).await; 148 | 149 | log::debug!("{} worked: {}", subgroup_size, sort_success); 150 | 151 | if !sort_success { 152 | break; 153 | } else { 154 | best = Some(subgroup_size) 155 | } 156 | } 157 | return best; 158 | } 159 | -------------------------------------------------------------------------------- /tests/sort.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Debug, num::NonZeroU32}; 2 | 3 | use bytemuck::bytes_of; 4 | use float_ord::FloatOrd; 5 | use rand::{ 6 | distributions::{Distribution, Standard}, 7 | rngs::StdRng, 8 | Rng, SeedableRng, 9 | }; 10 | use wgpu::util::DeviceExt; 11 | use 
wgpu_sort::{ 12 | utils::{download_buffer, guess_workgroup_size, upload_to_buffer}, 13 | GPUSorter, SortBuffers, HISTO_BLOCK_KVS, 14 | }; 15 | 16 | /// tests sorting of two u32 keys 17 | #[pollster::test] 18 | async fn sort_u32_small() { 19 | test_sort::(2, &apply_sort, None).await; 20 | } 21 | 22 | /// tests sorting of one million pairs with u32 keys 23 | #[pollster::test] 24 | async fn sort_u32_large() { 25 | test_sort::(1_000_000, &apply_sort, None).await; 26 | } 27 | 28 | /// tests sorting of one million pairs with f32 keys 29 | #[pollster::test] 30 | async fn sort_f32_large() { 31 | test_sort::(1_000_000, &apply_sort, None).await; 32 | } 33 | 34 | /// tests sorting only first half of one million pairs 35 | #[pollster::test] 36 | async fn sort_half() { 37 | test_sort::(1_000_000, &apply_sort, Some(500_000)).await; 38 | } 39 | 40 | // INDIRECT SORTING 41 | 42 | /// tests sorting of two u32 keys 43 | /// indirect dispatch 44 | #[pollster::test] 45 | async fn sort_indirect_small() { 46 | test_sort::(2, &apply_sort_indirect, None).await; 47 | } 48 | 49 | /// tests sorting of one million pairs with u32 keys 50 | /// indirect dispatch 51 | #[pollster::test] 52 | async fn sort_indirect_large() { 53 | test_sort::(1_000_000, &apply_sort_indirect, None).await; 54 | } 55 | 56 | /// tests sorting only first half of one million pairs 57 | /// indirect dispatch 58 | #[pollster::test] 59 | async fn sort_indirect_half() { 60 | test_sort::(1_000_000, &apply_sort_indirect, Some(500_000)).await; 61 | } 62 | 63 | async fn setup() -> (wgpu::Device, wgpu::Queue) { 64 | let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::default()); 65 | 66 | let adapter = wgpu::util::initialize_adapter_from_env_or_default(&instance, None) 67 | .await 68 | .unwrap(); 69 | 70 | let (device, queue) = adapter 71 | .request_device(&wgpu::DeviceDescriptor { 72 | required_features: wgpu::Features::empty(), 73 | required_limits: wgpu::Limits::default(), 74 | label: None, 75 | experimental_features: 
wgpu::ExperimentalFeatures::disabled(), 76 | memory_hints: wgpu::MemoryHints::Performance, 77 | trace: wgpu::Trace::Off, 78 | }) 79 | .await 80 | .unwrap(); 81 | 82 | return (device, queue); 83 | } 84 | 85 | type SortFn = dyn Fn( 86 | &mut wgpu::CommandEncoder, 87 | &wgpu::Device, 88 | &wgpu::Queue, 89 | &GPUSorter, 90 | &SortBuffers, 91 | Option, 92 | ) -> (); 93 | 94 | /// applies gpu sort with direct dispatch 95 | fn apply_sort( 96 | encoder: &mut wgpu::CommandEncoder, 97 | _device: &wgpu::Device, 98 | queue: &wgpu::Queue, 99 | sorter: &GPUSorter, 100 | sort_buffers: &SortBuffers, 101 | n: Option, 102 | ) { 103 | sorter.sort(encoder, queue, &sort_buffers, n); 104 | } 105 | 106 | /// applies gpu sort with indirect dispatch 107 | fn apply_sort_indirect( 108 | encoder: &mut wgpu::CommandEncoder, 109 | device: &wgpu::Device, 110 | queue: &wgpu::Queue, 111 | sorter: &GPUSorter, 112 | sort_buffers: &SortBuffers, 113 | n: Option, 114 | ) { 115 | // round to next larger multiple of HISTO_BLOCK_KVS 116 | let nelm = n.unwrap_or(sort_buffers.len()); 117 | let num_wg = (nelm + HISTO_BLOCK_KVS - 1) / HISTO_BLOCK_KVS; 118 | 119 | let dispatch_indirect = wgpu::util::DispatchIndirectArgs { 120 | x: num_wg, 121 | y: 1, 122 | z: 1, 123 | }; 124 | 125 | queue.write_buffer(sort_buffers.state_buffer(), 0, bytes_of(&nelm)); 126 | 127 | let dispatch_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { 128 | label: Some("dispatch indirect buffer"), 129 | contents: dispatch_indirect.as_bytes(), 130 | usage: wgpu::BufferUsages::INDIRECT, 131 | }); 132 | 133 | sorter.sort_indirect(encoder, &sort_buffers, &dispatch_buffer); 134 | } 135 | 136 | async fn test_sort(n: u32, sort_fn: &SortFn, sort_first_n: Option) 137 | where 138 | Standard: Distribution, 139 | T: PartialEq + Clone + Copy + Debug + bytemuck::Pod + Ord, 140 | { 141 | let (device, queue) = setup().await; 142 | let subgroup_size = guess_workgroup_size(&device, &queue).await; 143 | assert_ne!(subgroup_size, None); 
144 | let sorter = GPUSorter::new(&device, subgroup_size.unwrap()); 145 | 146 | let sort_buffers = sorter.create_sort_buffers(&device, NonZeroU32::new(n).unwrap()); 147 | let n_sorted = sort_first_n.unwrap_or(sort_buffers.len()); 148 | 149 | let mut rng = StdRng::seed_from_u64(0); 150 | let keys_scrambled: Vec = (0..n).map(|_| rng.gen()).collect(); 151 | let mut keys_sorted = keys_scrambled.clone(); 152 | keys_sorted[0..n_sorted as usize].sort(); 153 | 154 | let values_scrambled = keys_scrambled.clone(); 155 | let values_sorted = keys_sorted.clone(); 156 | 157 | let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { 158 | label: Some("GPURSSorter test_sort"), 159 | }); 160 | 161 | upload_to_buffer( 162 | &mut encoder, 163 | &sort_buffers.keys(), 164 | &device, 165 | keys_scrambled.as_slice(), 166 | ); 167 | upload_to_buffer( 168 | &mut encoder, 169 | &sort_buffers.values(), 170 | &device, 171 | values_scrambled.as_slice(), 172 | ); 173 | 174 | // sorter.sort(&mut encoder, &sort_buffers); 175 | sort_fn( 176 | &mut encoder, 177 | &device, 178 | &queue, 179 | &sorter, 180 | &sort_buffers, 181 | sort_first_n, 182 | ); 183 | 184 | let idx = queue.submit([encoder.finish()]); 185 | device 186 | .poll(wgpu::PollType::Wait { 187 | submission_index: Some(idx), 188 | timeout: None, 189 | }) 190 | .unwrap(); 191 | 192 | let keys_sorted_gpu = download_buffer::( 193 | &sort_buffers.keys(), 194 | &device, 195 | &queue, 196 | 0..sort_buffers.keys_valid_size(), 197 | ) 198 | .await; 199 | assert_eq!( 200 | keys_sorted_gpu[0..n_sorted as usize], 201 | keys_sorted[0..n_sorted as usize], 202 | "GPU keys equal to keys sorted on CPU" 203 | ); 204 | 205 | let values_sorted_gpu = download_buffer::(&sort_buffers.values(), &device, &queue, ..).await; 206 | assert_eq!( 207 | values_sorted_gpu[0..n_sorted as usize], 208 | values_sorted[0..n_sorted as usize], 209 | "GPU values equal to values sorted on CPU" 210 | ); 211 | } 212 | 213 | // ordered float 214 | 
#[repr(C)] 215 | #[derive(PartialEq, Debug, Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)] 216 | struct Float(f32); 217 | 218 | impl Eq for Float {} 219 | 220 | impl Ord for Float { 221 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 222 | FloatOrd(self.0).cmp(&FloatOrd(other.0)) 223 | } 224 | } 225 | 226 | impl PartialOrd for Float { 227 | fn partial_cmp(&self, other: &Self) -> Option { 228 | Some(self.cmp(other)) 229 | } 230 | } 231 | impl Distribution for Standard { 232 | fn sample(&self, rng: &mut R) -> Float { 233 | Float(rng.gen()) 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /src/radix_sort.wgsl: -------------------------------------------------------------------------------- 1 | // shader implementing gpu radix sort. More information in the beginning of gpu_rs.rs 2 | // info: 3 | 4 | // also the workgroup sizes are added in these prepasses 5 | // before the pipeline is started the following constant definitions are prepended to this shadercode 6 | 7 | // const histogram_sg_size 8 | // const histogram_wg_size 9 | // const rs_radix_log2 10 | // const rs_radix_size 11 | // const rs_keyval_size 12 | // const rs_histogram_block_rows 13 | // const rs_scatter_block_rows 14 | 15 | struct GeneralInfo { 16 | num_keys: u32, 17 | padded_size: u32, 18 | even_pass: u32, 19 | odd_pass: u32, 20 | }; 21 | 22 | @group(0) @binding(0) 23 | var infos: GeneralInfo; 24 | @group(0) @binding(1) 25 | var histograms : array>; 26 | @group(0) @binding(2) 27 | var keys : array; 28 | @group(0) @binding(3) 29 | var keys_b : array; 30 | @group(0) @binding(4) 31 | var payload_a : array; 32 | @group(0) @binding(5) 33 | var payload_b : array; 34 | 35 | // layout of the histograms buffer 36 | // +---------------------------------+ <-- 0 37 | // | histograms[keyval_size] | 38 | // +---------------------------------+ <-- keyval_size * histo_size 39 | // | partitions[scatter_blocks_ru-1] | 40 | // 
+---------------------------------+ <-- (keyval_size + scatter_blocks_ru - 1) * histo_size 41 | // | workgroup_ids[keyval_size] | 42 | // +---------------------------------+ <-- (keyval_size + scatter_blocks_ru - 1) * histo_size + workgroup_ids_size 43 | 44 | // -------------------------------------------------------------------------------------------------------------- 45 | // Filling histograms and keys with default values (also resets the pass infos for odd and even scattering) 46 | // -------------------------------------------------------------------------------------------------------------- 47 | @compute @workgroup_size({histogram_wg_size}) 48 | fn zero_histograms(@builtin(global_invocation_id) gid: vec3, @builtin(num_workgroups) nwg: vec3) { 49 | if gid.x == 0u { 50 | infos.even_pass = 0u; 51 | infos.odd_pass = 1u; // has to be one, as on the first call to even pass + 1 % 2 is calculated 52 | } 53 | // here the histograms and the partitions are both cleared to 0u; only the padding keys behind num_keys are set to 0xFFFFFFFF (the largest key value) to avoid sorting problems 54 | let scatter_wg_size = histogram_wg_size; 55 | let scatter_block_kvs = scatter_wg_size * rs_scatter_block_rows; 56 | let scatter_blocks_ru = (infos.num_keys + scatter_block_kvs - 1u) / scatter_block_kvs; 57 | 58 | let histo_size = rs_radix_size; 59 | var n = (rs_keyval_size + scatter_blocks_ru - 1u) * histo_size; 60 | let b = n; 61 | if infos.num_keys < infos.padded_size { 62 | n += infos.padded_size - infos.num_keys; 63 | } 64 | 65 | let line_size = nwg.x * {histogram_wg_size}u; 66 | for (var cur_index = gid.x; cur_index < n; cur_index += line_size){ 67 | if cur_index >= n { 68 | return; 69 | } 70 | 71 | if cur_index < rs_keyval_size * histo_size { 72 | atomicStore(&histograms[cur_index], 0u); 73 | } 74 | else if cur_index < b { 75 | atomicStore(&histograms[cur_index], 0u); 76 | } 77 | else { 78 | keys[infos.num_keys + cur_index - b] = 0xFFFFFFFFu; 79 | } 80 | } 81 | } 82 | 83 | // 
// Builds the histogram of radix digit `pass_` for the keys held in `kv` and adds it to the
// global histogram of that pass.
//   pass_: index of the digit (digit bits are [pass_ * rs_radix_log2, (pass_ + 1) * rs_radix_log2))
//   lid:   local invocation index within the workgroup
// The workgroup first accumulates into the shared `smem` counters and then flushes every
// non-empty bin with a single global atomicAdd per bin.
fn histogram_pass(pass_: u32, lid: u32) {
    zero_smem(lid);
    workgroupBarrier();

    for (var j = 0u; j < rs_histogram_block_rows; j++) {
        let u_val = bitcast<u32>(kv[j]);
        let digit = extractBits(u_val, pass_ * rs_radix_log2, rs_radix_log2);
        atomicAdd(&smem[digit], 1u);
    }

    workgroupBarrier();
    if lid < rs_radix_size {
        let count = atomicLoad(&smem[lid]);
        // skip empty bins: adding 0 would only cost a global atomic
        // (the former `>= 0u` test was always true for an unsigned value)
        if count > 0u {
            atomicAdd(&histograms[rs_radix_size * pass_ + lid], count);
        }
    }
}
// Replaces the rs_radix_size counters in `smem` by their exclusive prefix sum, in place.
//   lid: local invocation index; invocations with lid < rs_radix_size / 2 do the work,
//        but every invocation of the workgroup has to call this function because of the
//        workgroup barriers inside the loops.
// Two tree sweeps are used: an up-sweep that builds partial sums, then (after clearing the
// last element) a down-sweep that distributes them.
// There is no barrier after the last down-sweep step; the callers in this file issue a
// workgroupBarrier() right after the call before reading `smem`.
fn prefix_reduce_smem(lid: u32) {
    var offset = 1u;
    // up-sweep: d active invocations per level, each adds element ai onto element bi
    for (var d = rs_radix_size >> 1u; d > 0u; d = d >> 1u) { // sum in place tree
        workgroupBarrier();
        if lid < d {
            let ai = offset * (2u * lid + 1u) - 1u;
            let bi = offset * (2u * lid + 2u) - 1u;
            atomicAdd(&smem[bi], atomicLoad(&smem[ai]));
        }
        offset = offset << 1u;
    }

    if lid == 0u {
        atomicStore(&smem[rs_radix_size - 1u], 0u);
    } // clear the last element

    // down-sweep: swap ai <- bi and accumulate the old ai into bi, level by level
    for (var d = 1u; d < rs_radix_size; d = d << 1u) {
        offset = offset >> 1u;
        workgroupBarrier();
        if lid < d {
            let ai = offset * (2u * lid + 1u) - 1u;
            let bi = offset * (2u * lid + 2u) - 1u;

            let t = atomicLoad(&smem[ai]);
            atomicStore(&smem[ai], atomicLoad(&smem[bi]));
            atomicAdd(&smem[bi], t);
        }
    }
}
// Index of the first partition entry inside `histograms`: the partitions are stored directly
// behind the rs_keyval_size per-pass histograms of rs_radix_size entries each.
fn partitions_base_offset() -> u32 { return rs_keyval_size * rs_radix_size;}
// Start of the prefix area inside `scatter_smem`, i.e. behind two areas of rs_radix_size dwords.
fn smem_prefix_offset() -> u32 { return rs_radix_size + rs_radix_size;}
// Read entry `idx` of prefix sweep level 0 / 1 / 2. The level offsets (rs_mem_sweep_*_offset)
// are constants prepended to the shader source by the host code.
// NOTE(review): no call site of these three accessors is visible in this shader - confirm they are still needed.
fn rs_prefix_sweep_0(idx: u32) -> u32 { return scatter_smem[smem_prefix_offset() + rs_mem_sweep_0_offset + idx];}
fn rs_prefix_sweep_1(idx: u32) -> u32 { return scatter_smem[smem_prefix_offset() + rs_mem_sweep_1_offset + idx];}
fn rs_prefix_sweep_2(idx: u32) -> u32 { return scatter_smem[smem_prefix_offset() + rs_mem_sweep_2_offset + idx];}
// Loads the keys and payloads this invocation handles in an even scatter pass
// (source buffers: `keys` / `payload_a`) into the private arrays `kv` / `pv`.
//   wid: workgroup index, lid: local invocation index
// The workgroup's block is split into consecutive chunks, one per group of
// histogram_sg_size invocations; within its chunk an invocation reads every
// histogram_sg_size-th element, starting at its index inside the group.
fn fill_kv_even(wid: u32, lid: u32) {
    let group_index = lid / histogram_sg_size;
    let lane = lid % histogram_sg_size;

    let block_start = wid * (rs_histogram_block_rows * histogram_wg_size);
    let group_start = group_index * (rs_scatter_block_rows * histogram_sg_size);
    let first = block_start + group_start + lane;

    for (var row = 0u; row < rs_histogram_block_rows; row++) {
        let src = first + row * histogram_sg_size;
        kv[row] = keys[src];
        pv[row] = payload_a[src];
    }
}
i * histogram_sg_size; 253 | pv[i] = payload_b[pos]; 254 | } 255 | } 256 | fn scatter(pass_: u32, lid: vec3, gid: vec3, wid: vec3, nwg: vec3, partition_status_invalid: u32, partition_status_reduction: u32, partition_status_prefix: u32) { 257 | let partition_mask_invalid = partition_status_invalid << 30u; 258 | let partition_mask_reduction = partition_status_reduction << 30u; 259 | let partition_mask_prefix = partition_status_prefix << 30u; 260 | // kv_filling is done in the scatter_even and scatter_odd functions to account for front and backbuffer switch 261 | // in the reference there is a nulling of the smmem here, was moved to line 251 as smem is used in the code until then 262 | 263 | // The following implements conceptually the same as the 264 | // Emulate a "match" operation with broadcasts for small subgroup sizes (line 665 ff in scatter.glsl) 265 | // The difference however is, that instead of using subrgoupBroadcast each thread stores 266 | // its current number in the smem at lid.x, and then looks up their neighbouring values of the subgroup 267 | let subgroup_id = lid.x / histogram_sg_size; 268 | let subgroup_offset = subgroup_id * histogram_sg_size; 269 | let subgroup_tid = lid.x - subgroup_offset; 270 | let subgroup_count = {scatter_wg_size}u / histogram_sg_size; 271 | for (var i = 0u; i < rs_scatter_block_rows; i++) { 272 | let u_val = bitcast(kv[i]); 273 | let digit = extractBits(u_val, pass_ * rs_radix_log2, rs_radix_log2); 274 | atomicStore(&smem[lid.x], digit); 275 | var count = 0u; 276 | var rank = 0u; 277 | 278 | for (var j = 0u; j < histogram_sg_size; j++) { 279 | if atomicLoad(&smem[subgroup_offset + j]) == digit { 280 | count += 1u; 281 | if j <= subgroup_tid { 282 | rank += 1u; 283 | } 284 | } 285 | } 286 | 287 | kr[i] = (count << 16u) | rank; 288 | } 289 | 290 | zero_smem(lid.x); // now zeroing the smmem as we are now accumulating the histogram there 291 | workgroupBarrier(); 292 | 293 | // The final histogram is stored in the smem buffer 
294 | for (var i = 0u; i < subgroup_count; i++) { 295 | if subgroup_id == i { 296 | for (var j = 0u; j < rs_scatter_block_rows; j++) { 297 | let v = bitcast(kv[j]); 298 | let digit = extractBits(v, pass_ * rs_radix_log2, rs_radix_log2); 299 | let prev = histogram_load(digit); 300 | let rank = kr[j] & 0xFFFFu; 301 | let count = kr[j] >> 16u; 302 | kr[j] = prev + rank; 303 | 304 | if rank == count { 305 | histogram_store(digit, (prev + count)); 306 | } 307 | 308 | // TODO: check if the barrier here is needed 309 | } 310 | } 311 | workgroupBarrier(); 312 | } 313 | // kr filling is now done and contains the total offset for each value to be able to 314 | // move the values into order without having any collisions 315 | 316 | // we do not check for single work groups (is currently not assumed to occur very often) 317 | let partition_offset = lid.x + partitions_base_offset(); // is correct, the partitions pointer does not change 318 | let partition_base = wid.x * rs_radix_size; 319 | if wid.x == 0u { 320 | // special treating for the first workgroup as the data might be read back by later workgroups 321 | // corresponds to rs_first_prefix_store 322 | let hist_offset = pass_ * rs_radix_size + lid.x; 323 | if lid.x < rs_radix_size { 324 | // let exc = histograms[hist_offset]; 325 | let exc = atomicLoad(&histograms[hist_offset]); 326 | let red = histogram_load(lid.x);// scatter_smem[rs_keyval_size + lid.x]; 327 | 328 | scatter_smem[lid.x] = exc; 329 | 330 | let inc = exc + red; 331 | 332 | atomicStore(&histograms[partition_offset], inc | partition_mask_prefix); 333 | } 334 | } 335 | else { 336 | // standard case for the "inbetween" workgroups 337 | 338 | // rs_reduction_store, only for inbetween workgroups 339 | if lid.x < rs_radix_size && wid.x < nwg.x - 1u { 340 | let red = histogram_load(lid.x); 341 | atomicStore(&histograms[partition_offset + partition_base], red | partition_mask_reduction); 342 | } 343 | 344 | // rs_loopback_store 345 | if lid.x < rs_radix_size { 346 | 
var partition_base_prev = partition_base - rs_radix_size; 347 | var exc = 0u; 348 | 349 | // Note: Each workgroup invocation can proceed independently. 350 | // Subgroups and workgroups do NOT have to coordinate. 351 | while true { 352 | //let prev = atomicLoad(&histograms[partition_offset]);// histograms[partition_offset + partition_base_prev]; 353 | let prev = atomicLoad(&histograms[partition_base_prev + partition_offset]);// histograms[partition_offset + partition_base_prev]; 354 | if (prev & rs_partition_mask_status) == partition_mask_invalid { 355 | continue; 356 | } 357 | exc += prev & rs_partition_mask_count; 358 | if (prev & rs_partition_mask_status) != partition_mask_prefix { 359 | // continue accumulating reduction 360 | partition_base_prev -= rs_radix_size; 361 | continue; 362 | } 363 | 364 | // otherwise save the exclusive scan and atomically transform the 365 | // reduction into an inclusive prefix status math: reduction + 1 = prefix 366 | scatter_smem[lid.x] = exc; 367 | 368 | if wid.x < nwg.x - 1u { // only store when inbetween, skip for last workgrup 369 | atomicAdd(&histograms[partition_offset + partition_base], exc | (1u << 30u)); 370 | } 371 | break; 372 | } 373 | } 374 | } 375 | // special case for last workgroup is also done in the "inbetween" case 376 | 377 | // compute exclusive prefix scan of histogram 378 | // corresponds to rs_prefix 379 | // TODO make sure that the data is put into smem 380 | prefix_reduce_smem(lid.x); 381 | workgroupBarrier(); 382 | 383 | // convert keyval rank to local index, corresponds to rs_rank_to_local 384 | for (var i = 0u; i < rs_scatter_block_rows; i++) { 385 | let v = bitcast(kv[i]); 386 | let digit = extractBits(v, pass_ * rs_radix_log2, rs_radix_log2); 387 | let exc = histogram_load(digit); 388 | let idx = exc + kr[i]; 389 | 390 | kr[i] |= (idx << 16u); 391 | } 392 | workgroupBarrier(); 393 | 394 | // reorder kv[] and kr[], corresponds to rs_reorder 395 | let smem_reorder_offset = rs_radix_size; 396 | let 
smem_base = smem_reorder_offset + lid.x; // as we are in smem, the radix_size offset is not needed 397 | 398 | // keyvalues ---------------------------------------------- 399 | // store keyval to sorted location 400 | for (var j = 0u; j < rs_scatter_block_rows; j++) { 401 | let smem_idx = smem_reorder_offset + (kr[j] >> 16u) - 1u; 402 | 403 | scatter_smem[smem_idx] = bitcast(kv[j]); 404 | } 405 | workgroupBarrier(); 406 | 407 | // Load keyval dword from sorted location 408 | for (var j = 0u; j < rs_scatter_block_rows; j++) { 409 | kv[j] = scatter_smem[smem_base + j * {scatter_wg_size}u]; 410 | } 411 | workgroupBarrier(); 412 | // payload ---------------------------------------------- 413 | // store payload to sorted location 414 | for (var j = 0u; j < rs_scatter_block_rows; j++) { 415 | let smem_idx = smem_reorder_offset + (kr[j] >> 16u) - 1u; 416 | 417 | scatter_smem[smem_idx] = pv[j]; 418 | } 419 | workgroupBarrier(); 420 | 421 | // Load payload dword from sorted location 422 | for (var j = 0u; j < rs_scatter_block_rows; j++) { 423 | pv[j] = scatter_smem[smem_base + j * {scatter_wg_size}u]; 424 | } 425 | workgroupBarrier(); 426 | 427 | // store the digit-index to sorted location 428 | for (var i = 0u; i < rs_scatter_block_rows; i++) { 429 | let smem_idx = smem_reorder_offset + (kr[i] >> 16u) - 1u; 430 | scatter_smem[smem_idx] = kr[i]; 431 | } 432 | workgroupBarrier(); 433 | 434 | // Load kr[] from sorted location -- we only need the rank 435 | for (var i = 0u; i < rs_scatter_block_rows; i++) { 436 | kr[i] = scatter_smem[smem_base + i * {scatter_wg_size}u] & 0xFFFFu; 437 | } 438 | 439 | // convert local index to a global index, corresponds to rs_local_to_global 440 | for (var i = 0u; i < rs_scatter_block_rows; i++) { 441 | let v = bitcast(kv[i]); 442 | let digit = extractBits(v, pass_ * rs_radix_log2, rs_radix_log2); 443 | let exc = scatter_smem[digit]; 444 | 445 | kr[i] += exc - 1u; 446 | } 447 | 448 | // the storing is done in the scatter_even and scatter_odd 
// Even scatter pass (radix digits 0 and 2): reads from `keys` / `payload_a` and writes the
// reordered key-value pairs to `keys_b` / `payload_b`.
//
// Pass bookkeeping: the digit index is derived from `infos.even_pass`, which is toggled by
// scatter_odd. This entry point in turn toggles `infos.odd_pass` for the scatter_odd
// dispatch that follows it.
//
// Partition status encoding used by this pass: invalid = 0, reduction = 1, prefix = 2.
// scatter_odd uses invalid = 2, reduction = 3, prefix = 0, i.e. the "prefix" value of one
// pass is the "invalid" value of the next (presumably so the partitions need no reset
// between passes - TODO confirm).
@compute @workgroup_size({scatter_wg_size})
fn scatter_even(@builtin(workgroup_id) wid: vec3<u32>, @builtin(local_invocation_id) lid: vec3<u32>, @builtin(global_invocation_id) gid: vec3<u32>, @builtin(num_workgroups) nwg: vec3<u32>) {
    if gid.x == 0u {
        infos.odd_pass = (infos.odd_pass + 1u) % 2u; // for this to work correctly the odd_pass has to start 1
    }
    let cur_pass = infos.even_pass * 2u;

    // load from keys, store to keys_b
    fill_kv_even(wid.x, lid.x);

    let partition_status_invalid = 0u;
    let partition_status_reduction = 1u;
    let partition_status_prefix = 2u;
    scatter(cur_pass, lid, gid, wid, nwg, partition_status_invalid, partition_status_reduction, partition_status_prefix);

    // store keyvals to their new locations, corresponds to rs_store
    // (after scatter(), kr[i] holds the global destination index of element i)
    for (var i = 0u; i < rs_scatter_block_rows; i++) {
        keys_b[kr[i]] = kv[i];
    }
    for (var i = 0u; i < rs_scatter_block_rows; i++) {
        payload_b[kr[i]] = pv[i];
    }
}
// IMPORTANT: the following constants have to be synced with the numbers in radix_sort.wgsl

/// Workgroup size of the histogram shader (number of invocations per workgroup).
const HISTOGRAM_WG_SIZE: u32 = 256;

/// Workgroup size of the prefix-sum shader.
/// One thread operates on 2 prefixes at the same time, so this is half of the radix size.
const PREFIX_WG_SIZE: u32 = 1 << 7;

/// Workgroup size of the scatter compute shaders.
const SCATTER_WG_SIZE: u32 = 1 << 8;

/// Number of key bits handled per pass: we sort 8 bits per pass.
const RS_RADIX_LOG2: u32 = 8;

/// Number of entries in one radix table / histogram (256).
const RS_RADIX_SIZE: u32 = 1 << RS_RADIX_LOG2;

/// Number of radix digits in a 32-bit key (`32 / RS_RADIX_LOG2`).
/// With 8-bit digits this equals the key size in bytes.
const RS_KEYVAL_SIZE: u32 = 32 / RS_RADIX_LOG2;

/// Number of keys every invocation of the histogram shader loads and processes
/// (one workgroup therefore covers `HISTOGRAM_WG_SIZE * RS_HISTOGRAM_BLOCK_ROWS` keys).
const RS_HISTOGRAM_BLOCK_ROWS: u32 = 15;
45 | const RS_SCATTER_BLOCK_ROWS: u32 = RS_HISTOGRAM_BLOCK_ROWS; 46 | 47 | /// number of elements scattered by one work group 48 | const SCATTER_BLOCK_KVS: u32 = HISTOGRAM_WG_SIZE * RS_SCATTER_BLOCK_ROWS; 49 | 50 | /// number of elements scattered by one work group 51 | pub const HISTO_BLOCK_KVS: u32 = HISTOGRAM_WG_SIZE * RS_HISTOGRAM_BLOCK_ROWS; 52 | 53 | /// bytes per value 54 | /// currently only 4 byte values are allowed 55 | const BYTES_PER_PAYLOAD_ELEM: u32 = 4; 56 | 57 | /// number of passed used for sorting 58 | /// we sort 8 bits per pass so 4 passes are required for a 32 bit value 59 | const NUM_PASSES: u32 = BYTES_PER_PAYLOAD_ELEM; 60 | 61 | /// Sorting pipeline. It can be used to sort key-value pairs stored in [SortBuffers] 62 | pub struct GPUSorter { 63 | zero_p: wgpu::ComputePipeline, 64 | histogram_p: wgpu::ComputePipeline, 65 | prefix_p: wgpu::ComputePipeline, 66 | scatter_even_p: wgpu::ComputePipeline, 67 | scatter_odd_p: wgpu::ComputePipeline, 68 | } 69 | 70 | impl GPUSorter { 71 | pub fn new(device: &wgpu::Device, subgroup_size: u32) -> Self { 72 | // special variables for scatter shade 73 | let histogram_sg_size = subgroup_size; 74 | let rs_sweep_0_size = RS_RADIX_SIZE / histogram_sg_size; 75 | let rs_sweep_1_size = rs_sweep_0_size / histogram_sg_size; 76 | let rs_sweep_2_size = rs_sweep_1_size / histogram_sg_size; 77 | let rs_sweep_size = rs_sweep_0_size + rs_sweep_1_size + rs_sweep_2_size; 78 | let _rs_smem_phase_1 = RS_RADIX_SIZE + RS_RADIX_SIZE + rs_sweep_size; 79 | let rs_smem_phase_2 = RS_RADIX_SIZE + RS_SCATTER_BLOCK_ROWS * SCATTER_WG_SIZE; 80 | // rs_smem_phase_2 will always be larger, so always use phase2 81 | let rs_mem_dwords = rs_smem_phase_2; 82 | let rs_mem_sweep_0_offset = 0; 83 | let rs_mem_sweep_1_offset = rs_mem_sweep_0_offset + rs_sweep_0_size; 84 | let rs_mem_sweep_2_offset = rs_mem_sweep_1_offset + rs_sweep_1_size; 85 | 86 | let bind_group_layout = Self::bind_group_layout(device); 87 | 88 | let pipeline_layout: 
wgpu::PipelineLayout = 89 | device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { 90 | label: Some("radix sort pipeline layout"), 91 | bind_group_layouts: &[&bind_group_layout], 92 | push_constant_ranges: &[], 93 | }); 94 | 95 | let raw_shader: &str = include_str!("radix_sort.wgsl"); 96 | 97 | // TODO replace with this with pipeline-overridable constants once they are available 98 | let shader_w_const = format!( 99 | "const histogram_sg_size: u32 = {:}u;\n\ 100 | const histogram_wg_size: u32 = {:}u;\n\ 101 | const rs_radix_log2: u32 = {:}u;\n\ 102 | const rs_radix_size: u32 = {:}u;\n\ 103 | const rs_keyval_size: u32 = {:}u;\n\ 104 | const rs_histogram_block_rows: u32 = {:}u;\n\ 105 | const rs_scatter_block_rows: u32 = {:}u;\n\ 106 | const rs_mem_dwords: u32 = {:}u;\n\ 107 | const rs_mem_sweep_0_offset: u32 = {:}u;\n\ 108 | const rs_mem_sweep_1_offset: u32 = {:}u;\n\ 109 | const rs_mem_sweep_2_offset: u32 = {:}u;\n{:}", 110 | histogram_sg_size, 111 | HISTOGRAM_WG_SIZE, 112 | RS_RADIX_LOG2, 113 | RS_RADIX_SIZE, 114 | RS_KEYVAL_SIZE, 115 | RS_HISTOGRAM_BLOCK_ROWS, 116 | RS_SCATTER_BLOCK_ROWS, 117 | rs_mem_dwords, 118 | rs_mem_sweep_0_offset, 119 | rs_mem_sweep_1_offset, 120 | rs_mem_sweep_2_offset, 121 | raw_shader 122 | ); 123 | let shader_code = shader_w_const 124 | .replace( 125 | "{histogram_wg_size}", 126 | HISTOGRAM_WG_SIZE.to_string().as_str(), 127 | ) 128 | .replace("{prefix_wg_size}", PREFIX_WG_SIZE.to_string().as_str()) 129 | .replace("{scatter_wg_size}", SCATTER_WG_SIZE.to_string().as_str()); 130 | 131 | let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { 132 | label: Some("Radix sort shader"), 133 | source: wgpu::ShaderSource::Wgsl(shader_code.into()), 134 | }); 135 | let zero_p = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { 136 | label: Some("Zero the histograms"), 137 | layout: Some(&pipeline_layout), 138 | module: &shader, 139 | entry_point: Some("zero_histograms"), 140 | compilation_options: 
Default::default(), 141 | cache: None, 142 | }); 143 | let histogram_p = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { 144 | label: Some("calculate_histogram"), 145 | layout: Some(&pipeline_layout), 146 | module: &shader, 147 | entry_point: Some("calculate_histogram"), 148 | compilation_options: Default::default(), 149 | cache: None, 150 | }); 151 | let prefix_p = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { 152 | label: Some("prefix_histogram"), 153 | layout: Some(&pipeline_layout), 154 | module: &shader, 155 | entry_point: Some("prefix_histogram"), 156 | compilation_options: Default::default(), 157 | cache: None, 158 | }); 159 | let scatter_even_p = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { 160 | label: Some("scatter_even"), 161 | layout: Some(&pipeline_layout), 162 | module: &shader, 163 | entry_point: Some("scatter_even"), 164 | compilation_options: Default::default(), 165 | cache: None, 166 | }); 167 | let scatter_odd_p = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { 168 | label: Some("scatter_odd"), 169 | layout: Some(&pipeline_layout), 170 | module: &shader, 171 | entry_point: Some("scatter_odd"), 172 | compilation_options: Default::default(), 173 | cache: None, 174 | }); 175 | 176 | return Self { 177 | zero_p, 178 | histogram_p, 179 | prefix_p, 180 | scatter_even_p, 181 | scatter_odd_p, 182 | }; 183 | } 184 | 185 | fn bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout { 186 | return device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { 187 | label: Some("radix sort bind group layout"), 188 | entries: &[ 189 | wgpu::BindGroupLayoutEntry { 190 | binding: 0, 191 | visibility: wgpu::ShaderStages::COMPUTE, 192 | ty: wgpu::BindingType::Buffer { 193 | ty: wgpu::BufferBindingType::Storage { read_only: false }, 194 | has_dynamic_offset: false, 195 | min_binding_size: Some( 196 | NonZeroU64::new(mem::size_of::() as u64).unwrap(), 197 | ), 198 | }, 199 | 
count: None, 200 | }, 201 | wgpu::BindGroupLayoutEntry { 202 | binding: 1, 203 | visibility: wgpu::ShaderStages::COMPUTE, 204 | ty: wgpu::BindingType::Buffer { 205 | ty: wgpu::BufferBindingType::Storage { read_only: false }, 206 | has_dynamic_offset: false, 207 | min_binding_size: None, 208 | }, 209 | count: None, 210 | }, 211 | wgpu::BindGroupLayoutEntry { 212 | binding: 2, 213 | visibility: wgpu::ShaderStages::COMPUTE, 214 | ty: wgpu::BindingType::Buffer { 215 | ty: wgpu::BufferBindingType::Storage { read_only: false }, 216 | has_dynamic_offset: false, 217 | min_binding_size: None, 218 | }, 219 | count: None, 220 | }, 221 | wgpu::BindGroupLayoutEntry { 222 | binding: 3, 223 | visibility: wgpu::ShaderStages::COMPUTE, 224 | ty: wgpu::BindingType::Buffer { 225 | ty: wgpu::BufferBindingType::Storage { read_only: false }, 226 | has_dynamic_offset: false, 227 | min_binding_size: None, 228 | }, 229 | count: None, 230 | }, 231 | wgpu::BindGroupLayoutEntry { 232 | binding: 4, 233 | visibility: wgpu::ShaderStages::COMPUTE, 234 | ty: wgpu::BindingType::Buffer { 235 | ty: wgpu::BufferBindingType::Storage { read_only: false }, 236 | has_dynamic_offset: false, 237 | min_binding_size: None, 238 | }, 239 | count: None, 240 | }, 241 | wgpu::BindGroupLayoutEntry { 242 | binding: 5, 243 | visibility: wgpu::ShaderStages::COMPUTE, 244 | ty: wgpu::BindingType::Buffer { 245 | ty: wgpu::BufferBindingType::Storage { read_only: false }, 246 | has_dynamic_offset: false, 247 | min_binding_size: None, 248 | }, 249 | count: None, 250 | }, 251 | ], 252 | }); 253 | } 254 | 255 | fn create_keyval_buffers( 256 | device: &wgpu::Device, 257 | length: u32, 258 | ) -> (wgpu::Buffer, wgpu::Buffer, wgpu::Buffer, wgpu::Buffer) { 259 | // add padding so that our buffer size is a multiple of keys_per_workgroup 260 | let count_ru_histo = keys_buffer_size(length) * RS_KEYVAL_SIZE; 261 | 262 | // creating the two needed buffers for sorting 263 | let keys = device.create_buffer(&wgpu::BufferDescriptor { 264 | 
label: Some("radix sort keys buffer"), 265 | size: (count_ru_histo * BYTES_PER_PAYLOAD_ELEM) as u64, 266 | usage: wgpu::BufferUsages::STORAGE 267 | | wgpu::BufferUsages::COPY_DST 268 | | wgpu::BufferUsages::COPY_SRC, 269 | mapped_at_creation: false, 270 | }); 271 | 272 | // auxiliary buffer for keys 273 | let keys_aux = device.create_buffer(&wgpu::BufferDescriptor { 274 | label: Some("radix sort keys auxiliary buffer"), 275 | size: (count_ru_histo * BYTES_PER_PAYLOAD_ELEM) as u64, 276 | usage: wgpu::BufferUsages::STORAGE, 277 | mapped_at_creation: false, 278 | }); 279 | 280 | let payload_size = length * BYTES_PER_PAYLOAD_ELEM; // make sure that we have at least 1 byte of data; 281 | let payload = device.create_buffer(&wgpu::BufferDescriptor { 282 | label: Some("radix sort payload buffer"), 283 | size: payload_size as u64, 284 | usage: wgpu::BufferUsages::STORAGE 285 | | wgpu::BufferUsages::COPY_DST 286 | | wgpu::BufferUsages::COPY_SRC, 287 | mapped_at_creation: false, 288 | }); 289 | // auxiliary buffer for payload/values 290 | let payload_aux = device.create_buffer(&wgpu::BufferDescriptor { 291 | label: Some("radix sort payload auxiliary buffer"), 292 | size: payload_size as u64, 293 | usage: wgpu::BufferUsages::STORAGE, 294 | mapped_at_creation: false, 295 | }); 296 | return (keys, keys_aux, payload, payload_aux); 297 | } 298 | 299 | // calculates and allocates a buffer that is sufficient for holding all needed information for 300 | // sorting. 
This includes the histograms and the temporary scatter buffer 301 | // @return: tuple containing [internal memory buffer (should be bound at shader binding 1, count_ru_histo (padded size needed for the keyval buffer)] 302 | fn create_internal_mem_buffer(&self, device: &wgpu::Device, length: u32) -> wgpu::Buffer { 303 | // currently only a few different key bits are supported, maybe has to be extended 304 | 305 | // The "internal" memory map looks like this: 306 | // +---------------------------------+ <-- 0 307 | // | histograms[keyval_size] | 308 | // +---------------------------------+ <-- keyval_size * histo_size 309 | // | partitions[scatter_blocks_ru-1] | 310 | // +---------------------------------+ <-- (keyval_size + scatter_blocks_ru - 1) * histo_size 311 | // | workgroup_ids[keyval_size] | 312 | // +---------------------------------+ <-- (keyval_size + scatter_blocks_ru - 1) * histo_size + workgroup_ids_size 313 | 314 | let scatter_blocks_ru = scatter_blocks_ru(length); 315 | 316 | let histo_size = RS_RADIX_SIZE * std::mem::size_of::() as u32; 317 | 318 | let internal_size = (RS_KEYVAL_SIZE + scatter_blocks_ru) * histo_size; // +1 safety 319 | 320 | let buffer = device.create_buffer(&wgpu::BufferDescriptor { 321 | label: Some("Internal radix sort buffer"), 322 | size: internal_size as u64, 323 | usage: wgpu::BufferUsages::STORAGE, 324 | mapped_at_creation: false, 325 | }); 326 | return buffer; 327 | } 328 | 329 | fn general_info_data(length: u32) -> SorterState { 330 | SorterState { 331 | num_keys: length, 332 | padded_size: keys_buffer_size(length), 333 | even_pass: 0, 334 | odd_pass: 0, 335 | } 336 | } 337 | 338 | fn record_calculate_histogram( 339 | &self, 340 | bind_group: &wgpu::BindGroup, 341 | length: u32, 342 | encoder: &mut wgpu::CommandEncoder, 343 | ) { 344 | // as we only deal with 32 bit float values always 4 passes are conducted 345 | let hist_blocks_ru = histo_blocks_ru(length); 346 | 347 | { 348 | let mut pass = 
encoder.begin_compute_pass(&ComputePassDescriptor {
                label: Some("zeroing the histogram"),
                timestamp_writes: None,
            });

            pass.set_pipeline(&self.zero_p);
            pass.set_bind_group(0, bind_group, &[]);
            pass.dispatch_workgroups(hist_blocks_ru as u32, 1, 1);
        }

        {
            let mut pass = encoder.begin_compute_pass(&ComputePassDescriptor {
                label: Some("calculate histogram"),
                timestamp_writes: None,
            });

            pass.set_pipeline(&self.histogram_p);
            pass.set_bind_group(0, bind_group, &[]);
            pass.dispatch_workgroups(hist_blocks_ru as u32, 1, 1);
        }
    }

    /// Records the two histogram passes using an indirect dispatch.
    ///
    /// A "zeroing the histogram" pass is recorded first, followed by a
    /// "calculate histogram" pass. Both read their workgroup counts from
    /// offset 0 of `dispatch_buffer` and use `bind_group` at index 0.
    fn record_calculate_histogram_indirect(
        &self,
        bind_group: &wgpu::BindGroup,
        dispatch_buffer: &wgpu::Buffer,
        encoder: &mut wgpu::CommandEncoder,
    ) {
        let stages = [
            ("zeroing the histogram", &self.zero_p),
            ("calculate histogram", &self.histogram_p),
        ];

        // One compute pass per stage; the pass is dropped at the end of each iteration,
        // so the second stage is recorded in its own pass after the first.
        for (label, pipeline) in stages {
            let mut stage_pass = encoder.begin_compute_pass(&ComputePassDescriptor {
                label: Some(label),
                timestamp_writes: None,
            });

            stage_pass.set_pipeline(pipeline);
            stage_pass.set_bind_group(0, bind_group, &[]);
            stage_pass.dispatch_workgroups_indirect(dispatch_buffer, 0);
        }
    }

    /// Records the "prefix histogram" pass.
    ///
    /// There is no indirect variant of this step: the number of workgroups is
    /// always `NUM_PASSES`, i.e. it is determined by the amount of passes and
    /// not by the number of keys.
    fn record_prefix_histogram(
        &self,
        bind_group: &wgpu::BindGroup,
        encoder: &mut wgpu::CommandEncoder,
    ) {
        let descriptor = wgpu::ComputePassDescriptor {
            label: Some("prefix histogram"),
            timestamp_writes: None,
        };
        let mut prefix_pass = encoder.begin_compute_pass(&descriptor);

        prefix_pass.set_pipeline(&self.prefix_p);
        prefix_pass.set_bind_group(0, bind_group, &[]);
        prefix_pass.dispatch_workgroups(NUM_PASSES as u32, 1, 1);
    }

    /// Records the scatter step for the first `length` key-value pairs.
    ///
    /// Inside a single compute pass the even and odd scatter pipelines are
    /// dispatched alternately, four dispatches in total, each with
    /// `scatter_blocks_ru(length)` workgroups.
    // NOTE(review): presumably one dispatch per radix pass, ping-ponging between the
    // a/b buffers — confirm against radix_sort.wgsl.
    fn record_scatter_keys(
        &self,
        bind_group: &wgpu::BindGroup,
        length: u32,
        encoder: &mut wgpu::CommandEncoder,
    ) {
        let workgroups = scatter_blocks_ru(length);

        let mut scatter_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
            label: Some("Scatter keyvals"),
            timestamp_writes: None,
        });
        scatter_pass.set_bind_group(0, bind_group, &[]);

        let pipelines = [
            &self.scatter_even_p,
            &self.scatter_odd_p,
            &self.scatter_even_p,
            &self.scatter_odd_p,
        ];
        for pipeline in pipelines {
            scatter_pass.set_pipeline(pipeline);
            scatter_pass.dispatch_workgroups(workgroups, 1, 1);
        }
    }

    /// Indirect counterpart of `record_scatter_keys`.
    ///
    /// Records the same even/odd/even/odd sequence of scatter dispatches, but
    /// every dispatch reads its workgroup counts from offset 0 of `dispatch_buffer`.
    fn record_scatter_keys_indirect(
        &self,
        bind_group: &wgpu::BindGroup,
        dispatch_buffer: &wgpu::Buffer,
        encoder: &mut wgpu::CommandEncoder,
    ) {
        let mut scatter_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
            label: Some("radix sort scatter keyvals"),
            timestamp_writes: None,
        });
        scatter_pass.set_bind_group(0, bind_group, &[]);

        let pipelines = [
            &self.scatter_even_p,
            &self.scatter_odd_p,
            &self.scatter_even_p,
            &self.scatter_odd_p,
        ];
        for pipeline in pipelines {
            scatter_pass.set_pipeline(pipeline);
            scatter_pass.dispatch_workgroups_indirect(dispatch_buffer, 0);
        }
    }

    /// Writes sort commands to command encoder.
468 | /// If sort_first_n is not none one the first n elements are sorted 469 | /// otherwise everything is sorted. 470 | /// 471 | /// **IMPORTANT**: if less than the whole buffer is sorted the rest of the keys buffer will be be corrupted 472 | pub fn sort( 473 | &self, 474 | encoder: &mut wgpu::CommandEncoder, 475 | queue: &wgpu::Queue, 476 | sort_buffers: &SortBuffers, 477 | sort_first_n: Option, 478 | ) { 479 | let bind_group = &sort_buffers.bind_group; 480 | let num_elements = sort_first_n.unwrap_or(sort_buffers.len()); 481 | 482 | // write number of elements to buffer 483 | queue.write_buffer(&sort_buffers.state_buffer, 0, bytes_of(&num_elements)); 484 | 485 | self.record_calculate_histogram(bind_group, num_elements, encoder); 486 | self.record_prefix_histogram(bind_group, encoder); 487 | self.record_scatter_keys(bind_group, num_elements, encoder); 488 | } 489 | 490 | /// Initiates sorting with an indirect call. 491 | /// The dispatch buffer must contain the struct [wgpu::util::DispatchIndirectArgs]. 492 | /// 493 | /// number of y and z workgroups must be 1 494 | /// 495 | /// x = (N + [HISTO_BLOCK_KVS]- 1 )/[HISTO_BLOCK_KVS], 496 | /// where N are the first N elements to be sorted 497 | /// 498 | /// [SortBuffers::state_buffer] contains the number of keys that will be sorted. 499 | /// This is set to sort the whole buffer by default. 500 | /// 501 | /// **IMPORTANT**: if less than the whole buffer is sorted the rest of the keys buffer will most likely be corrupted. 
pub fn sort_indirect(
        &self,
        encoder: &mut wgpu::CommandEncoder,
        sort_buffers: &SortBuffers,
        dispatch_buffer: &wgpu::Buffer,
    ) {
        let bind_group = &sort_buffers.bind_group;

        // Same three steps as the direct path; the histogram and scatter steps take
        // their workgroup counts from `dispatch_buffer` instead of a CPU-side length.
        self.record_calculate_histogram_indirect(bind_group, dispatch_buffer, encoder);
        self.record_prefix_histogram(bind_group, encoder);
        self.record_scatter_keys_indirect(bind_group, dispatch_buffer, encoder);
    }

    /// Creates all buffers necessary for sorting `length` key-value pairs,
    /// together with the bind group that exposes them to the sort pipelines.
    pub fn create_sort_buffers(&self, device: &wgpu::Device, length: NonZeroU32) -> SortBuffers {
        let length = length.get();

        let (keys_a, keys_b, payload_a, payload_b) =
            GPUSorter::create_keyval_buffers(device, length);
        let internal_mem_buffer = self.create_internal_mem_buffer(device, length);

        // Despite its label this buffer is created with STORAGE usage (not UNIFORM);
        // COPY_DST allows the CPU to update it, e.g. the element count written by `sort`.
        let uniform_infos = Self::general_info_data(length);
        let uniform_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("radix sort uniform buffer"),
            contents: bytemuck::bytes_of(&uniform_infos),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });
        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("radix sort bind group"),
            layout: &Self::bind_group_layout(device),
            entries: &[
                // 0: sorter state
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: uniform_buffer.as_entire_binding(),
                },
                // 1: internal memory (intermediate results)
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: internal_mem_buffer.as_entire_binding(),
                },
                // 2/3: key buffer and its intermediate counterpart
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: keys_a.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: keys_b.as_entire_binding(),
                },
                // 4/5: payload buffer and its intermediate counterpart
                wgpu::BindGroupEntry {
                    binding: 4,
                    resource: payload_a.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 5,
                    resource: payload_b.as_entire_binding(),
                },
            ],
        });
        SortBuffers {
            keys_a,
            keys_b,
            payload_a,
            payload_b,
            internal_mem_buffer,
            state_buffer: uniform_buffer,
            bind_group,
            length,
        }
    }
}

/// Struct containing information about the state of the sorter.
#[repr(C)]
#[derive(Clone, Copy, bytemuck::Zeroable, bytemuck::Pod)]
pub struct SorterState {
    /// number of first n keys that will be sorted
    pub num_keys: u32,
    padded_size: u32,
    even_pass: u32,
    odd_pass: u32,
}

/// Struct containing all buffers necessary for sorting.
/// The key and value buffers can be read and written.
pub struct SortBuffers {
    /// keys that are sorted
    keys_a: wgpu::Buffer,
    /// intermediate key buffer for sorting
    // Only bound through `bind_group`; never read directly on the CPU side.
    #[allow(dead_code)]
    keys_b: wgpu::Buffer,
    /// value/payload buffer that is sorted
    payload_a: wgpu::Buffer,
    /// intermediate value buffer for sorting
    // Only bound through `bind_group`; never read directly on the CPU side.
    #[allow(dead_code)]
    payload_b: wgpu::Buffer,

    /// buffer used to store intermediate results like histograms and scatter partitions
    // Only bound through `bind_group`; never read directly on the CPU side.
    #[allow(dead_code)]
    internal_mem_buffer: wgpu::Buffer,

    /// state buffer used for sorting
    state_buffer: wgpu::Buffer,

    /// bind group used for sorting
    bind_group: wgpu::BindGroup,

    /// number of key-value pairs
    length: u32,
}

impl SortBuffers {
    /// number of key-value pairs that can be stored in this buffer
    pub fn len(&self) -> u32 {
        self.length
    }

    /// Buffer storing the keys values.
    ///
    /// **WARNING**: this buffer has padding bytes at the end
    /// use [SortBuffers::keys_valid_size] to get the valid size.
    pub fn keys(&self) -> &wgpu::Buffer {
        &self.keys_a
    }

    /// The keys buffer has padding bytes.
627 | /// This function returns the number of bytes without padding 628 | pub fn keys_valid_size(&self) -> u64 { 629 | (self.len() * RS_KEYVAL_SIZE) as u64 630 | } 631 | 632 | /// Buffer containing the values 633 | pub fn values(&self) -> &wgpu::Buffer { 634 | &self.payload_a 635 | } 636 | 637 | /// Buffer containing a [SorterState] 638 | pub fn state_buffer(&self) -> &wgpu::Buffer { 639 | &self.state_buffer 640 | } 641 | } 642 | 643 | fn scatter_blocks_ru(n: u32) -> u32 { 644 | (n + SCATTER_BLOCK_KVS - 1) / SCATTER_BLOCK_KVS 645 | } 646 | 647 | /// number of histogram blocks required 648 | fn histo_blocks_ru(n: u32) -> u32 { 649 | (scatter_blocks_ru(n) * SCATTER_BLOCK_KVS + HISTO_BLOCK_KVS - 1) / HISTO_BLOCK_KVS 650 | } 651 | 652 | /// keys buffer must be multiple of HISTO_BLOCK_KVS 653 | fn keys_buffer_size(n: u32) -> u32 { 654 | histo_blocks_ru(n) * HISTO_BLOCK_KVS 655 | } 656 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 4 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.3" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "android_system_properties" 16 | version = "0.1.5" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" 19 | dependencies = [ 20 | "libc", 21 | ] 22 | 23 | [[package]] 24 | name = "anes" 25 | version = "0.1.6" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 28 | 29 | [[package]] 30 | name = "anstream" 31 | version = "0.6.14" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" 34 | dependencies = [ 35 | "anstyle", 36 | "anstyle-parse", 37 | "anstyle-query", 38 | "anstyle-wincon", 39 | "colorchoice", 40 | "is_terminal_polyfill", 41 | "utf8parse", 42 | ] 43 | 44 | [[package]] 45 | name = "anstyle" 46 | version = "1.0.7" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" 49 | 50 | [[package]] 51 | name = "anstyle-parse" 52 | version = "0.2.4" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" 55 | dependencies = [ 56 | "utf8parse", 57 | ] 58 | 59 | [[package]] 60 | name = "anstyle-query" 61 | version = "1.1.0" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" 64 | dependencies = [ 65 | "windows-sys", 66 | ] 67 | 68 | [[package]] 69 | name = "anstyle-wincon" 70 
| version = "3.0.3" 71 | source = "registry+https://github.com/rust-lang/crates.io-index" 72 | checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" 73 | dependencies = [ 74 | "anstyle", 75 | "windows-sys", 76 | ] 77 | 78 | [[package]] 79 | name = "arrayvec" 80 | version = "0.7.4" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" 83 | 84 | [[package]] 85 | name = "ash" 86 | version = "0.38.0+1.3.281" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "0bb44936d800fea8f016d7f2311c6a4f97aebd5dc86f09906139ec848cf3a46f" 89 | dependencies = [ 90 | "libloading", 91 | ] 92 | 93 | [[package]] 94 | name = "atty" 95 | version = "0.2.14" 96 | source = "registry+https://github.com/rust-lang/crates.io-index" 97 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 98 | dependencies = [ 99 | "hermit-abi", 100 | "libc", 101 | "winapi", 102 | ] 103 | 104 | [[package]] 105 | name = "autocfg" 106 | version = "1.3.0" 107 | source = "registry+https://github.com/rust-lang/crates.io-index" 108 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" 109 | 110 | [[package]] 111 | name = "bit-set" 112 | version = "0.8.0" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" 115 | dependencies = [ 116 | "bit-vec", 117 | ] 118 | 119 | [[package]] 120 | name = "bit-vec" 121 | version = "0.8.0" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" 124 | 125 | [[package]] 126 | name = "bitflags" 127 | version = "1.3.2" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 130 | 131 | 
[[package]] 132 | name = "bitflags" 133 | version = "2.10.0" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" 136 | 137 | [[package]] 138 | name = "block" 139 | version = "0.1.6" 140 | source = "registry+https://github.com/rust-lang/crates.io-index" 141 | checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" 142 | 143 | [[package]] 144 | name = "bumpalo" 145 | version = "3.16.0" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" 148 | 149 | [[package]] 150 | name = "bytemuck" 151 | version = "1.24.0" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" 154 | dependencies = [ 155 | "bytemuck_derive", 156 | ] 157 | 158 | [[package]] 159 | name = "bytemuck_derive" 160 | version = "1.10.2" 161 | source = "registry+https://github.com/rust-lang/crates.io-index" 162 | checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" 163 | dependencies = [ 164 | "proc-macro2", 165 | "quote", 166 | "syn 2.0.110", 167 | ] 168 | 169 | [[package]] 170 | name = "cast" 171 | version = "0.3.0" 172 | source = "registry+https://github.com/rust-lang/crates.io-index" 173 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 174 | 175 | [[package]] 176 | name = "cfg-if" 177 | version = "1.0.0" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 180 | 181 | [[package]] 182 | name = "cfg_aliases" 183 | version = "0.2.1" 184 | source = "registry+https://github.com/rust-lang/crates.io-index" 185 | checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" 186 | 187 | [[package]] 188 | name = "ciborium" 
189 | version = "0.2.2" 190 | source = "registry+https://github.com/rust-lang/crates.io-index" 191 | checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 192 | dependencies = [ 193 | "ciborium-io", 194 | "ciborium-ll", 195 | "serde", 196 | ] 197 | 198 | [[package]] 199 | name = "ciborium-io" 200 | version = "0.2.2" 201 | source = "registry+https://github.com/rust-lang/crates.io-index" 202 | checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 203 | 204 | [[package]] 205 | name = "ciborium-ll" 206 | version = "0.2.2" 207 | source = "registry+https://github.com/rust-lang/crates.io-index" 208 | checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 209 | dependencies = [ 210 | "ciborium-io", 211 | "half", 212 | ] 213 | 214 | [[package]] 215 | name = "clap" 216 | version = "3.2.25" 217 | source = "registry+https://github.com/rust-lang/crates.io-index" 218 | checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" 219 | dependencies = [ 220 | "bitflags 1.3.2", 221 | "clap_lex", 222 | "indexmap 1.9.3", 223 | "textwrap", 224 | ] 225 | 226 | [[package]] 227 | name = "clap_lex" 228 | version = "0.2.4" 229 | source = "registry+https://github.com/rust-lang/crates.io-index" 230 | checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" 231 | dependencies = [ 232 | "os_str_bytes", 233 | ] 234 | 235 | [[package]] 236 | name = "codespan-reporting" 237 | version = "0.12.0" 238 | source = "registry+https://github.com/rust-lang/crates.io-index" 239 | checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81" 240 | dependencies = [ 241 | "serde", 242 | "termcolor", 243 | "unicode-width", 244 | ] 245 | 246 | [[package]] 247 | name = "colorchoice" 248 | version = "1.0.1" 249 | source = "registry+https://github.com/rust-lang/crates.io-index" 250 | checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" 251 | 252 | [[package]] 253 | 
name = "core-foundation" 254 | version = "0.10.1" 255 | source = "registry+https://github.com/rust-lang/crates.io-index" 256 | checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" 257 | dependencies = [ 258 | "core-foundation-sys", 259 | "libc", 260 | ] 261 | 262 | [[package]] 263 | name = "core-foundation-sys" 264 | version = "0.8.6" 265 | source = "registry+https://github.com/rust-lang/crates.io-index" 266 | checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" 267 | 268 | [[package]] 269 | name = "core-graphics-types" 270 | version = "0.2.0" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "3d44a101f213f6c4cdc1853d4b78aef6db6bdfa3468798cc1d9912f4735013eb" 273 | dependencies = [ 274 | "bitflags 2.10.0", 275 | "core-foundation", 276 | "libc", 277 | ] 278 | 279 | [[package]] 280 | name = "criterion" 281 | version = "0.4.0" 282 | source = "registry+https://github.com/rust-lang/crates.io-index" 283 | checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" 284 | dependencies = [ 285 | "anes", 286 | "atty", 287 | "cast", 288 | "ciborium", 289 | "clap", 290 | "criterion-plot", 291 | "itertools", 292 | "lazy_static", 293 | "num-traits", 294 | "oorandom", 295 | "plotters", 296 | "rayon", 297 | "regex", 298 | "serde", 299 | "serde_derive", 300 | "serde_json", 301 | "tinytemplate", 302 | "walkdir", 303 | ] 304 | 305 | [[package]] 306 | name = "criterion-plot" 307 | version = "0.5.0" 308 | source = "registry+https://github.com/rust-lang/crates.io-index" 309 | checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" 310 | dependencies = [ 311 | "cast", 312 | "itertools", 313 | ] 314 | 315 | [[package]] 316 | name = "crossbeam-deque" 317 | version = "0.8.5" 318 | source = "registry+https://github.com/rust-lang/crates.io-index" 319 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" 320 | dependencies = [ 321 | 
"crossbeam-epoch", 322 | "crossbeam-utils", 323 | ] 324 | 325 | [[package]] 326 | name = "crossbeam-epoch" 327 | version = "0.9.18" 328 | source = "registry+https://github.com/rust-lang/crates.io-index" 329 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 330 | dependencies = [ 331 | "crossbeam-utils", 332 | ] 333 | 334 | [[package]] 335 | name = "crossbeam-utils" 336 | version = "0.8.20" 337 | source = "registry+https://github.com/rust-lang/crates.io-index" 338 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" 339 | 340 | [[package]] 341 | name = "crunchy" 342 | version = "0.2.2" 343 | source = "registry+https://github.com/rust-lang/crates.io-index" 344 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" 345 | 346 | [[package]] 347 | name = "document-features" 348 | version = "0.2.10" 349 | source = "registry+https://github.com/rust-lang/crates.io-index" 350 | checksum = "cb6969eaabd2421f8a2775cfd2471a2b634372b4a25d41e3bd647b79912850a0" 351 | dependencies = [ 352 | "litrs", 353 | ] 354 | 355 | [[package]] 356 | name = "either" 357 | version = "1.13.0" 358 | source = "registry+https://github.com/rust-lang/crates.io-index" 359 | checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" 360 | 361 | [[package]] 362 | name = "env_filter" 363 | version = "0.1.0" 364 | source = "registry+https://github.com/rust-lang/crates.io-index" 365 | checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" 366 | dependencies = [ 367 | "log", 368 | "regex", 369 | ] 370 | 371 | [[package]] 372 | name = "env_logger" 373 | version = "0.11.3" 374 | source = "registry+https://github.com/rust-lang/crates.io-index" 375 | checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" 376 | dependencies = [ 377 | "anstream", 378 | "anstyle", 379 | "env_filter", 380 | "humantime", 381 | "log", 382 | ] 383 | 384 | [[package]] 385 | name = "equivalent" 
386 | version = "1.0.1" 387 | source = "registry+https://github.com/rust-lang/crates.io-index" 388 | checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" 389 | 390 | [[package]] 391 | name = "float-ord" 392 | version = "0.3.2" 393 | source = "registry+https://github.com/rust-lang/crates.io-index" 394 | checksum = "8ce81f49ae8a0482e4c55ea62ebbd7e5a686af544c00b9d090bba3ff9be97b3d" 395 | 396 | [[package]] 397 | name = "foldhash" 398 | version = "0.1.5" 399 | source = "registry+https://github.com/rust-lang/crates.io-index" 400 | checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" 401 | 402 | [[package]] 403 | name = "foldhash" 404 | version = "0.2.0" 405 | source = "registry+https://github.com/rust-lang/crates.io-index" 406 | checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" 407 | 408 | [[package]] 409 | name = "foreign-types" 410 | version = "0.5.0" 411 | source = "registry+https://github.com/rust-lang/crates.io-index" 412 | checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" 413 | dependencies = [ 414 | "foreign-types-macros", 415 | "foreign-types-shared", 416 | ] 417 | 418 | [[package]] 419 | name = "foreign-types-macros" 420 | version = "0.2.3" 421 | source = "registry+https://github.com/rust-lang/crates.io-index" 422 | checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" 423 | dependencies = [ 424 | "proc-macro2", 425 | "quote", 426 | "syn 2.0.110", 427 | ] 428 | 429 | [[package]] 430 | name = "foreign-types-shared" 431 | version = "0.3.1" 432 | source = "registry+https://github.com/rust-lang/crates.io-index" 433 | checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" 434 | 435 | [[package]] 436 | name = "futures-core" 437 | version = "0.3.30" 438 | source = "registry+https://github.com/rust-lang/crates.io-index" 439 | checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" 440 | 441 | 
[[package]] 442 | name = "futures-intrusive" 443 | version = "0.5.0" 444 | source = "registry+https://github.com/rust-lang/crates.io-index" 445 | checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" 446 | dependencies = [ 447 | "futures-core", 448 | "lock_api", 449 | "parking_lot", 450 | ] 451 | 452 | [[package]] 453 | name = "getrandom" 454 | version = "0.2.15" 455 | source = "registry+https://github.com/rust-lang/crates.io-index" 456 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 457 | dependencies = [ 458 | "cfg-if", 459 | "libc", 460 | "wasi", 461 | ] 462 | 463 | [[package]] 464 | name = "gl_generator" 465 | version = "0.14.0" 466 | source = "registry+https://github.com/rust-lang/crates.io-index" 467 | checksum = "1a95dfc23a2b4a9a2f5ab41d194f8bfda3cabec42af4e39f08c339eb2a0c124d" 468 | dependencies = [ 469 | "khronos_api", 470 | "log", 471 | "xml-rs", 472 | ] 473 | 474 | [[package]] 475 | name = "glow" 476 | version = "0.16.0" 477 | source = "registry+https://github.com/rust-lang/crates.io-index" 478 | checksum = "c5e5ea60d70410161c8bf5da3fdfeaa1c72ed2c15f8bbb9d19fe3a4fad085f08" 479 | dependencies = [ 480 | "js-sys", 481 | "slotmap", 482 | "wasm-bindgen", 483 | "web-sys", 484 | ] 485 | 486 | [[package]] 487 | name = "glutin_wgl_sys" 488 | version = "0.6.1" 489 | source = "registry+https://github.com/rust-lang/crates.io-index" 490 | checksum = "2c4ee00b289aba7a9e5306d57c2d05499b2e5dc427f84ac708bd2c090212cf3e" 491 | dependencies = [ 492 | "gl_generator", 493 | ] 494 | 495 | [[package]] 496 | name = "gpu-alloc" 497 | version = "0.6.0" 498 | source = "registry+https://github.com/rust-lang/crates.io-index" 499 | checksum = "fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171" 500 | dependencies = [ 501 | "bitflags 2.10.0", 502 | "gpu-alloc-types", 503 | ] 504 | 505 | [[package]] 506 | name = "gpu-alloc-types" 507 | version = "0.3.0" 508 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 509 | checksum = "98ff03b468aa837d70984d55f5d3f846f6ec31fe34bbb97c4f85219caeee1ca4" 510 | dependencies = [ 511 | "bitflags 2.10.0", 512 | ] 513 | 514 | [[package]] 515 | name = "gpu-allocator" 516 | version = "0.27.0" 517 | source = "registry+https://github.com/rust-lang/crates.io-index" 518 | checksum = "c151a2a5ef800297b4e79efa4f4bec035c5f51d5ae587287c9b952bdf734cacd" 519 | dependencies = [ 520 | "log", 521 | "presser", 522 | "thiserror 1.0.69", 523 | "windows", 524 | ] 525 | 526 | [[package]] 527 | name = "gpu-descriptor" 528 | version = "0.3.2" 529 | source = "registry+https://github.com/rust-lang/crates.io-index" 530 | checksum = "b89c83349105e3732062a895becfc71a8f921bb71ecbbdd8ff99263e3b53a0ca" 531 | dependencies = [ 532 | "bitflags 2.10.0", 533 | "gpu-descriptor-types", 534 | "hashbrown 0.15.5", 535 | ] 536 | 537 | [[package]] 538 | name = "gpu-descriptor-types" 539 | version = "0.2.0" 540 | source = "registry+https://github.com/rust-lang/crates.io-index" 541 | checksum = "fdf242682df893b86f33a73828fb09ca4b2d3bb6cc95249707fc684d27484b91" 542 | dependencies = [ 543 | "bitflags 2.10.0", 544 | ] 545 | 546 | [[package]] 547 | name = "half" 548 | version = "2.7.1" 549 | source = "registry+https://github.com/rust-lang/crates.io-index" 550 | checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" 551 | dependencies = [ 552 | "cfg-if", 553 | "crunchy", 554 | "num-traits", 555 | "zerocopy", 556 | ] 557 | 558 | [[package]] 559 | name = "hashbrown" 560 | version = "0.12.3" 561 | source = "registry+https://github.com/rust-lang/crates.io-index" 562 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 563 | 564 | [[package]] 565 | name = "hashbrown" 566 | version = "0.15.5" 567 | source = "registry+https://github.com/rust-lang/crates.io-index" 568 | checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" 569 | dependencies = [ 570 | "foldhash 
0.1.5", 571 | ] 572 | 573 | [[package]] 574 | name = "hashbrown" 575 | version = "0.16.0" 576 | source = "registry+https://github.com/rust-lang/crates.io-index" 577 | checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" 578 | dependencies = [ 579 | "foldhash 0.2.0", 580 | ] 581 | 582 | [[package]] 583 | name = "hermit-abi" 584 | version = "0.1.19" 585 | source = "registry+https://github.com/rust-lang/crates.io-index" 586 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 587 | dependencies = [ 588 | "libc", 589 | ] 590 | 591 | [[package]] 592 | name = "hexf-parse" 593 | version = "0.2.1" 594 | source = "registry+https://github.com/rust-lang/crates.io-index" 595 | checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" 596 | 597 | [[package]] 598 | name = "humantime" 599 | version = "2.1.0" 600 | source = "registry+https://github.com/rust-lang/crates.io-index" 601 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 602 | 603 | [[package]] 604 | name = "indexmap" 605 | version = "1.9.3" 606 | source = "registry+https://github.com/rust-lang/crates.io-index" 607 | checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" 608 | dependencies = [ 609 | "autocfg", 610 | "hashbrown 0.12.3", 611 | ] 612 | 613 | [[package]] 614 | name = "indexmap" 615 | version = "2.12.0" 616 | source = "registry+https://github.com/rust-lang/crates.io-index" 617 | checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" 618 | dependencies = [ 619 | "equivalent", 620 | "hashbrown 0.16.0", 621 | ] 622 | 623 | [[package]] 624 | name = "is_terminal_polyfill" 625 | version = "1.70.0" 626 | source = "registry+https://github.com/rust-lang/crates.io-index" 627 | checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" 628 | 629 | [[package]] 630 | name = "itertools" 631 | version = "0.10.5" 632 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 633 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 634 | dependencies = [ 635 | "either", 636 | ] 637 | 638 | [[package]] 639 | name = "itoa" 640 | version = "1.0.11" 641 | source = "registry+https://github.com/rust-lang/crates.io-index" 642 | checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" 643 | 644 | [[package]] 645 | name = "jni-sys" 646 | version = "0.3.0" 647 | source = "registry+https://github.com/rust-lang/crates.io-index" 648 | checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" 649 | 650 | [[package]] 651 | name = "js-sys" 652 | version = "0.3.82" 653 | source = "registry+https://github.com/rust-lang/crates.io-index" 654 | checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" 655 | dependencies = [ 656 | "once_cell", 657 | "wasm-bindgen", 658 | ] 659 | 660 | [[package]] 661 | name = "khronos-egl" 662 | version = "6.0.0" 663 | source = "registry+https://github.com/rust-lang/crates.io-index" 664 | checksum = "6aae1df220ece3c0ada96b8153459b67eebe9ae9212258bb0134ae60416fdf76" 665 | dependencies = [ 666 | "libc", 667 | "libloading", 668 | "pkg-config", 669 | ] 670 | 671 | [[package]] 672 | name = "khronos_api" 673 | version = "3.1.0" 674 | source = "registry+https://github.com/rust-lang/crates.io-index" 675 | checksum = "e2db585e1d738fc771bf08a151420d3ed193d9d895a36df7f6f8a9456b911ddc" 676 | 677 | [[package]] 678 | name = "lazy_static" 679 | version = "1.5.0" 680 | source = "registry+https://github.com/rust-lang/crates.io-index" 681 | checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" 682 | 683 | [[package]] 684 | name = "libc" 685 | version = "0.2.177" 686 | source = "registry+https://github.com/rust-lang/crates.io-index" 687 | checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" 688 | 689 | [[package]] 690 | name = "libloading" 691 | version 
= "0.8.4" 692 | source = "registry+https://github.com/rust-lang/crates.io-index" 693 | checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" 694 | dependencies = [ 695 | "cfg-if", 696 | "windows-targets", 697 | ] 698 | 699 | [[package]] 700 | name = "libm" 701 | version = "0.2.15" 702 | source = "registry+https://github.com/rust-lang/crates.io-index" 703 | checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" 704 | 705 | [[package]] 706 | name = "litrs" 707 | version = "0.4.1" 708 | source = "registry+https://github.com/rust-lang/crates.io-index" 709 | checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" 710 | 711 | [[package]] 712 | name = "lock_api" 713 | version = "0.4.12" 714 | source = "registry+https://github.com/rust-lang/crates.io-index" 715 | checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" 716 | dependencies = [ 717 | "autocfg", 718 | "scopeguard", 719 | ] 720 | 721 | [[package]] 722 | name = "log" 723 | version = "0.4.22" 724 | source = "registry+https://github.com/rust-lang/crates.io-index" 725 | checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" 726 | 727 | [[package]] 728 | name = "malloc_buf" 729 | version = "0.0.6" 730 | source = "registry+https://github.com/rust-lang/crates.io-index" 731 | checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" 732 | dependencies = [ 733 | "libc", 734 | ] 735 | 736 | [[package]] 737 | name = "memchr" 738 | version = "2.7.4" 739 | source = "registry+https://github.com/rust-lang/crates.io-index" 740 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 741 | 742 | [[package]] 743 | name = "metal" 744 | version = "0.32.0" 745 | source = "registry+https://github.com/rust-lang/crates.io-index" 746 | checksum = "00c15a6f673ff72ddcc22394663290f870fb224c1bfce55734a75c414150e605" 747 | dependencies = [ 748 | "bitflags 2.10.0", 749 | "block", 750 | 
"core-graphics-types", 751 | "foreign-types", 752 | "log", 753 | "objc", 754 | "paste", 755 | ] 756 | 757 | [[package]] 758 | name = "naga" 759 | version = "27.0.3" 760 | source = "registry+https://github.com/rust-lang/crates.io-index" 761 | checksum = "066cf25f0e8b11ee0df221219010f213ad429855f57c494f995590c861a9a7d8" 762 | dependencies = [ 763 | "arrayvec", 764 | "bit-set", 765 | "bitflags 2.10.0", 766 | "cfg-if", 767 | "cfg_aliases", 768 | "codespan-reporting", 769 | "half", 770 | "hashbrown 0.16.0", 771 | "hexf-parse", 772 | "indexmap 2.12.0", 773 | "libm", 774 | "log", 775 | "num-traits", 776 | "once_cell", 777 | "rustc-hash", 778 | "spirv", 779 | "thiserror 2.0.17", 780 | "unicode-ident", 781 | ] 782 | 783 | [[package]] 784 | name = "ndk-sys" 785 | version = "0.6.0+11769913" 786 | source = "registry+https://github.com/rust-lang/crates.io-index" 787 | checksum = "ee6cda3051665f1fb8d9e08fc35c96d5a244fb1be711a03b71118828afc9a873" 788 | dependencies = [ 789 | "jni-sys", 790 | ] 791 | 792 | [[package]] 793 | name = "num-traits" 794 | version = "0.2.19" 795 | source = "registry+https://github.com/rust-lang/crates.io-index" 796 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 797 | dependencies = [ 798 | "autocfg", 799 | "libm", 800 | ] 801 | 802 | [[package]] 803 | name = "objc" 804 | version = "0.2.7" 805 | source = "registry+https://github.com/rust-lang/crates.io-index" 806 | checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" 807 | dependencies = [ 808 | "malloc_buf", 809 | ] 810 | 811 | [[package]] 812 | name = "once_cell" 813 | version = "1.21.3" 814 | source = "registry+https://github.com/rust-lang/crates.io-index" 815 | checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 816 | 817 | [[package]] 818 | name = "oorandom" 819 | version = "11.1.4" 820 | source = "registry+https://github.com/rust-lang/crates.io-index" 821 | checksum = 
"b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" 822 | 823 | [[package]] 824 | name = "ordered-float" 825 | version = "5.1.0" 826 | source = "registry+https://github.com/rust-lang/crates.io-index" 827 | checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" 828 | dependencies = [ 829 | "num-traits", 830 | ] 831 | 832 | [[package]] 833 | name = "os_str_bytes" 834 | version = "6.6.1" 835 | source = "registry+https://github.com/rust-lang/crates.io-index" 836 | checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" 837 | 838 | [[package]] 839 | name = "parking_lot" 840 | version = "0.12.3" 841 | source = "registry+https://github.com/rust-lang/crates.io-index" 842 | checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" 843 | dependencies = [ 844 | "lock_api", 845 | "parking_lot_core", 846 | ] 847 | 848 | [[package]] 849 | name = "parking_lot_core" 850 | version = "0.9.10" 851 | source = "registry+https://github.com/rust-lang/crates.io-index" 852 | checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" 853 | dependencies = [ 854 | "cfg-if", 855 | "libc", 856 | "redox_syscall", 857 | "smallvec", 858 | "windows-targets", 859 | ] 860 | 861 | [[package]] 862 | name = "paste" 863 | version = "1.0.15" 864 | source = "registry+https://github.com/rust-lang/crates.io-index" 865 | checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" 866 | 867 | [[package]] 868 | name = "pkg-config" 869 | version = "0.3.30" 870 | source = "registry+https://github.com/rust-lang/crates.io-index" 871 | checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" 872 | 873 | [[package]] 874 | name = "plotters" 875 | version = "0.3.6" 876 | source = "registry+https://github.com/rust-lang/crates.io-index" 877 | checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" 878 | dependencies = [ 879 | "num-traits", 880 | 
"plotters-backend", 881 | "plotters-svg", 882 | "wasm-bindgen", 883 | "web-sys", 884 | ] 885 | 886 | [[package]] 887 | name = "plotters-backend" 888 | version = "0.3.6" 889 | source = "registry+https://github.com/rust-lang/crates.io-index" 890 | checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" 891 | 892 | [[package]] 893 | name = "plotters-svg" 894 | version = "0.3.6" 895 | source = "registry+https://github.com/rust-lang/crates.io-index" 896 | checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" 897 | dependencies = [ 898 | "plotters-backend", 899 | ] 900 | 901 | [[package]] 902 | name = "pollster" 903 | version = "0.3.0" 904 | source = "registry+https://github.com/rust-lang/crates.io-index" 905 | checksum = "22686f4785f02a4fcc856d3b3bb19bf6c8160d103f7a99cc258bddd0251dc7f2" 906 | dependencies = [ 907 | "pollster-macro", 908 | ] 909 | 910 | [[package]] 911 | name = "pollster-macro" 912 | version = "0.1.0" 913 | source = "registry+https://github.com/rust-lang/crates.io-index" 914 | checksum = "ea78f0ef4193055a4b09814ce6bcb572ad1174d6023e2f00a9ea1a798d18d301" 915 | dependencies = [ 916 | "proc-macro2", 917 | "quote", 918 | "syn 1.0.109", 919 | ] 920 | 921 | [[package]] 922 | name = "portable-atomic" 923 | version = "1.11.1" 924 | source = "registry+https://github.com/rust-lang/crates.io-index" 925 | checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" 926 | 927 | [[package]] 928 | name = "portable-atomic-util" 929 | version = "0.2.4" 930 | source = "registry+https://github.com/rust-lang/crates.io-index" 931 | checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" 932 | dependencies = [ 933 | "portable-atomic", 934 | ] 935 | 936 | [[package]] 937 | name = "ppv-lite86" 938 | version = "0.2.17" 939 | source = "registry+https://github.com/rust-lang/crates.io-index" 940 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 941 | 942 | [[package]] 
943 | name = "presser" 944 | version = "0.3.1" 945 | source = "registry+https://github.com/rust-lang/crates.io-index" 946 | checksum = "e8cf8e6a8aa66ce33f63993ffc4ea4271eb5b0530a9002db8455ea6050c77bfa" 947 | 948 | [[package]] 949 | name = "proc-macro2" 950 | version = "1.0.103" 951 | source = "registry+https://github.com/rust-lang/crates.io-index" 952 | checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" 953 | dependencies = [ 954 | "unicode-ident", 955 | ] 956 | 957 | [[package]] 958 | name = "profiling" 959 | version = "1.0.15" 960 | source = "registry+https://github.com/rust-lang/crates.io-index" 961 | checksum = "43d84d1d7a6ac92673717f9f6d1518374ef257669c24ebc5ac25d5033828be58" 962 | 963 | [[package]] 964 | name = "quote" 965 | version = "1.0.36" 966 | source = "registry+https://github.com/rust-lang/crates.io-index" 967 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 968 | dependencies = [ 969 | "proc-macro2", 970 | ] 971 | 972 | [[package]] 973 | name = "rand" 974 | version = "0.8.5" 975 | source = "registry+https://github.com/rust-lang/crates.io-index" 976 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 977 | dependencies = [ 978 | "libc", 979 | "rand_chacha", 980 | "rand_core", 981 | ] 982 | 983 | [[package]] 984 | name = "rand_chacha" 985 | version = "0.3.1" 986 | source = "registry+https://github.com/rust-lang/crates.io-index" 987 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 988 | dependencies = [ 989 | "ppv-lite86", 990 | "rand_core", 991 | ] 992 | 993 | [[package]] 994 | name = "rand_core" 995 | version = "0.6.4" 996 | source = "registry+https://github.com/rust-lang/crates.io-index" 997 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 998 | dependencies = [ 999 | "getrandom", 1000 | ] 1001 | 1002 | [[package]] 1003 | name = "range-alloc" 1004 | version = "0.1.3" 1005 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1006 | checksum = "9c8a99fddc9f0ba0a85884b8d14e3592853e787d581ca1816c91349b10e4eeab" 1007 | 1008 | [[package]] 1009 | name = "raw-window-handle" 1010 | version = "0.6.2" 1011 | source = "registry+https://github.com/rust-lang/crates.io-index" 1012 | checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" 1013 | 1014 | [[package]] 1015 | name = "rayon" 1016 | version = "1.10.0" 1017 | source = "registry+https://github.com/rust-lang/crates.io-index" 1018 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 1019 | dependencies = [ 1020 | "either", 1021 | "rayon-core", 1022 | ] 1023 | 1024 | [[package]] 1025 | name = "rayon-core" 1026 | version = "1.12.1" 1027 | source = "registry+https://github.com/rust-lang/crates.io-index" 1028 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 1029 | dependencies = [ 1030 | "crossbeam-deque", 1031 | "crossbeam-utils", 1032 | ] 1033 | 1034 | [[package]] 1035 | name = "redox_syscall" 1036 | version = "0.5.2" 1037 | source = "registry+https://github.com/rust-lang/crates.io-index" 1038 | checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" 1039 | dependencies = [ 1040 | "bitflags 2.10.0", 1041 | ] 1042 | 1043 | [[package]] 1044 | name = "regex" 1045 | version = "1.10.5" 1046 | source = "registry+https://github.com/rust-lang/crates.io-index" 1047 | checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" 1048 | dependencies = [ 1049 | "aho-corasick", 1050 | "memchr", 1051 | "regex-automata", 1052 | "regex-syntax", 1053 | ] 1054 | 1055 | [[package]] 1056 | name = "regex-automata" 1057 | version = "0.4.7" 1058 | source = "registry+https://github.com/rust-lang/crates.io-index" 1059 | checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" 1060 | dependencies = [ 1061 | "aho-corasick", 1062 | "memchr", 1063 | "regex-syntax", 1064 | ] 1065 | 
1066 | [[package]] 1067 | name = "regex-syntax" 1068 | version = "0.8.4" 1069 | source = "registry+https://github.com/rust-lang/crates.io-index" 1070 | checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" 1071 | 1072 | [[package]] 1073 | name = "renderdoc-sys" 1074 | version = "1.1.0" 1075 | source = "registry+https://github.com/rust-lang/crates.io-index" 1076 | checksum = "19b30a45b0cd0bcca8037f3d0dc3421eaf95327a17cad11964fb8179b4fc4832" 1077 | 1078 | [[package]] 1079 | name = "rustc-hash" 1080 | version = "1.1.0" 1081 | source = "registry+https://github.com/rust-lang/crates.io-index" 1082 | checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 1083 | 1084 | [[package]] 1085 | name = "rustversion" 1086 | version = "1.0.22" 1087 | source = "registry+https://github.com/rust-lang/crates.io-index" 1088 | checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" 1089 | 1090 | [[package]] 1091 | name = "ryu" 1092 | version = "1.0.18" 1093 | source = "registry+https://github.com/rust-lang/crates.io-index" 1094 | checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" 1095 | 1096 | [[package]] 1097 | name = "same-file" 1098 | version = "1.0.6" 1099 | source = "registry+https://github.com/rust-lang/crates.io-index" 1100 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 1101 | dependencies = [ 1102 | "winapi-util", 1103 | ] 1104 | 1105 | [[package]] 1106 | name = "scopeguard" 1107 | version = "1.2.0" 1108 | source = "registry+https://github.com/rust-lang/crates.io-index" 1109 | checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 1110 | 1111 | [[package]] 1112 | name = "serde" 1113 | version = "1.0.204" 1114 | source = "registry+https://github.com/rust-lang/crates.io-index" 1115 | checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" 1116 | dependencies = [ 1117 | "serde_derive", 1118 | ] 1119 | 1120 | 
[[package]] 1121 | name = "serde_derive" 1122 | version = "1.0.204" 1123 | source = "registry+https://github.com/rust-lang/crates.io-index" 1124 | checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" 1125 | dependencies = [ 1126 | "proc-macro2", 1127 | "quote", 1128 | "syn 2.0.110", 1129 | ] 1130 | 1131 | [[package]] 1132 | name = "serde_json" 1133 | version = "1.0.120" 1134 | source = "registry+https://github.com/rust-lang/crates.io-index" 1135 | checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" 1136 | dependencies = [ 1137 | "itoa", 1138 | "ryu", 1139 | "serde", 1140 | ] 1141 | 1142 | [[package]] 1143 | name = "slotmap" 1144 | version = "1.0.7" 1145 | source = "registry+https://github.com/rust-lang/crates.io-index" 1146 | checksum = "dbff4acf519f630b3a3ddcfaea6c06b42174d9a44bc70c620e9ed1649d58b82a" 1147 | dependencies = [ 1148 | "version_check", 1149 | ] 1150 | 1151 | [[package]] 1152 | name = "smallvec" 1153 | version = "1.15.1" 1154 | source = "registry+https://github.com/rust-lang/crates.io-index" 1155 | checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" 1156 | 1157 | [[package]] 1158 | name = "spirv" 1159 | version = "0.3.0+sdk-1.3.268.0" 1160 | source = "registry+https://github.com/rust-lang/crates.io-index" 1161 | checksum = "eda41003dc44290527a59b13432d4a0379379fa074b70174882adfbdfd917844" 1162 | dependencies = [ 1163 | "bitflags 2.10.0", 1164 | ] 1165 | 1166 | [[package]] 1167 | name = "static_assertions" 1168 | version = "1.1.0" 1169 | source = "registry+https://github.com/rust-lang/crates.io-index" 1170 | checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" 1171 | 1172 | [[package]] 1173 | name = "syn" 1174 | version = "1.0.109" 1175 | source = "registry+https://github.com/rust-lang/crates.io-index" 1176 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 1177 | dependencies = [ 1178 | "proc-macro2", 1179 | "quote", 1180 | 
"unicode-ident", 1181 | ] 1182 | 1183 | [[package]] 1184 | name = "syn" 1185 | version = "2.0.110" 1186 | source = "registry+https://github.com/rust-lang/crates.io-index" 1187 | checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" 1188 | dependencies = [ 1189 | "proc-macro2", 1190 | "quote", 1191 | "unicode-ident", 1192 | ] 1193 | 1194 | [[package]] 1195 | name = "termcolor" 1196 | version = "1.4.1" 1197 | source = "registry+https://github.com/rust-lang/crates.io-index" 1198 | checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" 1199 | dependencies = [ 1200 | "winapi-util", 1201 | ] 1202 | 1203 | [[package]] 1204 | name = "textwrap" 1205 | version = "0.16.1" 1206 | source = "registry+https://github.com/rust-lang/crates.io-index" 1207 | checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" 1208 | 1209 | [[package]] 1210 | name = "thiserror" 1211 | version = "1.0.69" 1212 | source = "registry+https://github.com/rust-lang/crates.io-index" 1213 | checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" 1214 | dependencies = [ 1215 | "thiserror-impl 1.0.69", 1216 | ] 1217 | 1218 | [[package]] 1219 | name = "thiserror" 1220 | version = "2.0.17" 1221 | source = "registry+https://github.com/rust-lang/crates.io-index" 1222 | checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" 1223 | dependencies = [ 1224 | "thiserror-impl 2.0.17", 1225 | ] 1226 | 1227 | [[package]] 1228 | name = "thiserror-impl" 1229 | version = "1.0.69" 1230 | source = "registry+https://github.com/rust-lang/crates.io-index" 1231 | checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" 1232 | dependencies = [ 1233 | "proc-macro2", 1234 | "quote", 1235 | "syn 2.0.110", 1236 | ] 1237 | 1238 | [[package]] 1239 | name = "thiserror-impl" 1240 | version = "2.0.17" 1241 | source = "registry+https://github.com/rust-lang/crates.io-index" 1242 | checksum = 
"3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" 1243 | dependencies = [ 1244 | "proc-macro2", 1245 | "quote", 1246 | "syn 2.0.110", 1247 | ] 1248 | 1249 | [[package]] 1250 | name = "tinytemplate" 1251 | version = "1.2.1" 1252 | source = "registry+https://github.com/rust-lang/crates.io-index" 1253 | checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 1254 | dependencies = [ 1255 | "serde", 1256 | "serde_json", 1257 | ] 1258 | 1259 | [[package]] 1260 | name = "unicode-ident" 1261 | version = "1.0.12" 1262 | source = "registry+https://github.com/rust-lang/crates.io-index" 1263 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 1264 | 1265 | [[package]] 1266 | name = "unicode-width" 1267 | version = "0.1.13" 1268 | source = "registry+https://github.com/rust-lang/crates.io-index" 1269 | checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" 1270 | 1271 | [[package]] 1272 | name = "utf8parse" 1273 | version = "0.2.2" 1274 | source = "registry+https://github.com/rust-lang/crates.io-index" 1275 | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 1276 | 1277 | [[package]] 1278 | name = "version_check" 1279 | version = "0.9.4" 1280 | source = "registry+https://github.com/rust-lang/crates.io-index" 1281 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 1282 | 1283 | [[package]] 1284 | name = "walkdir" 1285 | version = "2.5.0" 1286 | source = "registry+https://github.com/rust-lang/crates.io-index" 1287 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 1288 | dependencies = [ 1289 | "same-file", 1290 | "winapi-util", 1291 | ] 1292 | 1293 | [[package]] 1294 | name = "wasi" 1295 | version = "0.11.0+wasi-snapshot-preview1" 1296 | source = "registry+https://github.com/rust-lang/crates.io-index" 1297 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 1298 | 1299 | 
[[package]] 1300 | name = "wasm-bindgen" 1301 | version = "0.2.105" 1302 | source = "registry+https://github.com/rust-lang/crates.io-index" 1303 | checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" 1304 | dependencies = [ 1305 | "cfg-if", 1306 | "once_cell", 1307 | "rustversion", 1308 | "wasm-bindgen-macro", 1309 | "wasm-bindgen-shared", 1310 | ] 1311 | 1312 | [[package]] 1313 | name = "wasm-bindgen-futures" 1314 | version = "0.4.55" 1315 | source = "registry+https://github.com/rust-lang/crates.io-index" 1316 | checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" 1317 | dependencies = [ 1318 | "cfg-if", 1319 | "js-sys", 1320 | "once_cell", 1321 | "wasm-bindgen", 1322 | "web-sys", 1323 | ] 1324 | 1325 | [[package]] 1326 | name = "wasm-bindgen-macro" 1327 | version = "0.2.105" 1328 | source = "registry+https://github.com/rust-lang/crates.io-index" 1329 | checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" 1330 | dependencies = [ 1331 | "quote", 1332 | "wasm-bindgen-macro-support", 1333 | ] 1334 | 1335 | [[package]] 1336 | name = "wasm-bindgen-macro-support" 1337 | version = "0.2.105" 1338 | source = "registry+https://github.com/rust-lang/crates.io-index" 1339 | checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" 1340 | dependencies = [ 1341 | "bumpalo", 1342 | "proc-macro2", 1343 | "quote", 1344 | "syn 2.0.110", 1345 | "wasm-bindgen-shared", 1346 | ] 1347 | 1348 | [[package]] 1349 | name = "wasm-bindgen-shared" 1350 | version = "0.2.105" 1351 | source = "registry+https://github.com/rust-lang/crates.io-index" 1352 | checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" 1353 | dependencies = [ 1354 | "unicode-ident", 1355 | ] 1356 | 1357 | [[package]] 1358 | name = "web-sys" 1359 | version = "0.3.82" 1360 | source = "registry+https://github.com/rust-lang/crates.io-index" 1361 | checksum = 
"3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" 1362 | dependencies = [ 1363 | "js-sys", 1364 | "wasm-bindgen", 1365 | ] 1366 | 1367 | [[package]] 1368 | name = "wgpu" 1369 | version = "27.0.1" 1370 | source = "registry+https://github.com/rust-lang/crates.io-index" 1371 | checksum = "bfe68bac7cde125de7a731c3400723cadaaf1703795ad3f4805f187459cd7a77" 1372 | dependencies = [ 1373 | "arrayvec", 1374 | "bitflags 2.10.0", 1375 | "cfg-if", 1376 | "cfg_aliases", 1377 | "document-features", 1378 | "hashbrown 0.16.0", 1379 | "js-sys", 1380 | "log", 1381 | "naga", 1382 | "parking_lot", 1383 | "portable-atomic", 1384 | "profiling", 1385 | "raw-window-handle", 1386 | "smallvec", 1387 | "static_assertions", 1388 | "wasm-bindgen", 1389 | "wasm-bindgen-futures", 1390 | "web-sys", 1391 | "wgpu-core", 1392 | "wgpu-hal", 1393 | "wgpu-types", 1394 | ] 1395 | 1396 | [[package]] 1397 | name = "wgpu-core" 1398 | version = "27.0.3" 1399 | source = "registry+https://github.com/rust-lang/crates.io-index" 1400 | checksum = "27a75de515543b1897b26119f93731b385a19aea165a1ec5f0e3acecc229cae7" 1401 | dependencies = [ 1402 | "arrayvec", 1403 | "bit-set", 1404 | "bit-vec", 1405 | "bitflags 2.10.0", 1406 | "bytemuck", 1407 | "cfg_aliases", 1408 | "document-features", 1409 | "hashbrown 0.16.0", 1410 | "indexmap 2.12.0", 1411 | "log", 1412 | "naga", 1413 | "once_cell", 1414 | "parking_lot", 1415 | "portable-atomic", 1416 | "profiling", 1417 | "raw-window-handle", 1418 | "rustc-hash", 1419 | "smallvec", 1420 | "thiserror 2.0.17", 1421 | "wgpu-core-deps-apple", 1422 | "wgpu-core-deps-emscripten", 1423 | "wgpu-core-deps-windows-linux-android", 1424 | "wgpu-hal", 1425 | "wgpu-types", 1426 | ] 1427 | 1428 | [[package]] 1429 | name = "wgpu-core-deps-apple" 1430 | version = "27.0.0" 1431 | source = "registry+https://github.com/rust-lang/crates.io-index" 1432 | checksum = "0772ae958e9be0c729561d5e3fd9a19679bcdfb945b8b1a1969d9bfe8056d233" 1433 | dependencies = [ 1434 | "wgpu-hal", 1435 | ] 
1436 | 1437 | [[package]] 1438 | name = "wgpu-core-deps-emscripten" 1439 | version = "27.0.0" 1440 | source = "registry+https://github.com/rust-lang/crates.io-index" 1441 | checksum = "b06ac3444a95b0813ecfd81ddb2774b66220b264b3e2031152a4a29fda4da6b5" 1442 | dependencies = [ 1443 | "wgpu-hal", 1444 | ] 1445 | 1446 | [[package]] 1447 | name = "wgpu-core-deps-windows-linux-android" 1448 | version = "27.0.0" 1449 | source = "registry+https://github.com/rust-lang/crates.io-index" 1450 | checksum = "71197027d61a71748e4120f05a9242b2ad142e3c01f8c1b47707945a879a03c3" 1451 | dependencies = [ 1452 | "wgpu-hal", 1453 | ] 1454 | 1455 | [[package]] 1456 | name = "wgpu-hal" 1457 | version = "27.0.4" 1458 | source = "registry+https://github.com/rust-lang/crates.io-index" 1459 | checksum = "5b21cb61c57ee198bc4aff71aeadff4cbb80b927beb912506af9c780d64313ce" 1460 | dependencies = [ 1461 | "android_system_properties", 1462 | "arrayvec", 1463 | "ash", 1464 | "bit-set", 1465 | "bitflags 2.10.0", 1466 | "block", 1467 | "bytemuck", 1468 | "cfg-if", 1469 | "cfg_aliases", 1470 | "core-graphics-types", 1471 | "glow", 1472 | "glutin_wgl_sys", 1473 | "gpu-alloc", 1474 | "gpu-allocator", 1475 | "gpu-descriptor", 1476 | "hashbrown 0.16.0", 1477 | "js-sys", 1478 | "khronos-egl", 1479 | "libc", 1480 | "libloading", 1481 | "log", 1482 | "metal", 1483 | "naga", 1484 | "ndk-sys", 1485 | "objc", 1486 | "once_cell", 1487 | "ordered-float", 1488 | "parking_lot", 1489 | "portable-atomic", 1490 | "portable-atomic-util", 1491 | "profiling", 1492 | "range-alloc", 1493 | "raw-window-handle", 1494 | "renderdoc-sys", 1495 | "smallvec", 1496 | "thiserror 2.0.17", 1497 | "wasm-bindgen", 1498 | "web-sys", 1499 | "wgpu-types", 1500 | "windows", 1501 | "windows-core", 1502 | ] 1503 | 1504 | [[package]] 1505 | name = "wgpu-types" 1506 | version = "27.0.1" 1507 | source = "registry+https://github.com/rust-lang/crates.io-index" 1508 | checksum = "afdcf84c395990db737f2dd91628706cb31e86d72e53482320d368e52b5da5eb" 1509 | 
dependencies = [ 1510 | "bitflags 2.10.0", 1511 | "bytemuck", 1512 | "js-sys", 1513 | "log", 1514 | "thiserror 2.0.17", 1515 | "web-sys", 1516 | ] 1517 | 1518 | [[package]] 1519 | name = "wgpu_sort" 1520 | version = "0.1.0" 1521 | dependencies = [ 1522 | "bytemuck", 1523 | "criterion", 1524 | "env_logger", 1525 | "float-ord", 1526 | "futures-intrusive", 1527 | "log", 1528 | "pollster", 1529 | "rand", 1530 | "wgpu", 1531 | ] 1532 | 1533 | [[package]] 1534 | name = "winapi" 1535 | version = "0.3.9" 1536 | source = "registry+https://github.com/rust-lang/crates.io-index" 1537 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1538 | dependencies = [ 1539 | "winapi-i686-pc-windows-gnu", 1540 | "winapi-x86_64-pc-windows-gnu", 1541 | ] 1542 | 1543 | [[package]] 1544 | name = "winapi-i686-pc-windows-gnu" 1545 | version = "0.4.0" 1546 | source = "registry+https://github.com/rust-lang/crates.io-index" 1547 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1548 | 1549 | [[package]] 1550 | name = "winapi-util" 1551 | version = "0.1.8" 1552 | source = "registry+https://github.com/rust-lang/crates.io-index" 1553 | checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" 1554 | dependencies = [ 1555 | "windows-sys", 1556 | ] 1557 | 1558 | [[package]] 1559 | name = "winapi-x86_64-pc-windows-gnu" 1560 | version = "0.4.0" 1561 | source = "registry+https://github.com/rust-lang/crates.io-index" 1562 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1563 | 1564 | [[package]] 1565 | name = "windows" 1566 | version = "0.58.0" 1567 | source = "registry+https://github.com/rust-lang/crates.io-index" 1568 | checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" 1569 | dependencies = [ 1570 | "windows-core", 1571 | "windows-targets", 1572 | ] 1573 | 1574 | [[package]] 1575 | name = "windows-core" 1576 | version = "0.58.0" 1577 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1578 | checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" 1579 | dependencies = [ 1580 | "windows-implement", 1581 | "windows-interface", 1582 | "windows-result", 1583 | "windows-strings", 1584 | "windows-targets", 1585 | ] 1586 | 1587 | [[package]] 1588 | name = "windows-implement" 1589 | version = "0.58.0" 1590 | source = "registry+https://github.com/rust-lang/crates.io-index" 1591 | checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" 1592 | dependencies = [ 1593 | "proc-macro2", 1594 | "quote", 1595 | "syn 2.0.110", 1596 | ] 1597 | 1598 | [[package]] 1599 | name = "windows-interface" 1600 | version = "0.58.0" 1601 | source = "registry+https://github.com/rust-lang/crates.io-index" 1602 | checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" 1603 | dependencies = [ 1604 | "proc-macro2", 1605 | "quote", 1606 | "syn 2.0.110", 1607 | ] 1608 | 1609 | [[package]] 1610 | name = "windows-result" 1611 | version = "0.2.0" 1612 | source = "registry+https://github.com/rust-lang/crates.io-index" 1613 | checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" 1614 | dependencies = [ 1615 | "windows-targets", 1616 | ] 1617 | 1618 | [[package]] 1619 | name = "windows-strings" 1620 | version = "0.1.0" 1621 | source = "registry+https://github.com/rust-lang/crates.io-index" 1622 | checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" 1623 | dependencies = [ 1624 | "windows-result", 1625 | "windows-targets", 1626 | ] 1627 | 1628 | [[package]] 1629 | name = "windows-sys" 1630 | version = "0.52.0" 1631 | source = "registry+https://github.com/rust-lang/crates.io-index" 1632 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 1633 | dependencies = [ 1634 | "windows-targets", 1635 | ] 1636 | 1637 | [[package]] 1638 | name = "windows-targets" 1639 | version = "0.52.6" 1640 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1641 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 1642 | dependencies = [ 1643 | "windows_aarch64_gnullvm", 1644 | "windows_aarch64_msvc", 1645 | "windows_i686_gnu", 1646 | "windows_i686_gnullvm", 1647 | "windows_i686_msvc", 1648 | "windows_x86_64_gnu", 1649 | "windows_x86_64_gnullvm", 1650 | "windows_x86_64_msvc", 1651 | ] 1652 | 1653 | [[package]] 1654 | name = "windows_aarch64_gnullvm" 1655 | version = "0.52.6" 1656 | source = "registry+https://github.com/rust-lang/crates.io-index" 1657 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 1658 | 1659 | [[package]] 1660 | name = "windows_aarch64_msvc" 1661 | version = "0.52.6" 1662 | source = "registry+https://github.com/rust-lang/crates.io-index" 1663 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 1664 | 1665 | [[package]] 1666 | name = "windows_i686_gnu" 1667 | version = "0.52.6" 1668 | source = "registry+https://github.com/rust-lang/crates.io-index" 1669 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 1670 | 1671 | [[package]] 1672 | name = "windows_i686_gnullvm" 1673 | version = "0.52.6" 1674 | source = "registry+https://github.com/rust-lang/crates.io-index" 1675 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 1676 | 1677 | [[package]] 1678 | name = "windows_i686_msvc" 1679 | version = "0.52.6" 1680 | source = "registry+https://github.com/rust-lang/crates.io-index" 1681 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 1682 | 1683 | [[package]] 1684 | name = "windows_x86_64_gnu" 1685 | version = "0.52.6" 1686 | source = "registry+https://github.com/rust-lang/crates.io-index" 1687 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 1688 | 1689 | [[package]] 1690 | name = "windows_x86_64_gnullvm" 1691 | version = "0.52.6" 1692 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1693 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1694 | 1695 | [[package]] 1696 | name = "windows_x86_64_msvc" 1697 | version = "0.52.6" 1698 | source = "registry+https://github.com/rust-lang/crates.io-index" 1699 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1700 | 1701 | [[package]] 1702 | name = "xml-rs" 1703 | version = "0.8.20" 1704 | source = "registry+https://github.com/rust-lang/crates.io-index" 1705 | checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193" 1706 | 1707 | [[package]] 1708 | name = "zerocopy" 1709 | version = "0.8.27" 1710 | source = "registry+https://github.com/rust-lang/crates.io-index" 1711 | checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" 1712 | dependencies = [ 1713 | "zerocopy-derive", 1714 | ] 1715 | 1716 | [[package]] 1717 | name = "zerocopy-derive" 1718 | version = "0.8.27" 1719 | source = "registry+https://github.com/rust-lang/crates.io-index" 1720 | checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" 1721 | dependencies = [ 1722 | "proc-macro2", 1723 | "quote", 1724 | "syn 2.0.110", 1725 | ] 1726 | --------------------------------------------------------------------------------