├── .gitignore ├── shaders ├── lru_cache_age_slots.comp ├── display_render.vert ├── display_render_frag.wgsl ├── lru_cache_extract_slot_available.comp ├── sort_data.comp ├── display_render.frag ├── add_block_sums.comp ├── lru_cache_init.comp ├── reset_block_num_rays.comp ├── add_block_sums.wgsl ├── display_render_vert.wgsl ├── lru_copy_available_slot_age.comp ├── reverse_buffer.comp ├── embed_wgsl.py ├── lru_cache_inputs.comp ├── compile_shader.py ├── stream_compact.comp ├── compute_initial_rays.vert ├── reset_rays.comp ├── mark_ray_active.comp ├── lru_cache_update.comp ├── stream_compact_data.comp ├── reset_speculative_ids.comp ├── reset_block_active.comp ├── write_ray_and_block_id.comp ├── compute_initial_rays_vert.wgsl ├── zfp_decompress_block.comp ├── debug_view_rays_per_block.comp ├── debug_view_rays_per_block.wgsl ├── count_block_rays.wgsl ├── compute_initial_rays.frag ├── lru_cache_mark_new_items.comp ├── combine_block_information.wgsl ├── combine_block_information.comp ├── prefix_sum.comp ├── block_prefix_sum.wgsl ├── block_prefix_sum.comp ├── compute_initial_rays_frag.wgsl ├── compute_voxel_range.comp ├── load_block.wgsl ├── compute_coarse_cell_range.comp ├── zfp_compute_block_range.comp ├── depth_composite.comp ├── merge_sorted_chunks.comp ├── mark_block_active.comp ├── embed_shaders.py ├── radix_sort_chunk.comp ├── mark_block_active.wgsl ├── util.glsl ├── macro_traverse.comp ├── load_block.comp ├── zfp_decompress.comp └── raytrace_active_block.comp ├── LICENSE.md ├── README.md ├── js ├── util.js ├── run_benchmark.js ├── FileSaver.js ├── volumes.js ├── stream_compact.js ├── radix_sort_by_key.js ├── exclusive_scan.js ├── tri_table.js └── render.js ├── .clang-format └── index.html /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.raw 3 | *.spv 4 | shaders/embedded_shaders.js 5 | models/ 6 | *.wasm 7 | js/liblas.js 8 | js/liblas_wrapper.js 9 | .DS_Store 10 | glslc.exe 11 | tint.exe 12 | bcmc-data.zip 13 | -------------------------------------------------------------------------------- /shaders/lru_cache_age_slots.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "lru_cache_inputs.comp" 4 | 5 | layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; 6 | 7 | void main(void) 8 | { 9 | slot_data[gl_GlobalInvocationID.x].age += 1; 10 | } 11 | 12 | 13 | -------------------------------------------------------------------------------- /shaders/display_render.vert: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | // Draw a full screen quad using two triangles 4 | 5 | const vec4 pos[6] = vec4[6]( 6 | vec4(-1, 1, 0.5, 1), 7 | vec4(-1, -1, 0.5, 1), 8 | vec4(1, 1, 0.5, 1), 9 | vec4(-1, -1, 0.5, 1), 10 | vec4(1, 1, 0.5, 1), 11 | vec4(1, -1, 0.5, 1) 12 | ); 13 | 14 | void main(void){ 15 | gl_Position = pos[gl_VertexIndex]; 16 | } 17 | -------------------------------------------------------------------------------- /shaders/display_render_frag.wgsl: -------------------------------------------------------------------------------- 1 | // Fragment shader 2 | // May not need uniform declaration 3 | [[group(0), binding(0)]] var output_texture : texture_2d; 4 | 5 | [[stage(fragment)]] 6 | fn main([[builtin(position)]] frag_coord : vec4) -> [[location(0)]] vec4 { 7 | var color : vec4 = textureLoad(output_texture, vec2(frag_coord.xy), 0); 8 | color.a = 1.0; 9 | return color; 10 | } 
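// Host-side sketch of how this blit pass might be driven (an assumption for
// illustration only; the real setup lives in js/render.js, which is not shown here):
//   pass.setPipeline(displayRenderPipeline);  // display_render_vert.wgsl + this shader
//   pass.setBindGroup(0, displayBindGroup);   // binding 0 = the ray caster's output texture
//   pass.draw(6, 1, 0, 0);                    // the six vertices of the full screen quad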
-------------------------------------------------------------------------------- /shaders/lru_cache_extract_slot_available.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "lru_cache_inputs.comp" 4 | 5 | layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 1, binding = 0, std430) buffer Output 8 | { 9 | uint out_buf[]; 10 | }; 11 | 12 | void main(void) 13 | { 14 | out_buf[gl_GlobalInvocationID.x] = slot_data[gl_GlobalInvocationID.x].available; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /shaders/sort_data.comp: -------------------------------------------------------------------------------- 1 | #define UINT_MAX uint(0xffffffff) 2 | 3 | layout(local_size_x = SORT_CHUNK_SIZE, local_size_y = 1, local_size_z = 1) in; 4 | 5 | layout(set = 0, binding = 0, std140) uniform BufferInfo 6 | { 7 | uint size; 8 | }; 9 | 10 | uint next_pow2(uint x) 11 | { 12 | x = x - 1; 13 | x |= x >> 1; 14 | x |= x >> 2; 15 | x |= x >> 4; 16 | x |= x >> 8; 17 | x |= x >> 16; 18 | return x + 1; 19 | } 20 | 21 | -------------------------------------------------------------------------------- /shaders/display_render.frag: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(location = 0) out vec4 color; 4 | 5 | layout(set = 0, binding = 0) uniform texture2D output_texture; 6 | layout(set = 0, binding = 2) uniform sampler u_sampler; 7 | layout(set = 0, binding = 1) uniform Resolution 8 | { 9 | uint width; 10 | uint height; 11 | }; 12 | 13 | void main(void) { 14 | color = texture(sampler2D(output_texture, u_sampler), gl_FragCoord.xy / vec2(width, height)); 15 | color.a = 1.f; 16 | } 17 | -------------------------------------------------------------------------------- /shaders/add_block_sums.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(local_size_x = BLOCK_SIZE / 2) in; 4 | 5 | layout(set = 0, binding = 0, std430) buffer Data { 6 | uint vals[]; 7 | }; 8 | 9 | layout(set = 0, binding = 1, std430) buffer BlockSums { 10 | uint block_sums[]; 11 | }; 12 | 13 | void main(void) { 14 | const uint prev_sum = block_sums[gl_WorkGroupID.x]; 15 | vals[2 * gl_GlobalInvocationID.x] += prev_sum; 16 | vals[2 * gl_GlobalInvocationID.x + 1] += prev_sum; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /shaders/lru_cache_init.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "lru_cache_inputs.comp" 4 | 5 | layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 1, binding = 0, std140) uniform OldSize 8 | { 9 | uint old_size; 10 | }; 11 | 12 | void main(void) 13 | { 14 | // Initialize each new empty slot 15 | slot_data[old_size + gl_GlobalInvocationID.x].age = 100000; 16 | slot_data[old_size + gl_GlobalInvocationID.x].available = 1; 17 | slot_data[old_size + gl_GlobalInvocationID.x].item_id = -1; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /shaders/reset_block_num_rays.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; 4 | 5 | layout(set = 0, binding = 0, std140) uniform BlockIDOffset 6 | { 7 | uint 
id_offset; 8 | uint total_visible_blocks; 9 | }; 10 | 11 | layout(set = 0, binding = 1, std430) buffer BlockNumRays 12 | { 13 | uint block_num_rays[]; 14 | }; 15 | 16 | void main(void) 17 | { 18 | const uint block_id = gl_GlobalInvocationID.x + id_offset; 19 | if (block_id >= total_visible_blocks) { 20 | return; 21 | } 22 | 23 | block_num_rays[block_id] = 0; 24 | } 25 | 26 | 27 | -------------------------------------------------------------------------------- /shaders/add_block_sums.wgsl: -------------------------------------------------------------------------------- 1 | [[block]] struct UintArray { 2 | vals : array; 3 | }; 4 | 5 | [[group(0), binding(0)]] var vals : UintArray; 6 | [[group(0), binding(1)]] var block_sums : UintArray; 7 | 8 | [[stage(compute), workgroup_size(BLOCK_SIZE / 2.0)]] 9 | fn main([[builtin(global_invocation_id)]] global_id : vec3, [[builtin(workgroup_id)]] workgroup_id : vec3) { 10 | let prev_sum : u32 = block_sums.vals[workgroup_id.x]; 11 | vals.vals[2 * global_id.x] = vals.vals[2 * global_id.x] + prev_sum; 12 | vals.vals[2 * global_id.x + 1] = vals.vals[2 * global_id.x + 1] + prev_sum; 13 | } -------------------------------------------------------------------------------- /shaders/display_render_vert.wgsl: -------------------------------------------------------------------------------- 1 | // Draw a full screen quad using two triangles 2 | struct VertexOutput { 3 | [[builtin(position)]] Position : vec4; 4 | }; 5 | let pos : array, 6> = array, 6>( 6 | vec4(-1, 1, 0.5, 1), 7 | vec4(-1, -1, 0.5, 1), 8 | vec4(1, 1, 0.5, 1), 9 | vec4(-1, -1, 0.5, 1), 10 | vec4(1, 1, 0.5, 1), 11 | vec4(1, -1, 0.5, 1) 12 | ); 13 | 14 | [[stage(vertex)]] 15 | fn main([[builtin(vertex_index)]] vertex_index : u32) 16 | -> VertexOutput { 17 | var output : VertexOutput; 18 | output.Position = pos[vertex_index]; 19 | return output; 20 | } 21 | 22 | 23 | -------------------------------------------------------------------------------- /shaders/lru_copy_available_slot_age.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "lru_cache_inputs.comp" 4 | 5 | layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 1, binding = 0, std430) buffer AvailableSlotAges 8 | { 9 | uint available_slot_ages[]; 10 | }; 11 | 12 | layout(set = 2, binding = 0) uniform NumNewItemIDs 13 | { 14 | uint num_slots_available; 15 | }; 16 | 17 | void main(void) 18 | { 19 | if (gl_GlobalInvocationID.x >= num_slots_available) { 20 | return; 21 | } 22 | available_slot_ages[gl_GlobalInvocationID.x] = slot_data[slot_available_id[gl_GlobalInvocationID.x]].age; 23 | } 24 | 25 | -------------------------------------------------------------------------------- /shaders/reverse_buffer.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "sort_data.comp" 4 | 5 | layout(set = 1, binding = 0, std430) buffer Values 6 | { 7 | uint values[]; 8 | }; 9 | 10 | void main(void) 11 | { 12 | // Each thread swaps a pair of elements in place 13 | const uint aligned_size = next_pow2(uint(ceil(float(size) / SORT_CHUNK_SIZE))) * SORT_CHUNK_SIZE; 14 | if (aligned_size < SORT_CHUNK_SIZE && gl_GlobalInvocationID.x > SORT_CHUNK_SIZE / 2) { 15 | return; 16 | } 17 | const uint i = gl_GlobalInvocationID.x; 18 | const uint j = aligned_size - gl_GlobalInvocationID.x - 1; 19 | const uint tmp = values[i]; 20 | values[i] = values[j]; 21 | values[j] = tmp; 22 | } 23 | 24 | 
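// Worked example of next_pow2() from sort_data.comp: next_pow2(37) computes 37 - 1 = 36
// (0b100100); OR-ing in the shifted copies fills every bit below the highest set bit,
// giving 0b111111 = 63, and adding 1 yields 64. aligned_size therefore rounds the element
// count up to a power-of-two number of SORT_CHUNK_SIZE chunks, presumably so the later
// sort and merge passes can assume a power-of-two chunk count.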
-------------------------------------------------------------------------------- /shaders/embed_wgsl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | shaders = [ 3 | "add_block_sums.wgsl", 4 | "block_prefix_sum.wgsl", 5 | "combine_block_information.wgsl", 6 | "compute_initial_rays_frag.wgsl", 7 | "compute_initial_rays_vert.wgsl", 8 | ] 9 | compiled_shaders = "" 10 | 11 | with open("util.wgsl") as f: 12 | utils_code = f.read() 13 | 14 | for shader in shaders: 15 | with open(shader, "r") as f: 16 | compiled_code = f.read() 17 | if compiled_code.startswith("//include util.wgsl"): 18 | compiled_code = utils_code + compiled_code 19 | compiled_shaders += f"const {shader[:-5]} = `{compiled_code}`;\n" 20 | 21 | with open("../js/wgsl.js", "w") as f: 22 | f.write(compiled_shaders) 23 | 24 | -------------------------------------------------------------------------------- /shaders/lru_cache_inputs.comp: -------------------------------------------------------------------------------- 1 | #ifndef LRU_CACHE_INPUTS_COMP 2 | #define LRU_CACHE_INPUTS_COMP 3 | 4 | struct Slot { 5 | // Age of the item in the slot 6 | uint age; 7 | // 1/0 if the slot is available 8 | uint available; 9 | // IDs of the item in the slot 10 | int item_id; 11 | }; 12 | 13 | layout(set = 0, binding = 0, std430) buffer CachedItemSlots 14 | { 15 | // Slot occupied by currently cached items, or -1 if not cached 16 | int cached_item_slot[]; 17 | }; 18 | 19 | layout(set = 0, binding = 1, std430) buffer SlotAvailableIDs 20 | { 21 | // IDs of available slots 22 | uint slot_available_id[]; 23 | }; 24 | 25 | layout(set = 0, binding = 2, std430) buffer SlotData 26 | { 27 | Slot slot_data[]; 28 | }; 29 | 30 | #endif 31 | 32 | -------------------------------------------------------------------------------- /shaders/compile_shader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import os 5 | import subprocess 6 | 7 | if len(sys.argv) < 5: 8 | print("Usage [glslc_args...]") 9 | sys.exit(1) 10 | 11 | glslc = sys.argv[1] 12 | tint = sys.argv[2] 13 | shader = sys.argv[3] 14 | var_name = sys.argv[4] 15 | 16 | compiled_shader = "" 17 | args = [glslc, shader] 18 | if len(sys.argv) > 5: 19 | args.extend(sys.argv[5:]) 20 | 21 | # Compile the GLSL shader to SPV 22 | subprocess.check_output(args) 23 | 24 | # Now compile the SPV file to WGSL with Tint 25 | subprocess.check_output([tint, "a.spv", "-o", "a.wgsl"]) 26 | 27 | with open("a.wgsl", "r") as f: 28 | compiled_code = f.read() 29 | compiled_shader = "const " + var_name + " = `" + compiled_code + "`;\n" 30 | 31 | os.remove("a.spv") 32 | os.remove("a.wgsl") 33 | print(compiled_shader) 34 | 35 | -------------------------------------------------------------------------------- /shaders/stream_compact.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in; 4 | 5 | layout(set = 0, binding = 0) buffer Input 6 | { 7 | uint inputs[]; 8 | }; 9 | 10 | layout(set = 0, binding = 1) buffer Offsets 11 | { 12 | uint offsets[]; 13 | }; 14 | 15 | // The compaction execution offset chunk we're running 16 | layout(set = 0, binding = 2) uniform CompactionOffset 17 | { 18 | uint compact_offset; 19 | }; 20 | 21 | layout(set = 0, binding = 3) buffer Output 22 | { 23 | uint outputs[]; 24 | }; 25 | 26 | void main(void) { 27 | // Note: this is just for 
compacting down id's of "active" elements, so 0's are inherently 28 | // things we don't want to output. 29 | if (inputs[gl_GlobalInvocationID.x] != 0) { 30 | outputs[offsets[gl_GlobalInvocationID.x]] = gl_GlobalInvocationID.x + compact_offset; 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /shaders/compute_initial_rays.vert: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(location = 0) in vec3 pos; 4 | 5 | layout(location = 0) out vec3 vray_dir; 6 | layout(location = 1) flat out vec3 transformed_eye; 7 | 8 | layout(set = 0, binding = 0, std140) uniform ViewParams 9 | { 10 | mat4 proj_view; 11 | vec4 eye_pos; 12 | vec4 eye_dir; 13 | float near_plane; 14 | }; 15 | 16 | layout(set = 0, binding = 2, std140) uniform VolumeParams 17 | { 18 | uvec4 volume_dims; 19 | uvec4 padded_dims; 20 | vec4 volume_scale; 21 | uint max_bits; 22 | float isovalue; 23 | uint image_width; 24 | }; 25 | 26 | void main(void) { 27 | vec3 volume_translation = vec3(0) - volume_scale.xyz * 0.5; 28 | gl_Position = proj_view * vec4(pos * volume_scale.xyz + volume_translation, 1); 29 | transformed_eye = (eye_pos.xyz - volume_translation) / volume_scale.xyz; 30 | vray_dir = pos - transformed_eye; 31 | } 32 | -------------------------------------------------------------------------------- /shaders/reset_rays.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "util.glsl" 4 | 5 | layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 0, binding = 0, std430) buffer RayInformation { 8 | RayInfo rays[]; 9 | }; 10 | 11 | layout(set = 0, binding = 1, std140) uniform VolumeParams 12 | { 13 | uvec4 volume_dims; 14 | uvec4 padded_dims; 15 | vec4 volume_scale; 16 | 17 | uint max_bits; 18 | float isovalue; 19 | uint image_width; 20 | }; 21 | layout(set = 0, binding = 2, std430) buffer RayBlockIDs 22 | { 23 | uint block_ids[]; 24 | }; 25 | 26 | void main() { 27 | if (gl_GlobalInvocationID.x >= image_width) { 28 | return; 29 | } 30 | uint ray_index = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * image_width; 31 | rays[ray_index].ray_dir = vec3(0); 32 | block_ids[ray_index] = UINT_MAX; 33 | rays[ray_index].t = FLT_MAX; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /shaders/mark_ray_active.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "util.glsl" 4 | 5 | layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 0, binding = 0, std140) uniform VolumeParams 8 | { 9 | uvec4 volume_dims; 10 | uvec4 padded_dims; 11 | vec4 volume_scale; 12 | uint max_bits; 13 | float isovalue; 14 | uint image_width; 15 | }; 16 | 17 | layout(set = 0, binding = 1, std430) buffer RayInformation 18 | { 19 | RayInfo rays[]; 20 | }; 21 | 22 | 23 | layout(set = 0, binding = 2, std430) buffer RayActive 24 | { 25 | uint ray_active[]; 26 | }; 27 | 28 | void main() { 29 | if (gl_GlobalInvocationID.x >= image_width) { 30 | return; 31 | } 32 | // Mark the pixels active, speculated ray-block intersections refer to these pixels 33 | uint ray_index = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * image_width; 34 | ray_active[ray_index] = rays[ray_index].t != FLT_MAX ? 
1 : 0; 35 | } 36 | 37 | -------------------------------------------------------------------------------- /shaders/lru_cache_update.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "lru_cache_inputs.comp" 4 | 5 | layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 1, binding = 0, std430) buffer NewItemIDs 8 | { 9 | uint new_items[]; 10 | }; 11 | 12 | layout(set = 2, binding = 0) uniform NumNewItemIDs 13 | { 14 | uint num_new_items; 15 | }; 16 | 17 | void main(void) 18 | { 19 | if (gl_GlobalInvocationID.x >= num_new_items) { 20 | return; 21 | } 22 | const uint item = new_items[gl_GlobalInvocationID.x]; 23 | const uint slot = slot_available_id[gl_GlobalInvocationID.x]; 24 | const int prev = slot_data[slot].item_id; 25 | // Evict the previous item, if there was one in this slot 26 | if (prev != -1) { 27 | cached_item_slot[prev] = -1; 28 | } 29 | 30 | slot_data[slot].age = 0; 31 | slot_data[slot].item_id = int(item); 32 | slot_data[slot].available = 0; 33 | cached_item_slot[item] = int(slot); 34 | } 35 | 36 | -------------------------------------------------------------------------------- /shaders/stream_compact_data.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in; 4 | 5 | layout(set = 0, binding = 0) buffer Input 6 | { 7 | uint inputs[]; 8 | }; 9 | 10 | layout(set = 0, binding = 1) buffer Offsets 11 | { 12 | uint offsets[]; 13 | }; 14 | 15 | // The compaction execution offset chunk we're running 16 | layout(set = 0, binding = 2) uniform CompactionOffset 17 | { 18 | uint compact_offset; 19 | }; 20 | 21 | layout(set = 0, binding = 3) buffer Output 22 | { 23 | uint outputs[]; 24 | }; 25 | 26 | layout(set = 1, binding = 0) buffer Data 27 | { 28 | uint input_data[]; 29 | }; 30 | 31 | void main(void) { 32 | // Note: this is just for compacting down id's of "active" elements, so 0's are inherently 33 | // things we don't want to output. 
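// Tiny worked example (illustration only): with inputs = [0, 1, 0, 1, 1] and
// offsets = exclusive_scan(inputs) = [0, 0, 1, 1, 2], the writes below produce
// outputs[0] = input_data[1], outputs[1] = input_data[3], outputs[2] = input_data[4],
// i.e. the payload of every flagged element packed contiguously at the front of outputs.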
34 | const uint i = gl_GlobalInvocationID.x + compact_offset; 35 | if (inputs[i] != 0) { 36 | outputs[offsets[i]] = input_data[i]; 37 | } 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /shaders/reset_speculative_ids.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "util.glsl" 4 | 5 | layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 0, binding = 0, std140) uniform VolumeParams 8 | { 9 | uvec4 volume_dims; 10 | uvec4 padded_dims; 11 | vec4 volume_scale; 12 | 13 | uint max_bits; 14 | float isovalue; 15 | uint image_width; 16 | }; 17 | 18 | layout(set = 0, binding = 1, std430) buffer RayIDs 19 | { 20 | uint ray_ids[]; 21 | }; 22 | 23 | layout(set = 0, binding = 2, std430) buffer RayRGBZ 24 | { 25 | vec2 ray_rgbz[]; 26 | }; 27 | 28 | layout(set = 0, binding = 3, std430) buffer RayBlockIDs 29 | { 30 | uint block_ids[]; 31 | }; 32 | 33 | void main(void) 34 | { 35 | if (gl_GlobalInvocationID.x >= image_width) { 36 | return; 37 | } 38 | uint ray_index = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * image_width; 39 | ray_ids[ray_index] = UINT_MAX; 40 | ray_rgbz[ray_index] = vec2(intBitsToFloat(0), FLT_MAX); 41 | block_ids[ray_index] = UINT_MAX; 42 | } 43 | 44 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 Will Usher 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /shaders/reset_block_active.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in; 4 | 5 | layout(set = 0, binding = 0, std140) uniform VolumeParams 6 | { 7 | uvec4 volume_dims; 8 | uvec4 padded_dims; 9 | vec4 volume_scale; 10 | uint max_bits; 11 | float isovalue; 12 | }; 13 | 14 | layout(set = 0, binding = 1, std430) buffer BlockActive 15 | { 16 | uint block_active[]; 17 | }; 18 | 19 | layout(set = 0, binding = 2, std430) buffer BlockVisible 20 | { 21 | uint block_visible[]; 22 | }; 23 | 24 | // Resetting the blocks active flag is split to a separate stage so that we 25 | // don't have a read-write conflict where some threads try to mark it active 26 | // while another is resetting it to inactive 27 | void main(void) 28 | { 29 | const uvec3 n_blocks = padded_dims.xyz / uvec3(4); 30 | if (gl_GlobalInvocationID.x >= n_blocks.x) { 31 | return; 32 | } 33 | 34 | const uint block_id = gl_GlobalInvocationID.x + n_blocks.x 35 | * (gl_GlobalInvocationID.y + n_blocks.y * gl_GlobalInvocationID.z); 36 | 37 | block_active[block_id] = 0; 38 | block_visible[block_id] = 0; 39 | } 40 | 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WebGPU Speculative Progressive Isosurface Raycaster 2 | 3 | This is the implementation of the GPU-parallel speculative, progressive 4 | implicit isosurface raycasting algorithm for 5 | block-compressed data sets described in "Speculative Progressive Raycasting for Memory Constrained Isosurface Visualization of Massive Volumes" by Will Usher, Landon Dyken, and Sidharth Kumar 6 | at LDAV 2023. Please [see the paper](https://www.willusher.io/publications/wgpu-prog-iso) for more details. 7 | 8 | ## Usage 9 | 10 | - [Skull](https://www.willusher.io/webgpu-prog-iso/) (256^3) 11 | - [Magnetic Reconnection](https://www.willusher.io/webgpu-prog-iso/#magnetic) (512^3) 12 | - [Chameleon](https://www.willusher.io/webgpu-prog-iso/#chameleon) (1024x1024x1080) 13 | 14 | The data sets are available on the [Open SciVis Data Sets page](https://klacansky.com/open-scivis-datasets/). 
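At a high level, each progressive pass speculates which blocks the currently active rays
may need next, groups the rays by block, decompresses any blocks missing from the GPU LRU
cache, then raytraces each block's rays and composites the result. The outline below is
only a rough sketch inferred from the shader names in this repository; the function names
are hypothetical, and `js/render.js` contains the actual pass sequence.

```js
// Illustrative outline only: names are hypothetical, not this repo's API
computeInitialRays();                  // compute_initial_rays.vert/.frag: per-pixel ray entry points
while (!converged) {
    speculateRayBlockIntersections();  // macro_traverse.comp, write_ray_and_block_id.comp
    compactAndSortRaysByBlock();       // stream_compact*.comp, radix_sort_chunk.comp, merge_sorted_chunks.comp
    updateCacheAndDecompress();        // lru_cache_*.comp, zfp_decompress_block.comp
    raytraceActiveBlocks();            // raytrace_active_block.comp
    compositeAndPresent();             // depth_composite.comp, display_render.vert/.frag
}
```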
15 | 16 | ## Images 17 | 18 | 19 | ![skull_256x256x256_uint8 raw crate2_prog_iso](https://github.com/Twinklebear/webgpu-prog-iso/assets/1522476/831200d8-201a-479c-b2b4-b1124ef8c43a) 20 | 21 | ![chameleon_1024x1024x1080_uint16 raw crate2_prog_iso](https://github.com/Twinklebear/webgpu-prog-iso/assets/1522476/f1d7b80b-c170-43c4-8c61-6257295a5240) 22 | -------------------------------------------------------------------------------- /shaders/write_ray_and_block_id.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "util.glsl" 4 | 5 | layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 0, binding = 0, std140) uniform VolumeParams 8 | { 9 | uvec4 volume_dims; 10 | uvec4 padded_dims; 11 | vec4 volume_scale; 12 | uint max_bits; 13 | float isovalue; 14 | uint image_width; 15 | }; 16 | 17 | layout(set = 0, binding = 1, std430) buffer RayBlockID 18 | { 19 | uint block_id[]; 20 | }; 21 | 22 | layout(set = 0, binding = 2, std430) buffer RayActive 23 | { 24 | uint ray_active[]; 25 | }; 26 | 27 | void main() { 28 | if (gl_GlobalInvocationID.x >= image_width) { 29 | return; 30 | } 31 | // Write out the ray index and its block id so that we can sort 32 | // the ray IDs using the block ID as the key. Also save out which rays 33 | // are active so we can do a compaction on the active ray IDs so that 34 | // we don't have to sort the whole framebuffer each time 35 | // The ray IDs are built during the stream compact IDs, so we don't need to 36 | // write them out here 37 | uint ray_index = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * image_width; 38 | ray_active[ray_index] = block_id[ray_index] != UINT_MAX ? 1 : 0; 39 | } 40 | 41 | -------------------------------------------------------------------------------- /shaders/compute_initial_rays_vert.wgsl: -------------------------------------------------------------------------------- 1 | // Vertex shader 2 | struct VertexOutput { 3 | [[builtin(position)]] Position : vec4; 4 | [[location(0)]] vray_dir: vec3; 5 | [[location(1), interpolate(flat)]] transformed_eye: vec3; 6 | }; 7 | [[block]] struct ViewParams { 8 | proj_view : mat4x4; 9 | eye_pos : vec4; 10 | eye_dir : vec4; 11 | near_plane : f32; 12 | }; 13 | [[block]] struct VolumeParams { 14 | volume_dims : vec4; 15 | padded_dims : vec4; 16 | volume_scale : vec4; 17 | max_bits : u32; 18 | isovalue : f32; 19 | image_width : u32; 20 | }; 21 | [[group(0), binding(0)]] var view_params : ViewParams; 22 | [[group(0), binding(2)]] var volume_params : VolumeParams; 23 | 24 | [[stage(vertex)]] 25 | fn main([[location(0)]] position : vec3) 26 | -> VertexOutput { 27 | var output : VertexOutput; 28 | var volume_translation : vec3 = vec3(0, 0, 0) - volume_params.volume_scale.xyz * 0.5; 29 | output.Position = view_params.proj_view * vec4(position * volume_params.volume_scale.xyz + volume_translation, 1.0); 30 | output.transformed_eye = (view_params.eye_pos.xyz - volume_translation) / volume_params.volume_scale.xyz; 31 | output.vray_dir = position - output.transformed_eye; 32 | return output; 33 | } -------------------------------------------------------------------------------- /shaders/zfp_decompress_block.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; 4 | 5 | #include "zfp_decompress.comp" 6 | 7 | layout(set = 0, binding = 2, std430) buffer Decompressed 8 | { 9 | float 
decompressed[]; 10 | }; 11 | 12 | layout(set = 0, binding = 3, std430) buffer BlockIDs 13 | { 14 | uint block_ids[]; 15 | }; 16 | 17 | // Note: should just make a bind group layout which maps all the cache params 18 | // and place it on a separate set 19 | layout(set = 0, binding = 4, std430) buffer CachedItemSlots 20 | { 21 | uint cached_item_slots[]; 22 | }; 23 | 24 | layout(set = 1, binding = 0) uniform DecompressBlockOffset 25 | { 26 | uint start_block_offset; 27 | uint total_n_blocks; 28 | }; 29 | 30 | void main(void) 31 | { 32 | if (start_block_offset + gl_GlobalInvocationID.x >= total_n_blocks) { 33 | return; 34 | } 35 | const uint block_index = block_ids[start_block_offset + gl_GlobalInvocationID.x]; 36 | const uint cache_location = cached_item_slots[block_index]; 37 | BlockReader reader = create_block_reader(block_index); 38 | float decompressed_block[ZFP_BLOCK_SIZE]; 39 | decompress_block(reader, decompressed_block); 40 | for (uint i = 0; i < ZFP_BLOCK_SIZE; ++i) { 41 | decompressed[cache_location * 64 + i] = decompressed_block[i]; 42 | } 43 | } 44 | 45 | -------------------------------------------------------------------------------- /shaders/debug_view_rays_per_block.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "util.glsl" 4 | 5 | layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 0, binding = 0, std140) uniform VolumeParams 8 | { 9 | uvec4 volume_dims; 10 | uvec4 padded_dims; 11 | vec4 volume_scale; 12 | uint max_bits; 13 | float isovalue; 14 | uint image_width; 15 | }; 16 | 17 | layout(set = 0, binding = 1, std430) buffer BlockNumRays 18 | { 19 | uint block_num_rays[]; 20 | }; 21 | 22 | layout(set = 0, binding = 2, std430) buffer RayInformation 23 | { 24 | RayInfo rays[]; 25 | }; 26 | 27 | layout(set = 0, binding = 4, std430) buffer RayBlockIDs 28 | { 29 | uint block_ids[]; 30 | }; 31 | 32 | uniform layout(set = 0, binding = 3, rgba8) writeonly image2D render_target; 33 | 34 | void main(void) 35 | { 36 | uint ray_index = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * image_width; 37 | if (rays[ray_index].t == FLT_MAX) { 38 | return; 39 | } 40 | 41 | const uint block_id = block_ids[ray_index]; 42 | vec4 color; 43 | // We don't really bother to find the max, though we could do it in the 44 | // shader since this is just for debugging. 
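// The fixed 256.0 divisor below makes this a grayscale heat map that saturates to white
// for blocks hit by 256 or more rays, which is fine for a debug view.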
45 | color.rgb = vec3(block_num_rays[block_id] / 256.0); 46 | color.a = 1.0; 47 | imageStore(render_target, ivec2(gl_GlobalInvocationID.xy), color); 48 | } 49 | 50 | 51 | -------------------------------------------------------------------------------- /shaders/debug_view_rays_per_block.wgsl: -------------------------------------------------------------------------------- 1 | //include util.wgsl 2 | [[block]] struct UintArray { 3 | vals : array; 4 | }; 5 | [[block]] struct RayInfos { 6 | rays : array; 7 | }; 8 | [[block]] struct VolumeParams { 9 | volume_dims : vec4; 10 | padded_dims : vec4; 11 | volume_scale : vec4; 12 | max_bits : u32; 13 | isovalue : f32; 14 | image_width : u32; 15 | }; 16 | 17 | [[group(0), binding(0)]] var volume_params : VolumeParams; 18 | [[group(0), binding(1)]] var block_num_rays : UintArray; 19 | [[group(0), binding(2)]] var ray_info : RayInfos; 20 | // May not need to be uniform variable 21 | [[group(0), binding(3)]] var render_target : texture_storage_2d; 22 | 23 | [[stage(compute), workgroup_size(1, 1, 1)]] 24 | fn main([[builtin(global_invocation_id)]] global_id : vec3) { 25 | var ray_index : u32 = global_id.x + global_id.y * volume_params.image_width; 26 | if (ray_info.rays[ray_index].t == FLT_MAX) { 27 | return; 28 | } 29 | 30 | let block_id : u32 = ray_info.rays[ray_index].block_id; 31 | var color : vec4; 32 | // We don't really bother to find the max, though we could do it in the 33 | // shader since this is just for debugging. 34 | color.rgb = vec3(block_num_rays.vals[block_id] / 256.0); 35 | color.a = 1.0; 36 | textureStore(render_target, vec2(global_id.xy), color); 37 | } -------------------------------------------------------------------------------- /shaders/count_block_rays.wgsl: -------------------------------------------------------------------------------- 1 | /* 2 | // #include "util.glsl" 3 | */ 4 | 5 | const UINT_MAX: u32 = 0xffffffffu; 6 | const FLT_MAX: f32 = 3.402823466e+38; 7 | 8 | alias float2 = vec2; 9 | alias float3 = vec3; 10 | alias float4 = vec4; 11 | alias uint2 = vec2; 12 | alias uint3 = vec3; 13 | alias uint4 = vec4; 14 | 15 | struct VolumeParams { 16 | volume_dims: uint4, 17 | padded_dims: uint4, 18 | volume_scale: float4, 19 | max_bits: u32, 20 | isovalue: f32, 21 | image_width: u32, 22 | } 23 | 24 | @group(0) @binding(0) var volume_params : VolumeParams; 25 | 26 | @group(0) @binding(1) var block_num_rays : array>; 27 | 28 | @group(0) @binding(2) var ray_block_ids : array; 29 | 30 | @group(0) @binding(3) var block_compact_offsets : array; 31 | 32 | 33 | @compute @workgroup_size(32, 1, 1) 34 | fn main(@builtin(global_invocation_id) g_invocation_id : vec3) { 35 | if (g_invocation_id.x >= volume_params.image_width) { 36 | return; 37 | } 38 | 39 | let ray_index = g_invocation_id.x + g_invocation_id.y * volume_params.image_width; 40 | 41 | let block_id = ray_block_ids[ray_index]; 42 | if (block_id == UINT_MAX) { 43 | return; 44 | } 45 | 46 | // Count this ray for the block 47 | let block_index = block_compact_offsets[block_id]; 48 | atomicAdd(&block_num_rays[block_index], 1u); 49 | } 50 | 51 | -------------------------------------------------------------------------------- /shaders/compute_initial_rays.frag: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "util.glsl" 4 | 5 | layout(location = 0) in vec3 vray_dir; 6 | layout(location = 1) flat in vec3 transformed_eye; 7 | 8 | layout(set = 0, binding = 1, std430) buffer RayInformation { 9 | RayInfo rays[]; 10 | }; 
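// RayInfo is defined in util.glsl (not included in this excerpt); as used in this shader
// it holds at least the ray direction and the parametric entry distance t, which are
// filled in below for every pixel whose ray actually hits the volume bounds.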
11 | 12 | layout(set = 0, binding = 2, std140) uniform VolumeParams 13 | { 14 | uvec4 volume_dims; 15 | uvec4 padded_dims; 16 | vec4 volume_scale; 17 | uint max_bits; 18 | float isovalue; 19 | uint image_width; 20 | }; 21 | 22 | layout(set = 0, binding = 3, std430) buffer RayBlockIDs 23 | { 24 | uint block_ids[]; 25 | }; 26 | 27 | void main() { 28 | vec3 ray_dir = normalize(vray_dir); 29 | 30 | // Transform the ray into the dual grid space and intersect with the dual grid bounds 31 | const vec3 vol_eye = transformed_eye * volume_dims.xyz - vec3(0.5); 32 | const vec3 grid_ray_dir = normalize(ray_dir * volume_dims.xyz); 33 | 34 | vec2 t_hit = intersect_box(vol_eye, grid_ray_dir, vec3(0), volume_dims.xyz - 1); 35 | 36 | // We don't want to sample voxels behind the eye if it's 37 | // inside the volume, so keep the starting point at or in front 38 | // of the eye 39 | t_hit.x = max(t_hit.x, 0.0); 40 | 41 | const uint pixel = uint(gl_FragCoord.x) + image_width * uint(gl_FragCoord.y); 42 | if (t_hit.x < t_hit.y) { 43 | rays[pixel].ray_dir = grid_ray_dir; 44 | block_ids[pixel] = UINT_MAX; 45 | rays[pixel].t = t_hit.x; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /shaders/lru_cache_mark_new_items.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "lru_cache_inputs.comp" 4 | 5 | // TODO: can use a larger thread group size 6 | layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; 7 | 8 | layout(set = 1, binding = 0, std430) readonly buffer ItemNeeded 9 | { 10 | // Input of active elements from the marching cubes active block determination pass 11 | uint item_needed[]; 12 | }; 13 | 14 | layout(set = 1, binding = 1, std430) buffer ItemNeedsCaching 15 | { 16 | // 1/0 which new items need to be added to the cache 17 | uint item_needs_caching[]; 18 | }; 19 | 20 | layout(set = 2, binding = 0, std140) uniform PushConstants 21 | { 22 | uint global_idx_offset; 23 | uint num_work_items; 24 | }; 25 | 26 | void main(void) 27 | { 28 | const uint idx = gl_GlobalInvocationID.x + global_idx_offset * 32; 29 | const int slot = cached_item_slot[idx]; 30 | if (slot >= 0) { 31 | item_needs_caching[idx] = 0; 32 | if (item_needed[idx] == 1) { 33 | // Item is already cached, reset age and unset slot availability 34 | slot_data[slot].age = 0; 35 | slot_data[slot].available = 0; 36 | } else { 37 | slot_data[slot].available = 1; 38 | } 39 | } else { 40 | // Item is not cached, just pass through whether we need the item or not 41 | // to tell us if we need to cache it 42 | item_needs_caching[idx] = item_needed[idx]; 43 | } 44 | } 45 | 46 | -------------------------------------------------------------------------------- /shaders/combine_block_information.wgsl: -------------------------------------------------------------------------------- 1 | //include util.wgsl 2 | [[block]] struct UintArray { 3 | vals : array; 4 | }; 5 | [[block]] struct BlockInfos { 6 | vals : array; 7 | }; 8 | 9 | [[group(0), binding(0)]] var blocks : BlockInfos; 10 | [[group(0), binding(1)]] var block_ids : UintArray; 11 | [[group(0), binding(2)]] var block_ray_offsets : UintArray; 12 | [[group(0), binding(3)]] var block_num_rays : UintArray; 13 | [[group(0), binding(4)]] var block_active : UintArray; 14 | 15 | [[stage(compute), workgroup_size(BLOCK_SIZE / 2.0)]] 16 | fn main([[builtin(global_invocation_id)]] global_id : vec3) { 17 | // Combine the buffers to fit in fewer storage buffers until limits are removed 18 
| // Note that 8 will be supported soon in Chromium so we could remove this 19 | // This data is compacted down as it's run on the compacted block ids 20 | let id : u32 = block_ids.vals[global_id.x]; 21 | blocks.vals[global_id.x].id = id; 22 | blocks.vals[global_id.x].ray_offset = block_ray_offsets.vals[id]; 23 | blocks.vals[global_id.x].num_rays = block_num_rays.vals[id]; 24 | // If the block is running in this pipeline it must be visible, 25 | // so if it's not active, then it's an LOD block 26 | if (block_active.vals[id] == 0) { 27 | blocks.vals[global_id.x].lod = 1; 28 | } else { 29 | blocks.vals[global_id.x].lod = 0; 30 | } 31 | } -------------------------------------------------------------------------------- /shaders/combine_block_information.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "util.glsl" 4 | 5 | layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 0, binding = 0, std430) buffer BlockInformation 8 | { 9 | BlockInfo blocks[]; 10 | }; 11 | 12 | layout(set = 0, binding = 1, std430) buffer BlockIDs 13 | { 14 | uint block_ids[]; 15 | }; 16 | 17 | layout(set = 0, binding = 2, std430) buffer BlockRayOffset 18 | { 19 | uint block_ray_offsets[]; 20 | }; 21 | 22 | layout(set = 0, binding = 3, std430) buffer BlockNumRays 23 | { 24 | uint block_num_rays[]; 25 | }; 26 | 27 | layout(set = 0, binding = 4, std430) buffer BlockActive 28 | { 29 | uint block_active[]; 30 | }; 31 | 32 | layout(set = 1, binding = 0, std140) uniform BlockIDOffset 33 | { 34 | uint id_offset; 35 | uint total_work_groups; 36 | uint total_active_blocks; 37 | }; 38 | 39 | void main(void) 40 | { 41 | const uint item_idx = gl_GlobalInvocationID.x + id_offset * gl_WorkGroupSize.x; 42 | if (item_idx >= total_active_blocks) { 43 | return; 44 | } 45 | // Combine the buffers to fit in fewer storage buffers until limits are removed 46 | // Note that 8 will be supported soon in Chromium so we could remove this 47 | // This data is compacted down as it's run on the compacted block ids 48 | const uint id = block_ids[item_idx]; 49 | blocks[item_idx].id = id; 50 | blocks[item_idx].ray_offset = block_ray_offsets[item_idx]; 51 | blocks[item_idx].num_rays = block_num_rays[item_idx]; 52 | // We don't do LOD, just set to 0 53 | blocks[item_idx].lod = 0; 54 | } 55 | 56 | -------------------------------------------------------------------------------- /shaders/prefix_sum.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | // See https://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf 4 | 5 | layout(local_size_x = BLOCK_SIZE / 2) in; 6 | 7 | layout(set = 0, binding = 0, std430) buffer Data { 8 | uint vals[]; 9 | }; 10 | 11 | layout(set = 0, binding = 1, std430) buffer BlockSums { 12 | uint block_sums[]; 13 | }; 14 | 15 | shared uint chunk[BLOCK_SIZE]; 16 | 17 | void main(void) { 18 | chunk[2 * gl_LocalInvocationID.x] = vals[2 * gl_GlobalInvocationID.x]; 19 | chunk[2 * gl_LocalInvocationID.x + 1] = vals[2 * gl_GlobalInvocationID.x + 1]; 20 | 21 | uint offs = 1; 22 | // Reduce step up tree 23 | for (int d = BLOCK_SIZE >> 1; d > 0; d = d >> 1) { 24 | barrier(); 25 | if (gl_LocalInvocationID.x < d) { 26 | uint a = offs * (2 * gl_LocalInvocationID.x + 1) - 1; 27 | uint b = offs * (2 * gl_LocalInvocationID.x + 2) - 1; 28 | chunk[b] += chunk[a]; 29 | } 30 | offs = offs << 1; 31 | } 32 | 33 | if (gl_LocalInvocationID.x == 0) { 34 | block_sums[gl_WorkGroupID.x] = 
chunk[BLOCK_SIZE - 1]; 35 | chunk[BLOCK_SIZE - 1] = 0; 36 | } 37 | 38 | // Sweep down the tree to finish the scan 39 | for (int d = 1; d < BLOCK_SIZE; d = d << 1) { 40 | offs = offs >> 1; 41 | barrier(); 42 | if (gl_LocalInvocationID.x < d) { 43 | uint a = offs * (2 * gl_LocalInvocationID.x + 1) - 1; 44 | uint b = offs * (2 * gl_LocalInvocationID.x + 2) - 1; 45 | const uint tmp = chunk[a]; 46 | chunk[a] = chunk[b]; 47 | chunk[b] += tmp; 48 | } 49 | } 50 | 51 | barrier(); 52 | vals[2 * gl_GlobalInvocationID.x] = chunk[2 * gl_LocalInvocationID.x]; 53 | vals[2 * gl_GlobalInvocationID.x + 1] = chunk[2 * gl_LocalInvocationID.x + 1]; 54 | } 55 | 56 | -------------------------------------------------------------------------------- /shaders/block_prefix_sum.wgsl: -------------------------------------------------------------------------------- 1 | [[block]] struct UintArray { 2 | vals : array; 3 | }; 4 | [[block]] struct Carry { 5 | in : u32; 6 | out : u32; 7 | }; 8 | 9 | [[group(0), binding(0)]] var vals : UintArray; 10 | [[group(0), binding(1)]] var carry : Carry; 11 | 12 | var chunk : array; 13 | 14 | [[stage(compute), workgroup_size(BLOCK_SIZE / 2.0)]] 15 | fn main([[builtin(global_invocation_id)]] global_id : vec3, 16 | [[builtin(workgroup_id)]] workgroup_id : vec3, 17 | [[builtin(local_invocation_id)]] local_id : vec3) { 18 | chunk[2 * local_id.x] = vals.vals[2 * global_id.x]; 19 | chunk[2 * local_id.x + 1] = vals.vals[2 * global_id.x + 1]; 20 | 21 | var offs : u32 = 1; 22 | // Reduce step up tree 23 | for (var d : i32 = BLOCK_SIZE >> 1; d > 0; d = d >> 1) { 24 | workgroupBarrier(); 25 | if (local_id.x < d) { 26 | var a : u32 = offs * (2 * local_id.x + 1) - 1; 27 | var b : u32 = offs * (2 * local_id.x + 2) - 1; 28 | chunk[b] = chunk[b] + chunk[a]; 29 | } 30 | offs = offs << 1; 31 | } 32 | 33 | if (local_id.x == 0) { 34 | carry.out = chunk[BLOCK_SIZE - 1] + carry.in; 35 | chunk[BLOCK_SIZE - 1] = 0; 36 | } 37 | 38 | // Sweep down the tree to finish the scan 39 | for (var d : i32 = 1; d < BLOCK_SIZE; d = d << 1) { 40 | offs = offs >> 1; 41 | workgroupBarrier(); 42 | if (local_id.x < d) { 43 | var a : u32 = offs * (2 * local_id.x + 1) - 1; 44 | var b : u32 = offs * (2 * local_id.x + 2) - 1; 45 | let tmp : u32 = chunk[a]; 46 | chunk[a] = chunk[b]; 47 | chunk[b] = chunk[b] + tmp; 48 | } 49 | } 50 | 51 | workgroupBarrier(); 52 | vals.vals[2 * global_id.x] = chunk[2 * local_id.x] + carry.in; 53 | vals.vals[2 * global_id.x + 1] = chunk[2 * local_id.x + 1] + carry.in; 54 | } -------------------------------------------------------------------------------- /shaders/block_prefix_sum.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | // See https://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf 4 | // Compute the prefix sum over the results from each block, this no longer 5 | // writes out the block sums since we're scanning on the block sums 6 | 7 | layout(local_size_x = BLOCK_SIZE / 2) in; 8 | 9 | layout(set = 0, binding = 0, std430) buffer Data { 10 | uint vals[]; 11 | }; 12 | 13 | layout(set = 0, binding = 1, std430) buffer CarryInOut { 14 | uint carry_in; 15 | uint carry_out; 16 | }; 17 | 18 | shared uint chunk[BLOCK_SIZE]; 19 | 20 | void main(void) { 21 | chunk[2 * gl_LocalInvocationID.x] = vals[2 * gl_GlobalInvocationID.x]; 22 | chunk[2 * gl_LocalInvocationID.x + 1] = vals[2 * gl_GlobalInvocationID.x + 1]; 23 | 24 | uint offs = 1; 25 | // Reduce step up tree 26 | for (int d = BLOCK_SIZE >> 1; d > 0; d = d >> 1) { 27 | barrier(); 
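// Up-sweep (reduce) phase of the Blelloch scan: each round sums pairs of partial sums
// at stride `offs`. Worked example, assuming BLOCK_SIZE = 8 and carry_in = 0:
//   chunk = [3,1,7,0,4,1,6,3]
//   d=4 -> [3,4,7,7,4,5,6,9]   d=2 -> [3,4,7,11,4,5,6,14]   d=1 -> [3,4,7,11,4,5,6,25]
// The block total (25) becomes carry_out, the last element is zeroed, and the down-sweep
// below rearranges chunk into the exclusive scan [0,3,4,11,11,15,16,22].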
28 | if (gl_LocalInvocationID.x < d) { 29 | uint a = offs * (2 * gl_LocalInvocationID.x + 1) - 1; 30 | uint b = offs * (2 * gl_LocalInvocationID.x + 2) - 1; 31 | chunk[b] += chunk[a]; 32 | } 33 | offs = offs << 1; 34 | } 35 | 36 | if (gl_LocalInvocationID.x == 0) { 37 | carry_out = chunk[BLOCK_SIZE - 1] + carry_in; 38 | chunk[BLOCK_SIZE - 1] = 0; 39 | } 40 | 41 | // Sweep down the tree to finish the scan 42 | for (int d = 1; d < BLOCK_SIZE; d = d << 1) { 43 | offs = offs >> 1; 44 | barrier(); 45 | if (gl_LocalInvocationID.x < d) { 46 | uint a = offs * (2 * gl_LocalInvocationID.x + 1) - 1; 47 | uint b = offs * (2 * gl_LocalInvocationID.x + 2) - 1; 48 | const uint tmp = chunk[a]; 49 | chunk[a] = chunk[b]; 50 | chunk[b] += tmp; 51 | } 52 | } 53 | 54 | barrier(); 55 | vals[2 * gl_GlobalInvocationID.x] = chunk[2 * gl_LocalInvocationID.x] + carry_in; 56 | vals[2 * gl_GlobalInvocationID.x + 1] = chunk[2 * gl_LocalInvocationID.x + 1] + carry_in; 57 | } 58 | 59 | -------------------------------------------------------------------------------- /shaders/compute_initial_rays_frag.wgsl: -------------------------------------------------------------------------------- 1 | //include util.wgsl 2 | // Fragment shader 3 | [[block]] struct RayInfos { 4 | rays : array; 5 | }; 6 | [[block]] struct VolumeParams { 7 | volume_dims : vec4; 8 | padded_dims : vec4; 9 | volume_scale : vec4; 10 | max_bits : u32; 11 | isovalue : f32; 12 | image_width : u32; 13 | }; 14 | 15 | [[group(0), binding(1)]] var ray_info: RayInfos; 16 | [[group(0), binding(2)]] var volume_params : VolumeParams; 17 | 18 | fn intersect_box(orig : vec3, dir : vec3, box_min : vec3, box_max : vec3) -> vec2 { 19 | let inv_dir : vec3 = 1.0 / dir; 20 | let tmin_tmp : vec3 = (box_min - orig) * inv_dir; 21 | let tmax_tmp : vec3 = (box_max - orig) * inv_dir; 22 | var tmin : vec3 = min(tmin_tmp, tmax_tmp); 23 | var tmax : vec3 = max(tmin_tmp, tmax_tmp); 24 | var t0 : f32 = max(tmin.x, max(tmin.y, tmin.z)); 25 | var t1 : f32 = min(tmax.x, min(tmax.y, tmax.z)); 26 | return vec2(t0, t1); 27 | } 28 | 29 | [[stage(fragment)]] 30 | fn main( 31 | [[builtin(position)]] frag_coord : vec4, 32 | [[location(0)]] vray_dir : vec3, 33 | [[location(1), interpolate(flat)]] transformed_eye : vec3 34 | ) { 35 | var ray_dir : vec3 = normalize(vray_dir); 36 | 37 | // Transform the ray into the dual grid space and intersect with the dual grid bounds 38 | let vol_eye : vec3 = transformed_eye * volume_params.volume_dims.xyz - vec3(0.5); 39 | let grid_ray_dir : vec3 = normalize(ray_dir * volume_dims.xyz); 40 | 41 | var t_hit : vec2 = intersect_box(vol_eye, grid_ray_dir, vec3(0.0), volume_dims.xyz - 1.0); 42 | 43 | // We don't want to sample voxels behind the eye if it's 44 | // inside the volume, so keep the starting point at or in front 45 | // of the eye 46 | t_hit.x = max(t_hit.x, 0.0); 47 | 48 | let pixel : u32 = u32(frag_coord.x) + volume_params.image_width * u32(frag_coord.y); 49 | if (t_hit.x < t_hit.y) { 50 | ray_info.rays[pixel].ray_dir = ray_dir; 51 | ray_info.rays[pixel].block_id = UINT_MAX; 52 | ray_info.rays[pixel].t = t_hit.x; 53 | } 54 | } 55 | 56 | -------------------------------------------------------------------------------- /shaders/compute_voxel_range.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; 4 | 5 | #include "zfp_decompress.comp" 6 | #include "util.glsl" 7 | 8 | // Note: This could really be done by the file format or 
server ahead of time, 9 | // instead of having the client do a preprocess to compute these ranges 10 | layout(set = 0, binding = 2, std430) buffer BlockInformation 11 | { 12 | vec2 block_ranges[]; 13 | }; 14 | 15 | layout(set = 1, binding = 0, std140) uniform BlockIDOffset 16 | { 17 | uint block_id_offset; 18 | }; 19 | 20 | layout(set = 2, binding = 0, std430) buffer VoxelInformation 21 | { 22 | vec2 voxel_ranges[]; 23 | }; 24 | 25 | void main(void) 26 | { 27 | const uint block_index = gl_GlobalInvocationID.x + block_id_offset * 32; 28 | const uint total_blocks = ((padded_dims.x * padded_dims.y) / 64) * padded_dims.z; 29 | const uvec3 n_blocks = padded_dims.xyz / uvec3(4); 30 | 31 | if (block_index >= total_blocks) { 32 | return; 33 | } 34 | 35 | uvec3 block_pos; 36 | block_pos.x = block_index % n_blocks.x; 37 | block_pos.y = (block_index / n_blocks.x) % n_blocks.y; 38 | block_pos.z = block_index / (n_blocks.x * n_blocks.y); 39 | 40 | // Value range for a block is its range combined with that of its neighbors to 41 | // the positive side 42 | vec2 cell_range = block_ranges[block_index]; 43 | for (int k = 0; k < 2; ++k) { 44 | for (int j = 0; j < 2; ++j) { 45 | for (int i = 0; i < 2; ++i) { 46 | const uvec3 neighbor = uvec3(i, j, k); 47 | const uvec3 coords = block_pos + neighbor; 48 | if (neighbor == uvec3(0) || any(lessThan(coords, uvec3(0))) 49 | || any(greaterThanEqual(coords, n_blocks))) 50 | { 51 | continue; 52 | } 53 | // TODO: use fcn 54 | const uint neighbor_id = coords.x + n_blocks.x * (coords.y + n_blocks.y * coords.z); 55 | cell_range.x = min(block_ranges[neighbor_id].x, cell_range.x); 56 | cell_range.y = max(block_ranges[neighbor_id].y, cell_range.y); 57 | } 58 | } 59 | } 60 | voxel_ranges[block_index] = cell_range; 61 | } 62 | -------------------------------------------------------------------------------- /shaders/load_block.wgsl: -------------------------------------------------------------------------------- 1 | let BLOCK_NUM_VOXELS : u32 = 64; 2 | 3 | // For ghost voxels, we only need those in the positive dir, 4 | // since verts for triangles ''behind'' us are the job of the neighboring 5 | // block to that side. 
So our max size is 5^3 elements if we have a ghost 6 | // layer on each side, which is rounded up to 128 7 | var volume_block : array; 8 | 9 | [[block]] struct VolumeParams { 10 | volume_dims : vec4; 11 | padded_dims : vec4; 12 | volume_scale : vec4; 13 | max_bits : u32; 14 | isovalue : f32; 15 | image_width : u32; 16 | }; 17 | [[block]] struct FloatArray { 18 | vals : array; 19 | }; 20 | [[block]] struct IntArray { 21 | vals : array; 22 | }; 23 | 24 | [[group(0), binding(0)]] var volume_params : VolumeParams; 25 | [[group(0), binding(1)]] var decompressed : FloatArray; 26 | // Cached item slots in the cache 27 | // this is lruCache.cachedItemSlots 28 | [[group(0), binding(2)]] var block_locations : IntArray; 29 | 30 | let index_to_vertex : array, 8> = array>( 31 | vec3(0, 0, 0), // v000 = 0 32 | vec3(1, 0, 0), // v100 = 1 33 | vec3(0, 1, 0), // v010 = 2 34 | vec3(1, 1, 0), // v110 = 3 35 | vec3(0, 0, 1), // v001 = 4 36 | vec3(1, 0, 1), // v101 = 5 37 | vec3(0, 1, 1), // v011 = 6 38 | vec3(1, 1, 1) // v111 = 7 39 | ); 40 | 41 | fn ray_id_to_pos(id : u32) -> vec2 { 42 | return vec2(id % volume_params.image_width, id / volume_params.image_width); 43 | } 44 | 45 | fn block_id_to_pos(id : u32) -> vec3 { 46 | var n_blocks : vec3 = volume_params.padded_dims.xyz / vec3(4); 47 | return vec3(id % n_blocks.x, 48 | (id / n_blocks.x) % n_blocks.y, 49 | id / (n_blocks.x * n_blocks.y)); 50 | } 51 | 52 | fn compute_block_id(block_pos : vec3) -> u32 { 53 | var n_blocks : vec3 = padded_dims.xyz / vec3(4); 54 | return block_pos.x + n_blocks.x * (block_pos.y + n_blocks.y * block_pos.z); 55 | } 56 | 57 | fn voxel_id_to_voxel(id : u32) -> vec3 { 58 | return vec3(id % 4, (id / 4) % 4, id / 16); 59 | } 60 | 61 | fn compute_voxel_id(voxel_pos : vec3, block_dims : vec3) -> u32 { 62 | return voxel_pos.x + block_dims.x * (voxel_pos.y + block_dims.y * voxel_pos.z); 63 | } 64 | 65 | fn compute_vertex_values(voxel_pos : vec3, block_dims : vec3, values : array, value_range : vec2) { 66 | 67 | } -------------------------------------------------------------------------------- /shaders/compute_coarse_cell_range.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; 4 | 5 | #include "util.glsl" 6 | 7 | layout(set = 0, binding = 0, std140) uniform VolumeParams 8 | { 9 | uvec4 volume_dims; 10 | uvec4 padded_dims; 11 | vec4 volume_scale; 12 | uint max_bits; 13 | float isovalue; 14 | uint image_width; 15 | }; 16 | 17 | layout(set = 0, binding = 1, std140) uniform BlockIDOffset 18 | { 19 | uint block_id_offset; 20 | }; 21 | 22 | // This is the brick value range + its neighbors from 23 | // the compute_voxel_range.comp step 24 | layout(set = 0, binding = 2, std430) buffer BrickInformation 25 | { 26 | vec2 voxel_ranges[]; 27 | }; 28 | 29 | layout(set = 0, binding = 3, std430) buffer CoarseCellRange 30 | { 31 | vec2 coarse_cell_ranges[]; 32 | }; 33 | 34 | 35 | void main(void) 36 | { 37 | const uint coarse_cell_idx = gl_GlobalInvocationID.x + block_id_offset * 32; 38 | const uint total_coarse_cells = ((padded_dims.x * padded_dims.y) / (16 * 16 * 16)) * padded_dims.z ; 39 | 40 | if (coarse_cell_idx >= total_coarse_cells) { 41 | return; 42 | } 43 | 44 | const uvec3 n_blocks = padded_dims.xyz / uvec3(4); 45 | const uvec3 n_cells = uvec3(ceil(vec3(n_blocks) / vec3(4))); 46 | 47 | uvec3 cell_pos; 48 | cell_pos.x = coarse_cell_idx % n_cells.x; 49 | cell_pos.y = (coarse_cell_idx / n_cells.x) % n_cells.y; 50 | 
cell_pos.z = coarse_cell_idx / (n_cells.x * n_cells.y); 51 | 52 | const uvec3 block_pos = cell_pos * 4; 53 | // Loop through the bricks within this cell and merge their range to compute the range 54 | // for the coarse cell 55 | uint block_idx = block_pos.x + n_blocks.x * (block_pos.y + n_blocks.y * block_pos.z); 56 | vec2 coarse_cell_range = voxel_ranges[block_idx]; 57 | for (int k = 0; k < 4; ++k) { 58 | for (int j = 0; j < 4; ++j) { 59 | for (int i = 0; i < 4; ++i) { 60 | const uvec3 offs = uvec3(i, j, k); 61 | const uvec3 coords = block_pos + offs; 62 | if (any(greaterThanEqual(coords, n_blocks))) { 63 | continue; 64 | } 65 | uint cur_block_idx = coords.x + n_blocks.x * (coords.y + n_blocks.y * coords.z); 66 | coarse_cell_range.x = min(voxel_ranges[cur_block_idx].x, coarse_cell_range.x); 67 | coarse_cell_range.y = max(voxel_ranges[cur_block_idx].y, coarse_cell_range.y); 68 | } 69 | } 70 | } 71 | coarse_cell_ranges[coarse_cell_idx] = coarse_cell_range; 72 | } 73 | 74 | -------------------------------------------------------------------------------- /shaders/zfp_compute_block_range.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; 4 | 5 | #include "zfp_decompress.comp" 6 | #include "util.glsl" 7 | 8 | // Note: This could really be done by the file format or server ahead of time, 9 | // instead of having the client do a preprocess to compute these ranges 10 | layout(set = 0, binding = 2, std430) buffer BlockInformation 11 | { 12 | vec2 block_ranges[]; 13 | }; 14 | 15 | layout(set = 1, binding = 0, std140) uniform BlockIDOffset 16 | { 17 | uint block_id_offset; 18 | }; 19 | 20 | const uvec3 index_to_vertex[8] = { 21 | uvec3(0, 0, 0), // v000 = 0 22 | uvec3(1, 0, 0), // v100 = 1 23 | uvec3(0, 1, 0), // v010 = 2 24 | uvec3(1, 1, 0), // v110 = 3 25 | uvec3(0, 0, 1), // v001 = 4 26 | uvec3(1, 0, 1), // v101 = 5 27 | uvec3(0, 1, 1), // v011 = 6 28 | uvec3(1, 1, 1) // v111 = 7 29 | }; 30 | 31 | void main(void) 32 | { 33 | const uint block_index = gl_GlobalInvocationID.x + block_id_offset * 32; 34 | const uint total_blocks = ((padded_dims.x * padded_dims.y) / 64) * padded_dims.z; 35 | 36 | if (block_index >= total_blocks) { 37 | return; 38 | } 39 | 40 | BlockReader reader = create_block_reader(block_index); 41 | float decompressed_block[ZFP_BLOCK_SIZE]; 42 | decompress_block(reader, decompressed_block); 43 | 44 | // No support for strided volumes 45 | const uvec3 stride = uvec3(1, volume_dims.x, volume_dims.x * volume_dims.y); 46 | 47 | uvec3 nblocks; 48 | nblocks.x = padded_dims.x >> 2; 49 | nblocks.y = padded_dims.y >> 2; 50 | nblocks.z = padded_dims.z >> 2; 51 | 52 | uvec3 block; 53 | block.x = (block_index % nblocks.x) * 4; 54 | block.y = ((block_index / nblocks.x) % nblocks.y) * 4; 55 | block.z = (block_index / (nblocks.x * nblocks.y)) * 4; 56 | 57 | vec2 block_range = vec2(1e20f, -1e20f); 58 | bvec3 partial = greaterThan(block + 4, volume_dims.xyz); 59 | uvec3 partial_size = uvec3(partial.x ? volume_dims.x - block.x : 4, 60 | partial.y ? volume_dims.y - block.y : 4, 61 | partial.z ? 
volume_dims.z - block.z : 4); 62 | 63 | for (uint z = 0; z < partial_size.z; ++z) { 64 | for (uint y = 0; y < partial_size.y; ++y) { 65 | for (uint x = 0; x < partial_size.x; ++x) { 66 | block_range.x = min(block_range.x, decompressed_block[16 * z + 4 * y + x]); 67 | block_range.y = max(block_range.y, decompressed_block[16 * z + 4 * y + x]); 68 | } 69 | } 70 | } 71 | block_ranges[block_index] = block_range; 72 | } 73 | -------------------------------------------------------------------------------- /js/util.js: -------------------------------------------------------------------------------- 1 | // Generate the work group ID offset buffer and the dynamic offset buffer to use for chunking 2 | // up a large compute dispatch. The start of the push constants data will be: 3 | // { 4 | // u32: global work group id offset 5 | // u32: totalWorkGroups 6 | // ...: up to 248 bytes additional data (if any) from the pushConstants parameter, 7 | // passed as an ArrayBuffer or TypedArray 8 | // } 9 | // ID offset (u32), 10 | function buildPushConstantsBuffer(device, totalWorkGroups, pushConstants) 11 | { 12 | var dynamicOffsets = []; 13 | var dispatchSizes = []; 14 | 15 | var numDynamicOffsets = 16 | Math.ceil(totalWorkGroups / device.limits.maxComputeWorkgroupsPerDimension); 17 | var idOffsetsBuffer = device.createBuffer({ 18 | size: 256 * numDynamicOffsets, 19 | usage: GPUBufferUsage.UNIFORM, 20 | mappedAtCreation: true, 21 | }); 22 | { 23 | var pushConstantsView = null; 24 | if (pushConstants) { 25 | var pc = pushConstants; 26 | if (pushConstants.buffer) { 27 | pc = pushConstants.buffer; 28 | } 29 | if (pc.byteLength > 248) { 30 | console.log(`Error: push constants can be at most 248 bytes`); 31 | } 32 | pushConstantsView = new Uint8Array(pc); 33 | } 34 | var mapping = idOffsetsBuffer.getMappedRange(); 35 | for (var i = 0; i < numDynamicOffsets; ++i) { 36 | dynamicOffsets.push(i * 256); 37 | 38 | if (i + 1 < numDynamicOffsets) { 39 | dispatchSizes.push(device.limits.maxComputeWorkgroupsPerDimension); 40 | } else { 41 | dispatchSizes.push(totalWorkGroups % 42 | device.limits.maxComputeWorkgroupsPerDimension); 43 | } 44 | 45 | // Write the push constants data 46 | var u32view = new Uint32Array(mapping, i * 256, 2); 47 | u32view[0] = device.limits.maxComputeWorkgroupsPerDimension * i; 48 | u32view[1] = totalWorkGroups; 49 | 50 | // Copy in any additional push constants data if provided 51 | if (pushConstantsView) { 52 | var u8view = new Uint8Array(mapping, i * 256 + 8, 248); 53 | u8view.set(pushConstantsView); 54 | } 55 | } 56 | idOffsetsBuffer.unmap(); 57 | } 58 | dynamicOffsets = new Uint32Array(dynamicOffsets); 59 | 60 | return { 61 | nOffsets: numDynamicOffsets, 62 | gpuBuffer: idOffsetsBuffer, 63 | dynamicOffsets: dynamicOffsets, 64 | dispatchSizes: dispatchSizes, 65 | }; 66 | } 67 | 68 | -------------------------------------------------------------------------------- /shaders/depth_composite.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "util.glsl" 4 | 5 | layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 0, binding = 0, std140) uniform ViewParams 8 | { 9 | mat4 proj_view; 10 | vec4 eye_pos; 11 | vec4 eye_dir; 12 | float near_plane; 13 | uint current_pass_index; 14 | uint speculation_count; 15 | }; 16 | 17 | layout(set = 0, binding = 1, std430) buffer RayIDs 18 | { 19 | uint ray_ids[]; 20 | }; 21 | 22 | layout(set = 0, binding = 2, std430) buffer RayRGBZ 23 | { 24 | vec2 ray_rgbz[]; 
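    // Each entry holds one speculated sample's result: .x carries a packed RGB color bit-cast
    // to float and .y carries the hit depth t. main() below bit-casts .x back with
    // floatBitsToInt(), unpacks it, and keeps the nearest (smallest .y) sample per pixel.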
25 | }; 26 | 27 | uniform layout(set = 0, binding = 3, rgba8) writeonly image2D render_target; 28 | 29 | layout(set = 0, binding = 4, std140) uniform VolumeParams 30 | { 31 | uvec4 volume_dims; 32 | uvec4 padded_dims; 33 | vec4 volume_scale; 34 | uint max_bits; 35 | float isovalue; 36 | uint image_width; 37 | }; 38 | 39 | layout(set = 1, binding = 0, std430) buffer RayInformation 40 | { 41 | RayInfo rays[]; 42 | }; 43 | 44 | void main(void) 45 | { 46 | if (gl_GlobalInvocationID.x >= image_width) { 47 | return; 48 | } 49 | uint ray_index; 50 | uint spec_index; 51 | if (speculation_count > 1) { 52 | spec_index = 53 | (gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * image_width) * speculation_count; 54 | ray_index = ray_ids[spec_index]; 55 | } else { 56 | ray_index = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * image_width; 57 | spec_index = ray_index; 58 | } 59 | ivec2 pixel_coords = ivec2(ray_index % image_width, ray_index / image_width); 60 | vec4 color = vec4(0, 0, 0, FLT_MAX); 61 | for (int i = 0; i < speculation_count; i++) { 62 | if (ray_rgbz[spec_index + i].y < color.w) { 63 | // Unpack the ray color 64 | color.xyz = unpack_color(floatBitsToInt(ray_rgbz[spec_index + i].x)); 65 | color.w = ray_rgbz[spec_index + i].y; 66 | } 67 | } 68 | if (color.a != FLT_MAX) { 69 | #ifdef DRAW_FOG 70 | const vec3 volume_translation = vec3(0.f) - volume_scale.xyz * 0.5f; 71 | vec3 vol_eye = (eye_pos.xyz - volume_translation) / volume_scale.xyz; 72 | vol_eye = vol_eye * volume_dims.xyz - vec3(0.5f); 73 | vec2 t_hit = 74 | intersect_box(vol_eye, rays[ray_index].ray_dir, vec3(0.f), volume_dims.xyz - 1.f); 75 | t_hit.x = max(t_hit.x, 0.f); 76 | 77 | const float fog_start = t_hit.x + 0.1f * length(volume_dims.xyz); 78 | float fog = exp(-1.f * max((color.w - fog_start) / (t_hit.y - t_hit.x), 0.f)); 79 | color.rgb = fog * color.rgb + (1.f - fog) * vec3(1.f); 80 | #endif 81 | imageStore(render_target, pixel_coords, vec4(color.rgb, 1.f)); 82 | } 83 | // Terminate rays that were partially speculated and may not have hit anything 84 | // This could also be done in macro traverse 85 | if (rays[ray_index].t == -FLT_MAX) { 86 | rays[ray_index].t = FLT_MAX; 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /shaders/merge_sorted_chunks.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "sort_data.comp" 4 | 5 | layout(set = 1, binding = 0, std430) buffer InputKeys 6 | { 7 | uint input_keys[]; 8 | }; 9 | 10 | layout(set = 1, binding = 1, std430) buffer InputValues 11 | { 12 | uint input_values[]; 13 | }; 14 | 15 | layout(set = 1, binding = 2, std430) buffer OutputKeys 16 | { 17 | uint output_keys[]; 18 | }; 19 | 20 | layout(set = 1, binding = 3, std430) buffer OutputValues 21 | { 22 | uint output_values[]; 23 | }; 24 | 25 | layout(set = 2, binding = 0, std140) uniform NumWorkGroups 26 | { 27 | uint work_groups_x; 28 | }; 29 | 30 | uint upper_bound(uint start, uint count, uint element) 31 | { 32 | while (count > 0) { 33 | uint i = start + count / 2; 34 | if (element >= input_keys[i]) { 35 | start = i + 1; 36 | count -= count / 2 + 1; 37 | } else { 38 | count = count / 2; 39 | } 40 | } 41 | return start; 42 | } 43 | 44 | uint lower_bound(uint start, uint count, uint element) 45 | { 46 | while (count > 0) { 47 | uint i = start + count / 2; 48 | if (input_keys[i] < element) { 49 | start = i + 1; 50 | count -= count / 2 + 1; 51 | } else { 52 | count = count / 2; 53 | } 54 | } 55 | return 
start; 56 | } 57 | 58 | void main(void) 59 | { 60 | // Compute the merge chunk size, which is based on the number of work groups and input data size 61 | const uint aligned_size = next_pow2(uint(ceil(float(size) / SORT_CHUNK_SIZE))) * SORT_CHUNK_SIZE; 62 | const uint merge_output_size = aligned_size / work_groups_x; 63 | const uint merge_chunk_size = merge_output_size / 2; 64 | 65 | // Load the first set of elements to merge 66 | const uint offs = gl_WorkGroupID.x * merge_output_size; 67 | 68 | // Each work group merges two chunks, each thread is responsible for 69 | // two elements in the chunks, which it merges into the sorted output 70 | // Loop through and merge each SORT_CHUNK_SIZE group of elements from merge_chunk_size 71 | for (uint i = 0; i < merge_chunk_size / SORT_CHUNK_SIZE; ++i) { 72 | const uint a_in = offs + i * SORT_CHUNK_SIZE + gl_LocalInvocationID.x; 73 | const uint b_in = offs + merge_chunk_size + i * SORT_CHUNK_SIZE + gl_LocalInvocationID.x; 74 | const uint base_idx = gl_LocalInvocationID.x + i * SORT_CHUNK_SIZE; 75 | // Could be done better, but short on time 76 | // Upper bound in b 77 | const uint a_loc = base_idx 78 | + upper_bound(offs + merge_chunk_size, merge_chunk_size, input_keys[a_in]) 79 | - merge_chunk_size; 80 | // Lower bound in a 81 | const uint b_loc = base_idx + lower_bound(offs, merge_chunk_size, input_keys[b_in]); 82 | 83 | output_keys[a_loc] = input_keys[a_in]; 84 | output_values[a_loc] = input_values[a_in]; 85 | 86 | output_keys[b_loc] = input_keys[b_in]; 87 | output_values[b_loc] = input_values[b_in]; 88 | } 89 | } 90 | 91 | -------------------------------------------------------------------------------- /shaders/mark_block_active.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "util.glsl" 4 | 5 | layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 0, binding = 0, std140) uniform VolumeParams 8 | { 9 | uvec4 volume_dims; 10 | uvec4 padded_dims; 11 | vec4 volume_scale; 12 | uint max_bits; 13 | float isovalue; 14 | uint image_width; 15 | }; 16 | 17 | layout(set = 0, binding = 1, std140) uniform LOD 18 | { 19 | uint LOD_threshold; 20 | }; 21 | 22 | layout(set = 0, binding = 2, std140) uniform ViewParams 23 | { 24 | mat4 proj_view; 25 | vec4 eye_pos; 26 | vec4 eye_dir; 27 | float near_plane; 28 | uint current_pass_index; 29 | }; 30 | 31 | layout(set = 0, binding = 3, std430) buffer BlockActive 32 | { 33 | uint block_active[]; 34 | }; 35 | 36 | layout(set = 0, binding = 5, std430) buffer RayInformation 37 | { 38 | RayInfo rays[]; 39 | }; 40 | 41 | layout(set = 0, binding = 6, std430) buffer BlockVisible 42 | { 43 | uint block_visible[]; 44 | }; 45 | 46 | layout(set = 0, binding = 7, std430) buffer RayBlockIDs 47 | { 48 | uint block_ids[]; 49 | }; 50 | 51 | uniform layout(set = 1, binding = 0, rgba8) writeonly image2D render_target; 52 | 53 | uvec3 block_id_to_pos(uint id) 54 | { 55 | uvec3 n_blocks = padded_dims.xyz / uvec3(4); 56 | return uvec3(id % n_blocks.x, 57 | (id / n_blocks.x) % n_blocks.y, 58 | id / (n_blocks.x * n_blocks.y)); 59 | } 60 | 61 | uint compute_block_id(uvec3 block_pos) 62 | { 63 | uvec3 n_blocks = padded_dims.xyz / uvec3(4); 64 | return block_pos.x + n_blocks.x * (block_pos.y + n_blocks.y * block_pos.z); 65 | } 66 | 67 | void main(void) 68 | { 69 | uint ray_index = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * image_width; 70 | if (rays[ray_index].t == FLT_MAX) { 71 | return; 72 | } 73 | 74 | const uvec3 n_blocks = 
padded_dims.xyz / uvec3(4); 75 | const uint block_id = block_ids[ray_index]; 76 | if (block_id == UINT_MAX) { 77 | return; 78 | } 79 | const uvec3 block_pos = block_id_to_pos(block_id); 80 | 81 | // May need to become an atomic or? I don't think it should 82 | // be an issue but let's see 83 | block_visible[block_id] = 1; 84 | 85 | // Count this ray for the block 86 | uint num_rays = atomicAdd(block_num_rays[block_id], 1) + 1; 87 | block_active[block_id] = 1; 88 | 89 | // Mark this ray's block's neighbors to the positive side as active 90 | // These blocks must be decompressed for neighbor data, but this ray 91 | // doesn't need to process them. 92 | if (num_rays == 1 && block_active[block_id] == 1) { 93 | for (int k = 0; k < 2; ++k) { 94 | for (int j = 0; j < 2; ++j) { 95 | for (int i = 0; i < 2; ++i) { 96 | const uvec3 neighbor = uvec3(i, j, k); 97 | const uvec3 coords = block_pos + neighbor; 98 | if (neighbor == uvec3(0) || any(lessThan(coords, uvec3(0))) 99 | || any(greaterThanEqual(coords, n_blocks))) 100 | { 101 | continue; 102 | } 103 | const uint neighbor_id = compute_block_id(coords); 104 | // May need to become an atomic or? I don't think it should 105 | // be an issue but let's see 106 | block_active[neighbor_id] = 1; 107 | } 108 | } 109 | } 110 | } 111 | } 112 | 113 | -------------------------------------------------------------------------------- /shaders/embed_shaders.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import os 5 | import subprocess 6 | 7 | if len(sys.argv) < 3: 8 | print("Usage ") 9 | 10 | glslc = sys.argv[1] 11 | tint = sys.argv[2] 12 | 13 | output = "embedded_shaders.js" 14 | shaders = [ 15 | "prefix_sum.comp", 16 | "block_prefix_sum.comp", 17 | "add_block_sums.comp", 18 | "stream_compact.comp", 19 | "stream_compact_data.comp", 20 | "compute_initial_rays.vert", 21 | "compute_initial_rays.frag", 22 | "zfp_compute_block_range.comp", 23 | "zfp_decompress_block.comp", 24 | "lru_cache_init.comp", 25 | "lru_cache_mark_new_items.comp", 26 | "lru_cache_update.comp", 27 | "lru_copy_available_slot_age.comp", 28 | "lru_cache_age_slots.comp", 29 | "lru_cache_extract_slot_available.comp", 30 | "macro_traverse.comp", 31 | "radix_sort_chunk.comp", 32 | "reverse_buffer.comp", 33 | "merge_sorted_chunks.comp", 34 | "display_render.vert", 35 | "display_render.frag", 36 | "reset_rays.comp", 37 | # Must be manually ported to WGSL since it uses atomics 38 | # Tint cannot translate atomics from SPV -> WGSL due to 39 | # - https://bugs.chromium.org/p/tint/issues/detail?id=1207 40 | # - https://bugs.chromium.org/p/tint/issues/detail?id=1441 41 | #"mark_block_active.comp", 42 | "reset_block_active.comp", 43 | "reset_block_num_rays.comp", 44 | "debug_view_rays_per_block.comp", 45 | "write_ray_and_block_id.comp", 46 | "combine_block_information.comp", 47 | "raytrace_active_block.comp", 48 | "compute_voxel_range.comp", 49 | "compute_coarse_cell_range.comp", 50 | "reset_speculative_ids.comp", 51 | "depth_composite.comp", 52 | "mark_ray_active.comp" 53 | ] 54 | 55 | try: 56 | os.stat(output) 57 | os.remove(output) 58 | except: 59 | pass 60 | 61 | block_size = 512 62 | sort_chunk_size = 64 63 | draw_fog = False 64 | if "-fog" in sys.argv: 65 | draw_fog = True 66 | 67 | compiled_shaders = "" 68 | for shader in shaders: 69 | fname, ext = os.path.splitext(os.path.basename(shader)) 70 | var_name = "{}_{}_spv".format(fname, ext[1:]) 71 | print("Embedding {} as {}".format(shader, var_name)) 72 | args = [ 73 | 
"python3", 74 | "compile_shader.py", 75 | glslc, 76 | tint, 77 | shader, 78 | var_name, 79 | "-DBLOCK_SIZE={}".format(block_size), 80 | "-DSORT_CHUNK_SIZE={}".format(sort_chunk_size), 81 | ] 82 | if draw_fog: 83 | args.append("-DDRAW_FOG=1") 84 | compiled_shaders += subprocess.check_output(args).decode("utf-8") 85 | 86 | # TODO: Read and append hand port of mark_block_active.comp to embed the WGSL shader 87 | manual_wgsl_shaders = [ 88 | "mark_block_active.wgsl", 89 | "count_block_rays.wgsl" 90 | ] 91 | # TODO: Would also need to do a find/replace for the defines if we manually port any 92 | # shaders that use BLOCK_SIZE or SORT_CHUNK_SIZE but I don't think it'll be needed 93 | for shader in manual_wgsl_shaders: 94 | with open(shader, "r") as f: 95 | fname, ext = os.path.splitext(os.path.basename(shader)) 96 | var_name = "{}_{}_spv".format(fname, ext[1:]) 97 | print("Embedding manually WGSL'd shader {} as {}".format(shader, var_name)) 98 | compiled_shaders += "const " + var_name + " = `" + "".join(f.readlines()) + "`;\n"; 99 | 100 | with open(output, "w") as f: 101 | f.write("const ScanBlockSize = {};\n".format(block_size)) 102 | f.write("const SortChunkSize = {};\n".format(sort_chunk_size)) 103 | f.write(compiled_shaders) 104 | 105 | -------------------------------------------------------------------------------- /shaders/radix_sort_chunk.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "sort_data.comp" 4 | 5 | layout(set = 1, binding = 0, std430) buffer Keys 6 | { 7 | uint keys[]; 8 | }; 9 | 10 | layout(set = 1, binding = 1, std430) buffer Values 11 | { 12 | uint values[]; 13 | }; 14 | 15 | shared uint key_buf[SORT_CHUNK_SIZE]; 16 | shared uint sorted_key_buf[SORT_CHUNK_SIZE]; 17 | shared uint scratch[SORT_CHUNK_SIZE]; 18 | shared uint total_false; 19 | 20 | shared uint val_buf[SORT_CHUNK_SIZE]; 21 | shared uint sorted_val_buf[SORT_CHUNK_SIZE]; 22 | 23 | void main(void) 24 | { 25 | // Also use the radix step to pad arrays out with UINT_MAX 26 | if (gl_GlobalInvocationID.x < size) { 27 | key_buf[gl_LocalInvocationID.x] = keys[gl_GlobalInvocationID.x]; 28 | val_buf[gl_LocalInvocationID.x] = values[gl_GlobalInvocationID.x]; 29 | } else { 30 | // Pad any missing data with uint max, which will be sorted out to the end 31 | key_buf[gl_LocalInvocationID.x] = UINT_MAX; 32 | val_buf[gl_LocalInvocationID.x] = UINT_MAX; 33 | } 34 | 35 | // Sort each bit, from LSB to MSB 36 | for (uint i = 0; i < 32; ++i) { 37 | barrier(); 38 | const uint mask = 1 << i; 39 | scratch[gl_LocalInvocationID.x] = (key_buf[gl_LocalInvocationID.x] & mask) != 0 ? 
0 : 1; 40 | 41 | // A bit annoying to copy this code around, but we can't have unsized array 42 | // parameters to functions in GLSL 43 | uint offs = 1; 44 | // Reduce step up tree 45 | for (int d = SORT_CHUNK_SIZE >> 1; d > 0; d = d >> 1) { 46 | barrier(); 47 | if (gl_LocalInvocationID.x < d) { 48 | uint a = offs * (2 * gl_LocalInvocationID.x + 1) - 1; 49 | uint b = offs * (2 * gl_LocalInvocationID.x + 2) - 1; 50 | scratch[b] += scratch[a]; 51 | } 52 | offs = offs << 1; 53 | } 54 | 55 | if (gl_LocalInvocationID.x == 0) { 56 | total_false = scratch[SORT_CHUNK_SIZE - 1]; 57 | scratch[SORT_CHUNK_SIZE - 1] = 0; 58 | } 59 | 60 | // Sweep down the tree to finish the scan 61 | for (int d = 1; d < SORT_CHUNK_SIZE; d = d << 1) { 62 | offs = offs >> 1; 63 | barrier(); 64 | if (gl_LocalInvocationID.x < d) { 65 | uint a = offs * (2 * gl_LocalInvocationID.x + 1) - 1; 66 | uint b = offs * (2 * gl_LocalInvocationID.x + 2) - 1; 67 | const uint tmp = scratch[a]; 68 | scratch[a] = scratch[b]; 69 | scratch[b] += tmp; 70 | } 71 | } 72 | barrier(); 73 | 74 | // Now scatter the elements to their destinations 75 | const uint f = scratch[gl_LocalInvocationID.x]; 76 | const uint t = gl_LocalInvocationID.x - f + total_false; 77 | if ((key_buf[gl_LocalInvocationID.x] & mask) != 0) { 78 | sorted_key_buf[t] = key_buf[gl_LocalInvocationID.x]; 79 | sorted_val_buf[t] = val_buf[gl_LocalInvocationID.x]; 80 | } else { 81 | sorted_key_buf[f] = key_buf[gl_LocalInvocationID.x]; 82 | sorted_val_buf[f] = val_buf[gl_LocalInvocationID.x]; 83 | } 84 | barrier(); 85 | 86 | // Copy the sorted set to the buf for the next pass 87 | key_buf[gl_LocalInvocationID.x] = sorted_key_buf[gl_LocalInvocationID.x]; 88 | val_buf[gl_LocalInvocationID.x] = sorted_val_buf[gl_LocalInvocationID.x]; 89 | } 90 | barrier(); 91 | 92 | // Write back the sorted buffer 93 | keys[gl_GlobalInvocationID.x] = key_buf[gl_LocalInvocationID.x]; 94 | values[gl_GlobalInvocationID.x] = val_buf[gl_LocalInvocationID.x]; 95 | } 96 | 97 | -------------------------------------------------------------------------------- /js/run_benchmark.js: -------------------------------------------------------------------------------- 1 | const benchmarkIterations = 100; 2 | const cameraIterations = 10; 3 | 4 | var RandomIsovalueBenchmark = function(isovalueSlider, range) { 5 | this.name = "random"; 6 | this.iteration = 0; 7 | this.isovalueSlider = isovalueSlider; 8 | this.range = range; 9 | this.numIterations = benchmarkIterations; 10 | }; 11 | 12 | RandomIsovalueBenchmark.prototype.run = function() { 13 | if (this.iteration == this.numIterations) { 14 | return false; 15 | } 16 | var range = this.range[1] - this.range[0]; 17 | this.isovalueSlider.value = Math.random() * range + this.range[0]; 18 | this.iteration += 1; 19 | return true; 20 | }; 21 | 22 | RandomIsovalueBenchmark.prototype.reset = function() { 23 | this.iteration = 0; 24 | }; 25 | 26 | var SweepIsovalueBenchmark = function(isovalueSlider, range, sweepUp) { 27 | this.iteration = 0; 28 | this.isovalueSlider = isovalueSlider; 29 | this.range = range; 30 | this.sweepUp = sweepUp; 31 | this.numIterations = benchmarkIterations; 32 | if (this.sweepUp) { 33 | this.name = "sweepUp"; 34 | this.currentValue = range[0]; 35 | } else { 36 | this.name = "sweepDown"; 37 | this.currentValue = range[1]; 38 | } 39 | }; 40 | 41 | SweepIsovalueBenchmark.prototype.run = function() { 42 | if (this.iteration == this.numIterations) { 43 | return false; 44 | } 45 | var step = (this.range[1] - this.range[0]) / benchmarkIterations; 46 | if 
(this.sweepUp) { 47 | this.currentValue += step; 48 | } else { 49 | this.currentValue -= step; 50 | } 51 | this.isovalueSlider.value = this.currentValue; 52 | this.iteration += 1; 53 | return true; 54 | }; 55 | 56 | // ManualSingleBenchmark just re-runs whatever current isovalue we have picked 57 | var ManualSingleBenchmark = function() { 58 | this.done = false; 59 | this.name = "manualSingle"; 60 | }; 61 | 62 | ManualSingleBenchmark.prototype.run = function() { 63 | if (this.done) { 64 | return false; 65 | } 66 | this.done = true; 67 | return true; 68 | }; 69 | 70 | ManualSingleBenchmark.prototype.reset = function() { 71 | this.done = false; 72 | }; 73 | 74 | SweepIsovalueBenchmark.prototype.reset = function() { 75 | this.iteration = 0; 76 | }; 77 | 78 | var CameraOrbitBenchmark = function(radius) { 79 | this.iteration = 0; 80 | this.name = "cameraOrbit"; 81 | this.numIterations = cameraIterations; 82 | this.radius = radius; 83 | }; 84 | 85 | CameraOrbitBenchmark.prototype.run = function() { 86 | if (this.iteration == this.numIterations) { 87 | return false; 88 | } 89 | const increment = Math.PI * (3.0 - Math.sqrt(5.0)); 90 | const offset = 2.0 / this.numIterations; 91 | 92 | var y = ((this.iteration * offset) - 1.0) + offset / 2.0; 93 | const r = Math.sqrt(1.0 - y * y); 94 | const phi = this.iteration * increment; 95 | var x = r * Math.cos(phi); 96 | var z = r * Math.sin(phi); 97 | 98 | x *= this.radius; 99 | y *= this.radius; 100 | z *= this.radius; 101 | 102 | this.currentPoint = vec3.set(vec3.create(), x, y, z); 103 | this.iteration += 1; 104 | return true; 105 | }; 106 | 107 | CameraOrbitBenchmark.prototype.reset = function() { 108 | this.iteration = 0; 109 | }; 110 | 111 | var NestedBenchmark = function(outerLoop, innerLoop) { 112 | this.name = outerLoop.name + "-" + innerLoop.name; 113 | this.outerLoop = outerLoop; 114 | this.innerLoop = innerLoop; 115 | this.iteration = 0; 116 | }; 117 | 118 | NestedBenchmark.prototype.run = function() { 119 | if (this.iteration == 0) { 120 | this.outerLoop.run(); 121 | } 122 | if (!this.innerLoop.run()) { 123 | if (!this.outerLoop.run()) { 124 | return false; 125 | } 126 | this.innerLoop.reset(); 127 | this.innerLoop.run(); 128 | } 129 | this.iteration += 1; 130 | return true; 131 | } 132 | 133 | -------------------------------------------------------------------------------- /shaders/mark_block_active.wgsl: -------------------------------------------------------------------------------- 1 | /* 2 | // #include "util.glsl" 3 | */ 4 | 5 | const UINT_MAX: u32 = 0xffffffffu; 6 | const FLT_MAX: f32 = 3.402823466e+38; 7 | 8 | alias float2 = vec2; 9 | alias float3 = vec3; 10 | alias float4 = vec4; 11 | alias uint2 = vec2; 12 | alias uint3 = vec3; 13 | alias uint4 = vec4; 14 | 15 | struct RayInfo { 16 | ray_dir: float3, 17 | // block_id: u32, 18 | t: f32, 19 | // t_next: f32, 20 | // For WGSL we need to pad the struct up to 32 bytes so it matches 21 | // the GLSL struct alignment/padding rules we had before 22 | // @size(8) pad: f32 23 | }; 24 | 25 | /* 26 | layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in; 27 | */ 28 | /* 29 | layout(set = 0, binding = 0, std140) uniform VolumeParams 30 | { 31 | uvec4 volume_dims; 32 | uvec4 padded_dims; 33 | vec4 volume_scale; 34 | uint max_bits; 35 | float isovalue; 36 | uint image_width; 37 | }; 38 | */ 39 | struct VolumeParams { 40 | volume_dims: uint4, 41 | padded_dims: uint4, 42 | volume_scale: float4, 43 | max_bits: u32, 44 | isovalue: f32, 45 | image_width: u32, 46 | } 47 | 48 | @group(0) 
@binding(0) var volume_params : VolumeParams; 49 | 50 | /* 51 | layout(set = 0, binding = 1, std140) uniform LOD 52 | { 53 | uint LOD_threshold; 54 | }; 55 | */ 56 | struct LOD { 57 | threshold: f32, 58 | } 59 | @group(0) @binding(1) var lod_threshold : LOD; 60 | 61 | /* 62 | layout(set = 0, binding = 2, std140) uniform ViewParams 63 | { 64 | mat4 proj_view; 65 | vec4 eye_pos; 66 | vec4 eye_dir; 67 | float near_plane; 68 | uint current_pass_index; 69 | }; 70 | */ 71 | struct ViewParams { 72 | proj_view: mat4x4, 73 | eye_pos: float4, 74 | eye_dir: float4, 75 | near_plane : f32, 76 | current_pass_index: u32, 77 | } 78 | @group(0) @binding(2) var view_params : ViewParams; 79 | 80 | /* 81 | layout(set = 0, binding = 3, std430) buffer BlockActive 82 | { 83 | uint block_active[]; 84 | }; 85 | */ 86 | // TODO: Is this valid WGSL? Try compiling with Tint 87 | @group(0) @binding(3) var block_active : array; 88 | 89 | /* 90 | layout(set = 0, binding = 5, std430) buffer RayInformation 91 | { 92 | RayInfo rays[]; 93 | }; 94 | */ 95 | @group(0) @binding(4) var rays : array; 96 | 97 | /* 98 | layout(set = 0, binding = 6, std430) buffer BlockVisible 99 | { 100 | uint block_visible[]; 101 | }; 102 | */ 103 | @group(0) @binding(5) var block_visible : array>; 104 | @group(0) @binding(6) var block_ids : array; 105 | 106 | 107 | //uniform layout(set = 1, binding = 0, rgba8) writeonly image2D render_target; 108 | @group(1) @binding(0) var render_target : texture_storage_2d; 109 | 110 | fn block_id_to_pos(id: u32) -> uint3 { 111 | let n_blocks = volume_params.padded_dims.xyz / uint3(4u); 112 | return uint3(id % n_blocks.x, 113 | (id / n_blocks.x) % n_blocks.y, 114 | id / (n_blocks.x * n_blocks.y)); 115 | } 116 | 117 | fn compute_block_id(block_pos: uint3) -> u32 118 | { 119 | let n_blocks = volume_params.padded_dims.xyz / uint3(4u); 120 | return block_pos.x + n_blocks.x * (block_pos.y + n_blocks.y * block_pos.z); 121 | } 122 | 123 | @compute @workgroup_size(32, 1, 1) 124 | fn main(@builtin(global_invocation_id) g_invocation_id : vec3) { 125 | if (g_invocation_id.x >= volume_params.image_width) { 126 | return; 127 | } 128 | 129 | let ray_index = g_invocation_id.x + g_invocation_id.y * volume_params.image_width; 130 | 131 | let block_id = block_ids[ray_index]; 132 | if (block_id == UINT_MAX) { 133 | return; 134 | } 135 | let block_pos = block_id_to_pos(block_id); 136 | 137 | block_active[block_id] = 1u; 138 | //block_visible[block_id] = 1; 139 | let already_marked = atomicMax(&block_visible[block_id], 1u); 140 | 141 | // Count this ray for the block (this is now done in count_block_rays.wgsl 142 | //uint num_rays = atomicAdd(block_num_rays[block_id], uint(1)) + 1; 143 | //let num_rays = atomicAdd(&block_num_rays[block_id], 1u) + 1u; 144 | 145 | // Mark this ray's block's neighbors to the positive side as active 146 | // These blocks must be decompressed for neighbor data, but this ray 147 | // doesn't need to process them. 
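    // already_marked is the value block_visible[block_id] held before the atomicMax above, so
    // it is 0 only for the first ray to reach this block in the pass. Gating the neighbor
    // marking on that first ray keeps every later ray landing in the same block from
    // redundantly rewriting the same block_active entries (the GLSL version gates on
    // num_rays == 1 for the same reason).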
148 | if (already_marked == 0) { 149 | let n_blocks = volume_params.padded_dims.xyz / uint3(4u); 150 | for (var k = 0u; k < 2u; k += 1u) { 151 | for (var j = 0u; j < 2u; j += 1u) { 152 | for (var i = 0u; i < 2u; i += 1u) { 153 | let neighbor = uint3(i, j, k); 154 | let coords = block_pos + neighbor; 155 | if (all(neighbor == uint3(0u)) || any(coords < uint3(0u)) || any(coords >= n_blocks)) { 156 | continue; 157 | } 158 | let neighbor_id = compute_block_id(coords); 159 | block_active[neighbor_id] = 1u; 160 | } 161 | } 162 | } 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /js/FileSaver.js: -------------------------------------------------------------------------------- 1 | /* 2 | * FileSaver.js 3 | * A saveAs() FileSaver implementation. 4 | * 5 | * By Eli Grey, http://eligrey.com 6 | * 7 | * License : https://github.com/eligrey/FileSaver.js/blob/master/LICENSE.md (MIT) 8 | * source : http://purl.eligrey.com/github/FileSaver.js 9 | */ 10 | 11 | 12 | // The one and only way of getting global scope in all environments 13 | // https://stackoverflow.com/q/3277182/1008999 14 | var _global = typeof window === 'object' && window.window === window 15 | ? window : typeof self === 'object' && self.self === self 16 | ? self : typeof global === 'object' && global.global === global 17 | ? global 18 | : this 19 | 20 | function bom (blob, opts) { 21 | if (typeof opts === 'undefined') opts = { autoBom: false } 22 | else if (typeof opts !== 'object') { 23 | console.warn('Depricated: Expected third argument to be a object') 24 | opts = { autoBom: !opts } 25 | } 26 | 27 | // prepend BOM for UTF-8 XML and text/* types (including HTML) 28 | // note: your browser will automatically convert UTF-16 U+FEFF to EF BB BF 29 | if (opts.autoBom && /^\s*(?:text\/\S*|application\/xml|\S*\/\S*\+xml)\s*;.*charset\s*=\s*utf-8/i.test(blob.type)) { 30 | return new Blob([String.fromCharCode(0xFEFF), blob], { type: blob.type }) 31 | } 32 | return blob 33 | } 34 | 35 | function download (url, name, opts) { 36 | var xhr = new XMLHttpRequest() 37 | xhr.open('GET', url) 38 | xhr.responseType = 'blob' 39 | xhr.onload = function () { 40 | saveAs(xhr.response, name, opts) 41 | } 42 | xhr.onerror = function () { 43 | console.error('could not download file') 44 | } 45 | xhr.send() 46 | } 47 | 48 | function corsEnabled (url) { 49 | var xhr = new XMLHttpRequest() 50 | // use sync to avoid popup blocker 51 | xhr.open('HEAD', url, false) 52 | xhr.send() 53 | return xhr.status >= 200 && xhr.status <= 299 54 | } 55 | 56 | // `a.click()` doesn't work for all browsers (#465) 57 | function click(node) { 58 | try { 59 | node.dispatchEvent(new MouseEvent('click')) 60 | } catch (e) { 61 | var evt = document.createEvent('MouseEvents') 62 | evt.initMouseEvent('click', true, true, window, 0, 0, 0, 80, 63 | 20, false, false, false, false, 0, null) 64 | node.dispatchEvent(evt) 65 | } 66 | } 67 | 68 | var saveAs = _global.saveAs || 69 | // probably in some web worker 70 | (typeof window !== 'object' || window !== _global) 71 | ? function saveAs () { /* noop */ } 72 | 73 | // Use download attribute first if possible (#193 Lumia mobile) 74 | : 'download' in HTMLAnchorElement.prototype 75 | ? 
function saveAs (blob, name, opts) { 76 | var URL = _global.URL || _global.webkitURL 77 | var a = document.createElement('a') 78 | name = name || blob.name || 'download' 79 | 80 | a.download = name 81 | a.rel = 'noopener' // tabnabbing 82 | 83 | // TODO: detect chrome extensions & packaged apps 84 | // a.target = '_blank' 85 | 86 | if (typeof blob === 'string') { 87 | // Support regular links 88 | a.href = blob 89 | if (a.origin !== location.origin) { 90 | corsEnabled(a.href) 91 | ? download(blob, name, opts) 92 | : click(a, a.target = '_blank') 93 | } else { 94 | click(a) 95 | } 96 | } else { 97 | // Support blobs 98 | a.href = URL.createObjectURL(blob) 99 | setTimeout(function () { URL.revokeObjectURL(a.href) }, 4E4) // 40s 100 | setTimeout(function () { click(a) }, 0) 101 | } 102 | } 103 | 104 | // Use msSaveOrOpenBlob as a second approach 105 | : 'msSaveOrOpenBlob' in navigator 106 | ? function saveAs (blob, name, opts) { 107 | name = name || blob.name || 'download' 108 | 109 | if (typeof blob === 'string') { 110 | if (corsEnabled(blob)) { 111 | download(blob, name, opts) 112 | } else { 113 | var a = document.createElement('a') 114 | a.href = blob 115 | a.target = '_blank' 116 | setTimeout(function () { click(a) }) 117 | } 118 | } else { 119 | navigator.msSaveOrOpenBlob(bom(blob, opts), name) 120 | } 121 | } 122 | 123 | // Fallback to using FileReader and a popup 124 | : function saveAs (blob, name, opts, popup) { 125 | // Open a popup immediately do go around popup blocker 126 | // Mostly only avalible on user interaction and the fileReader is async so... 127 | popup = popup || open('', '_blank') 128 | if (popup) { 129 | popup.document.title = 130 | popup.document.body.innerText = 'downloading...' 131 | } 132 | 133 | if (typeof blob === 'string') return download(blob, name, opts) 134 | 135 | var force = blob.type === 'application/octet-stream' 136 | var isSafari = /constructor/i.test(_global.HTMLElement) || _global.safari 137 | var isChromeIOS = /CriOS\/[\d]+/.test(navigator.userAgent) 138 | 139 | if ((isChromeIOS || (force && isSafari)) && typeof FileReader === 'object') { 140 | // Safari doesn't allow downloading of blob urls 141 | var reader = new FileReader() 142 | reader.onloadend = function () { 143 | var url = reader.result 144 | url = isChromeIOS ? 
url : url.replace(/^data:[^;]*;/, 'data:attachment/file;') 145 | if (popup) popup.location.href = url 146 | else location = url 147 | popup = null // reverse-tabnabbing #460 148 | } 149 | reader.readAsDataURL(blob) 150 | } else { 151 | var URL = _global.URL || _global.webkitURL 152 | var url = URL.createObjectURL(blob) 153 | if (popup) popup.location = url 154 | else location.href = url 155 | popup = null // reverse-tabnabbing #460 156 | setTimeout(function () { URL.revokeObjectURL(url) }, 4E4) // 40s 157 | } 158 | } 159 | 160 | _global.saveAs = saveAs.saveAs = saveAs 161 | 162 | if (typeof module !== 'undefined') { 163 | module.exports = saveAs; 164 | } 165 | 166 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: JavaScript 3 | AccessModifierOffset: -4 4 | AlignAfterOpenBracket: Align 5 | AlignConsecutiveAssignments: false 6 | AlignConsecutiveDeclarations: false 7 | AlignEscapedNewlinesLeft: true 8 | AlignOperands: true 9 | AlignTrailingComments: true 10 | AllowAllParametersOfDeclarationOnNextLine: true 11 | AllowShortBlocksOnASingleLine: false 12 | AllowShortCaseLabelsOnASingleLine: false 13 | AllowShortFunctionsOnASingleLine: Empty 14 | AllowShortIfStatementsOnASingleLine: false 15 | AllowShortLoopsOnASingleLine: false 16 | AlwaysBreakAfterDefinitionReturnType: None 17 | AlwaysBreakAfterReturnType: None 18 | AlwaysBreakBeforeMultilineStrings: true 19 | AlwaysBreakTemplateDeclarations: true 20 | BinPackArguments: false 21 | BinPackParameters: false 22 | BraceWrapping: 23 | AfterClass: false 24 | AfterControlStatement: false 25 | AfterEnum: false 26 | AfterFunction: true 27 | AfterNamespace: false 28 | AfterStruct: false 29 | AfterUnion: false 30 | AfterExternBlock: false 31 | BeforeCatch: false 32 | BeforeElse: false 33 | IndentBraces: false 34 | BreakBeforeBinaryOperators: None 35 | BreakBeforeBraces: Custom 36 | BreakBeforeTernaryOperators: true 37 | BreakConstructorInitializersBeforeComma: false 38 | BreakStringLiterals: true 39 | ColumnLimit: 95 40 | CommentPragmas: '^ IWYU pragma:' 41 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 42 | Cpp11BracedListStyle: true 43 | DerivePointerAlignment: false 44 | DisableFormat: false 45 | ExperimentalAutoDetectBinPacking: false 46 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 47 | IncludeCategories: 48 | - Regex: '^<[^\.]*>' 49 | Priority: 1 50 | - Regex: '^<.*\.h>' 51 | Priority: 2 52 | - Regex: '.*' 53 | Priority: 3 54 | SortIncludes: true 55 | ConstructorInitializerIndentWidth: 4 56 | ContinuationIndentWidth: 4 57 | IndentCaseLabels: false 58 | IndentWidth: 4 59 | IndentWrappedFunctionNames: false 60 | KeepEmptyLinesAtTheStartOfBlocks: false 61 | MacroBlockBegin: '' 62 | MacroBlockEnd: '' 63 | MaxEmptyLinesToKeep: 1 64 | NamespaceIndentation: Inner 65 | PenaltyBreakBeforeFirstCallParameter: 1 66 | PenaltyBreakComment: 300 67 | PenaltyBreakFirstLessLess: 120 68 | PenaltyBreakString: 1000 69 | PenaltyExcessCharacter: 1000000 70 | PenaltyReturnTypeOnItsOwnLine: 200 71 | PointerAlignment: Right 72 | ReflowComments: true 73 | SpaceAfterCStyleCast: false 74 | SpaceAfterTemplateKeyword: true 75 | SpaceBeforeAssignmentOperators: true 76 | SpaceBeforeParens: ControlStatements 77 | SpaceInEmptyParentheses: false 78 | SpacesBeforeTrailingComments: 2 79 | SpacesInAngles: false 80 | SpacesInContainerLiterals: false 81 | SpacesInCStyleCastParentheses: false 82 | SpacesInParentheses: false 
83 | SpacesInSquareBrackets: false 84 | Standard: Cpp11 85 | TabWidth: 4 86 | FixNamespaceComments: false 87 | UseTab: Never 88 | ... 89 | --- 90 | Language: ObjC 91 | AccessModifierOffset: -4 92 | AlignAfterOpenBracket: Align 93 | AlignConsecutiveAssignments: false 94 | AlignConsecutiveDeclarations: false 95 | AlignEscapedNewlinesLeft: true 96 | AlignOperands: true 97 | AlignTrailingComments: true 98 | AllowAllParametersOfDeclarationOnNextLine: true 99 | AllowShortBlocksOnASingleLine: false 100 | AllowShortCaseLabelsOnASingleLine: false 101 | AllowShortFunctionsOnASingleLine: Empty 102 | AllowShortIfStatementsOnASingleLine: false 103 | AllowShortLoopsOnASingleLine: false 104 | AlwaysBreakAfterDefinitionReturnType: None 105 | AlwaysBreakAfterReturnType: None 106 | AlwaysBreakBeforeMultilineStrings: true 107 | AlwaysBreakTemplateDeclarations: true 108 | BinPackArguments: false 109 | BinPackParameters: false 110 | BraceWrapping: 111 | AfterClass: false 112 | AfterControlStatement: false 113 | AfterEnum: false 114 | AfterFunction: true 115 | AfterNamespace: false 116 | AfterStruct: false 117 | AfterUnion: false 118 | AfterExternBlock: false 119 | BeforeCatch: false 120 | BeforeElse: false 121 | IndentBraces: false 122 | BreakBeforeBinaryOperators: None 123 | BreakBeforeBraces: Custom 124 | BreakBeforeTernaryOperators: true 125 | BreakConstructorInitializersBeforeComma: false 126 | BreakStringLiterals: true 127 | ColumnLimit: 95 128 | CommentPragmas: '^ IWYU pragma:' 129 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 130 | Cpp11BracedListStyle: true 131 | DerivePointerAlignment: false 132 | DisableFormat: false 133 | ExperimentalAutoDetectBinPacking: false 134 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 135 | IncludeCategories: 136 | - Regex: '^<[^\.]*>' 137 | Priority: 1 138 | - Regex: '^<.*\.h>' 139 | Priority: 2 140 | - Regex: '.*' 141 | Priority: 3 142 | SortIncludes: true 143 | ConstructorInitializerIndentWidth: 4 144 | ContinuationIndentWidth: 4 145 | IndentCaseLabels: false 146 | IndentWidth: 4 147 | IndentWrappedFunctionNames: false 148 | KeepEmptyLinesAtTheStartOfBlocks: false 149 | MacroBlockBegin: '' 150 | MacroBlockEnd: '' 151 | MaxEmptyLinesToKeep: 1 152 | NamespaceIndentation: Inner 153 | PenaltyBreakBeforeFirstCallParameter: 1 154 | PenaltyBreakComment: 300 155 | PenaltyBreakFirstLessLess: 120 156 | PenaltyBreakString: 1000 157 | PenaltyExcessCharacter: 1000000 158 | PenaltyReturnTypeOnItsOwnLine: 200 159 | PointerAlignment: Right 160 | ReflowComments: true 161 | SpaceAfterCStyleCast: false 162 | SpaceAfterTemplateKeyword: true 163 | SpaceBeforeAssignmentOperators: true 164 | SpaceBeforeParens: ControlStatements 165 | SpaceInEmptyParentheses: false 166 | SpacesBeforeTrailingComments: 2 167 | SpacesInAngles: false 168 | SpacesInContainerLiterals: false 169 | SpacesInCStyleCastParentheses: false 170 | SpacesInParentheses: false 171 | SpacesInSquareBrackets: false 172 | Standard: Cpp11 173 | TabWidth: 4 174 | FixNamespaceComments: false 175 | UseTab: Never 176 | ... 
177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /js/volumes.js: -------------------------------------------------------------------------------- 1 | var requestRecompute = false; 2 | var requestBenchmark = null; 3 | var saveScreenshot = false; 4 | 5 | var datasets = { 6 | plane_x: { 7 | compressionRate: 2, 8 | name: "plane_x_32x32x32_float32.gen.crate2", 9 | range: [0, 1], 10 | scale: [1, 1, 1], 11 | }, 12 | sphere: { 13 | compressionRate: 2, 14 | name: "sphere_32x32x32_float32.gen.crate2", 15 | range: [0, 1], 16 | scale: [1, 1, 1], 17 | }, 18 | quarter_sphere: { 19 | compressionRate: 2, 20 | name: "quarter_sphere_32x32x32_float32.gen.crate2", 21 | range: [0, 1], 22 | scale: [1, 1, 1], 23 | }, 24 | wavelet: { 25 | compressionRate: 2, 26 | name: "wavelet_32x32x32_float32.gen.crate2", 27 | range: [-3, 3], 28 | scale: [1, 1, 1], 29 | }, 30 | fuel: { 31 | compressionRate: 4, 32 | name: "fuel_64x64x64_uint8.raw.crate4", 33 | range: [10, 255], 34 | scale: [1, 1, 1], 35 | }, 36 | aneurism: { 37 | compressionRate: 4, 38 | name: "vertebra_512x512x512_uint16.raw.crate4", 39 | range: [550, 2100], 40 | scale: [1, 1, 1], 41 | }, 42 | duct: { 43 | compressionRate: 2, 44 | name: "duct_193x194x1000_float32.raw.crate2", 45 | range: [0, 4], 46 | scale: [1, 1, 1000 / 193], 47 | step: 4 / 100 48 | }, 49 | stagbeetle: { 50 | compressionRate: 2, 51 | name: "stag_beetle_832x832x494_uint16.raw.crate2", 52 | range: [100, 4096], 53 | scale: [1, 1, 1], 54 | step: 1.0 / 4096, 55 | }, 56 | foot: { 57 | compressionRate: 2, 58 | name: "foot_256x256x256_uint8.raw.crate2", 59 | range: [10, 255], 60 | scale: [1, 1, 1], 61 | }, 62 | backpack: { 63 | compressionRate: 4, 64 | name: "backpack_512x512x373_uint16.raw.crate4", 65 | range: [200, 4000], 66 | scale: [1, 1, 1], 67 | step: 1.0 / 3800.0, 68 | }, 69 | // For benchmarks: 70 | tacc_turbulence: { 71 | compressionRate: 2, 72 | name: "tacc_turbulence_256x256x256_float32.raw.crate2", 73 | range: [1, 10], 74 | scale: [1, 1, 1], 75 | step: 10 / 100, 76 | }, 77 | skull: { 78 | compressionRate: 2, 79 | name: "skull_256x256x256_uint8.raw.crate2", 80 | range: [10, 255], 81 | scale: [1, 1, 1], 82 | }, 83 | magnetic: { 84 | compressionRate: 4, 85 | name: "magnetic_reconnection_512x512x512_float32.raw.crate4", 86 | range: [0.1, 3.5], 87 | scale: [1, 1, 1], 88 | step: 1.0 / 8192, 89 | }, 90 | kingsnake: { 91 | compressionRate: 2, 92 | name: "kingsnake_1024x1024x795_uint8.raw.crate2", 93 | range: [100, 150], 94 | scale: [1, 1, 1], 95 | }, 96 | chameleon: { 97 | compressionRate: 2, 98 | name: "chameleon_1024x1024x1080_uint16.raw.crate2", 99 | range: [11000, 52000], 100 | scale: [1, 1, 1], 101 | step: 1.0 / 8192, 102 | }, 103 | beechnut: { 104 | compressionRate: 1, 105 | name: "beechnut_1024x1024x1546_uint16.raw.crate1", 106 | range: [13200, 17000], 107 | scale: [1, 1, 1], 108 | step: (17000 - 13200) / 100.0, 109 | }, 110 | miranda: { 111 | compressionRate: 4, 112 | name: "miranda_1024x1024x1024_float32.raw.crate4", 113 | range: [1.05, 2.9], 114 | scale: [1, 1, 1], 115 | step: 1.0 / 8192, 116 | }, 117 | jicf_q: { 118 | compressionRate: 2, 119 | name: "jicf_q_1408x1080x1100_float32.raw.crate2", 120 | range: [-15, 15], 121 | scale: [1, 1, 1], 122 | step: 30 / 100 123 | }, 124 | truss: { 125 | compressionRate: 2, 126 | name: "synthetic_truss_with_five_defects_1200x1200x1200_float32.raw.crate2", 127 | range: [0, 0.01], 128 | scale: [1, 1, 1], 129 | }, 130 | dns_large: { 131 | compressionRate: 2, 132 | name: 
"dns_1920x1440x288_float64.raw.crate2", 133 | range: [0.75, 1.15], 134 | scale: [1, 1440 / 1920, 288 / 1920], 135 | step: 1.0 / 8192, 136 | }, 137 | richtmyer_meshkov: { 138 | compressionRate: 1, 139 | name: "richtmyer_meshkov_2048x2048x1920_uint8.raw.crate1", 140 | range: [40, 190], 141 | scale: [1, 1, 1920.0 / 2048.0], 142 | step: 1.0, 143 | }, 144 | }; 145 | 146 | var fileRegex = /(\w+)_(\d+)x(\d+)x(\d+)_(\w+)\.*/; 147 | 148 | var getVolumeDimensions = function(filename) { 149 | var m = filename.match(fileRegex); 150 | return [parseInt(m[2]), parseInt(m[3]), parseInt(m[4])]; 151 | }; 152 | 153 | function runBenchmark(benchmark) 154 | { 155 | requestBenchmark = benchmark; 156 | } 157 | 158 | function saveScreenShotButton() 159 | { 160 | saveScreenshot = true; 161 | } 162 | 163 | // Assumes the input renderTarget and outCanvas have the same image dimensions 164 | async function takeScreenshot(device, name, renderTarget, imageBuffer, outCanvas) 165 | { 166 | var commandEncoder = device.createCommandEncoder(); 167 | commandEncoder.copyTextureToBuffer({texture: renderTarget}, 168 | {buffer: imageBuffer, bytesPerRow: outCanvas.width * 4}, 169 | [outCanvas.width, outCanvas.height, 1]); 170 | device.queue.submit([commandEncoder.finish()]); 171 | await device.queue.onSubmittedWorkDone(); 172 | 173 | await imageBuffer.mapAsync(GPUMapMode.READ); 174 | var imageReadbackArray = new Uint8ClampedArray(imageBuffer.getMappedRange()); 175 | 176 | var context = outCanvas.getContext('2d'); 177 | var imgData = context.createImageData(outCanvas.width, outCanvas.height); 178 | imgData.data.set(imageReadbackArray); 179 | context.putImageData(imgData, 0, 0); 180 | outCanvas.toBlob(function(b) { 181 | saveAs(b, `${name}.png`); 182 | }, "image/png"); 183 | 184 | imageBuffer.unmap(); 185 | } 186 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 10 | 11 | WebGPU Speculative Progressive Isosurface Raycaster 12 | 13 | 14 | 15 |
<!-- index.html body (markup not recoverable): page heading "WebGPU Speculative Progressive Isosurface Raycaster"; note text: "See the paper for more details (link soon!), or check out the code on Github!" -->
101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /shaders/util.glsl: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_GLSL 2 | #define UTIL_GLSL 3 | 4 | #define UINT_MAX uint(0xffffffff) 5 | #define FLT_MAX ( 3.402823466e+38f ) 6 | 7 | struct RayInfo { 8 | vec3 ray_dir; 9 | // uint block_id; 10 | float t; 11 | // float t_next; 12 | // NOTE: std430 layout rules dictate the struct alignment is that of its 13 | // largest member, which is the vec3 ray dir (whose alignment is same as vec4). 14 | // This results in the struct size rounding up to 32, since it has to start 15 | // on 16 byte boundaries. 16 | // So we have two free 4 byte values to use if needed. 17 | }; 18 | 19 | struct BlockInfo { 20 | uint id; 21 | uint ray_offset; 22 | uint num_rays; 23 | // Note: even if we remove this, the struct will still align to sizeof(vec4) 24 | uint lod; 25 | }; 26 | 27 | struct GridIterator { 28 | ivec3 grid_dims; 29 | ivec3 grid_step; 30 | vec3 t_delta; 31 | 32 | ivec3 cell; 33 | vec3 t_max; 34 | float t; 35 | }; 36 | 37 | // The state we save for saving/restoring the grid iterator state 38 | struct GridIteratorState { 39 | vec3 t_max; 40 | int cell_id; 41 | }; 42 | 43 | bool outside_grid(const vec3 p, const vec3 grid_dims) { 44 | return any(lessThan(p, vec3(0))) || any(greaterThanEqual(p, grid_dims)); 45 | } 46 | 47 | bool outside_dual_grid(const vec3 p, const vec3 grid_dims) { 48 | return any(lessThan(p, vec3(0))) || any(greaterThanEqual(p, grid_dims - vec3(1))); 49 | } 50 | 51 | bool outside_grid(const ivec3 p, const ivec3 grid_dims) { 52 | return any(lessThan(p, ivec3(0))) || any(greaterThanEqual(p, grid_dims)); 53 | } 54 | 55 | bool outside_dual_grid(const ivec3 p, const ivec3 grid_dims) { 56 | return any(lessThan(p, ivec3(0))) || any(greaterThanEqual(p, grid_dims - ivec3(1))); 57 | } 58 | 59 | // Initialize the grid traversal state. All positions/directions passed must be in the 60 | // grid coordinate system where a grid cell is 1^3 in size. 
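// The functions below are the standard per-axis DDA bookkeeping for a unit-cell grid:
// t_delta is the t distance needed to cross one cell along each axis (e.g. ray_dir.x = 0.5
// gives t_delta.x = 2), and t_max starts at the t where the ray first crosses a cell boundary
// on each axis, taking the boundary on the negative or positive side of the starting cell
// based on the ray's sign.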
61 | GridIterator init_grid_iterator(vec3 ray_org, vec3 ray_dir, float t, ivec3 grid_dims) { 62 | GridIterator grid_iter; 63 | grid_iter.grid_dims = grid_dims; 64 | grid_iter.grid_step = ivec3(sign(ray_dir)); 65 | 66 | const vec3 inv_ray_dir = 1.0 / ray_dir; 67 | grid_iter.t_delta = abs(inv_ray_dir); 68 | 69 | vec3 p = (ray_org + t * ray_dir); 70 | p = clamp(p, vec3(0), vec3(grid_dims - 1)); 71 | vec3 cell = floor(p); 72 | const vec3 t_max_neg = (cell - ray_org) * inv_ray_dir; 73 | const vec3 t_max_pos = (cell + vec3(1) - ray_org) * inv_ray_dir; 74 | 75 | // Pick between positive/negative t_max based on the ray sign 76 | const bvec3 is_neg_dir = lessThan(ray_dir, vec3(0)); 77 | grid_iter.t_max = mix(t_max_pos, t_max_neg, is_neg_dir); 78 | 79 | grid_iter.cell = ivec3(cell); 80 | 81 | grid_iter.t = t; 82 | 83 | return grid_iter; 84 | } 85 | 86 | GridIterator restore_grid_iterator(vec3 ray_org, 87 | vec3 ray_dir, 88 | ivec3 grid_dims, 89 | in GridIteratorState state) 90 | { 91 | GridIterator grid_iter; 92 | grid_iter.grid_dims = grid_dims; 93 | grid_iter.grid_step = ivec3(sign(ray_dir)); 94 | 95 | const vec3 inv_ray_dir = 1.0 / ray_dir; 96 | grid_iter.t_delta = abs(inv_ray_dir); 97 | 98 | grid_iter.cell = ivec3(state.cell_id % grid_dims.x, 99 | (state.cell_id / grid_dims.x) % grid_dims.y, 100 | state.cell_id / (grid_dims.x * grid_dims.y)); 101 | grid_iter.t_max = state.t_max; 102 | // We don't really care about this value when restoring 103 | grid_iter.t = min(state.t_max.x, min(state.t_max.y, state.t_max.z)); 104 | 105 | return grid_iter; 106 | } 107 | 108 | // Get the current cell the iterator is in and its t interval. Returns false if the iterator is 109 | // outside the grid or the t interval is empty, indicating traversal should stop. 110 | bool grid_iterator_get_cell(inout GridIterator iter, out vec2 cell_t_range, out ivec3 cell_id) { 111 | if (outside_grid(iter.cell, iter.grid_dims)) { 112 | return false; 113 | } 114 | // Return the current cell range and ID to the caller 115 | cell_t_range.x = iter.t; 116 | cell_t_range.y = min(iter.t_max.x, min(iter.t_max.y, iter.t_max.z)); 117 | cell_id = iter.cell; 118 | if (cell_t_range.y < cell_t_range.x) { 119 | return false; 120 | } 121 | return true; 122 | } 123 | 124 | int grid_iterator_get_cell_id(in GridIterator iter) { 125 | return iter.cell.x + iter.grid_dims.x * (iter.cell.y + iter.grid_dims.y * iter.cell.z); 126 | } 127 | 128 | // Advance the iterator to the next cell in the grid. 
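// For reference, a minimal traversal loop over this iterator (macro_traverse.comp drives the
// coarse and macrocell grids with this same pattern; names here are illustrative):
//
//   GridIterator it = init_grid_iterator(ray_org, ray_dir, t_enter, grid_dims);
//   ivec3 cell;
//   vec2 cell_t_range;
//   while (grid_iterator_get_cell(it, cell_t_range, cell)) {
//       // ... test the cell over [cell_t_range.x, cell_t_range.y] ...
//       grid_iterator_advance(it);
//   }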
129 | void grid_iterator_advance(inout GridIterator iter) { 130 | // Move the iterator to the next cell we'll traverse 131 | iter.t = min(iter.t_max.x, min(iter.t_max.y, iter.t_max.z)); 132 | if (iter.t == iter.t_max.x) { 133 | iter.cell.x += iter.grid_step.x; 134 | iter.t_max.x += iter.t_delta.x; 135 | } else if (iter.t == iter.t_max.y) { 136 | iter.cell.y += iter.grid_step.y; 137 | iter.t_max.y += iter.t_delta.y; 138 | } else { 139 | iter.cell.z += iter.grid_step.z; 140 | iter.t_max.z += iter.t_delta.z; 141 | } 142 | } 143 | 144 | vec2 intersect_box(vec3 orig, vec3 dir, const vec3 box_min, const vec3 box_max) { 145 | vec3 inv_dir = 1.0 / dir; 146 | vec3 tmin_tmp = (box_min - orig) * inv_dir; 147 | vec3 tmax_tmp = (box_max - orig) * inv_dir; 148 | vec3 tmin = min(tmin_tmp, tmax_tmp); 149 | vec3 tmax = max(tmin_tmp, tmax_tmp); 150 | float t0 = max(tmin.x, max(tmin.y, tmin.z)); 151 | float t1 = min(tmax.x, min(tmax.y, tmax.z)); 152 | return vec2(t0, t1); 153 | } 154 | 155 | // Pass a float color in [0,1] to pack 156 | int pack_color(vec3 rgb) { 157 | ivec3 rbg256 = clamp(ivec3(rgb * 255), ivec3(0), ivec3(255)); 158 | int c = 0; 159 | c |= (rbg256.x << 24) & 0xff000000; 160 | c |= (rbg256.y << 16) & 0x00ff0000; 161 | c |= (rbg256.z << 8) & 0x0000ff00; 162 | return c; 163 | } 164 | 165 | // Returns a float color in [0,1] 166 | vec3 unpack_color(int rgb8) { 167 | vec3 rgb = vec3(0); 168 | rgb.x = ((rgb8 >> 24) & 0x000000ff) / 255.0; 169 | rgb.y = ((rgb8 >> 16) & 0x000000ff) / 255.0; 170 | rgb.z = ((rgb8 >> 8) & 0x000000ff) / 255.0; 171 | return rgb; 172 | } 173 | 174 | #endif 175 | 176 | -------------------------------------------------------------------------------- /shaders/macro_traverse.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "util.glsl" 4 | 5 | layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; 6 | 7 | layout(set = 0, binding = 0, std140) uniform VolumeParams 8 | { 9 | uvec4 volume_dims; 10 | uvec4 padded_dims; 11 | vec4 volume_scale; 12 | uint max_bits; 13 | float isovalue; 14 | uint image_width; 15 | }; 16 | 17 | layout(set = 0, binding = 1, std140) uniform ViewParams 18 | { 19 | mat4 proj_view; 20 | vec4 eye_pos; 21 | vec4 eye_dir; 22 | float near_plane; 23 | uint current_pass_index; 24 | uint speculation_count; 25 | }; 26 | 27 | layout(set = 0, binding = 2, std430) buffer RayInformation 28 | { 29 | RayInfo rays[]; 30 | }; 31 | 32 | layout(set = 0, binding = 4, std430) buffer GridIterState 33 | { 34 | // Each ray stores 2 iterator states, the coarse one followed by the fine one. 
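    // Only the t_max vector and a linearized cell id are stored per iterator (see
    // GridIteratorState in util.glsl); grid_step and t_delta are recomputed from the ray
    // direction by restore_grid_iterator, so they don't need to be saved.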
35 | GridIteratorState iterator_state[]; 36 | }; 37 | 38 | // speculativeRayIDBuffer 39 | layout(set = 0, binding = 5, std430) buffer RayIDs 40 | { 41 | uint ray_ids[]; 42 | }; 43 | 44 | // speculativeRayOffsetBuffer 45 | layout(set = 0, binding = 6, std430) buffer RayOffsets 46 | { 47 | uint ray_offsets[]; 48 | }; 49 | 50 | layout(set = 0, binding = 7, std430) buffer RayBlockIDs 51 | { 52 | uint block_ids[]; 53 | }; 54 | 55 | uniform layout(set = 0, binding = 3, rgba8) writeonly image2D render_target; 56 | 57 | layout(set = 1, binding = 0, std430) buffer VoxelInformation 58 | { 59 | vec2 voxel_ranges[]; 60 | }; 61 | 62 | layout(set = 1, binding = 1, std430) buffer CoarseCellRange 63 | { 64 | vec2 coarse_cell_ranges[]; 65 | }; 66 | 67 | vec4 block_id_to_color(const uvec3 block_id) { 68 | const vec3 n_blocks = padded_dims.xyz / uvec3(4); 69 | return vec4(vec3(block_id) / n_blocks, 1.0); 70 | } 71 | 72 | void main() { 73 | if (gl_GlobalInvocationID.x >= image_width) { 74 | return; 75 | } 76 | 77 | uint ray_index = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * image_width; 78 | 79 | if (rays[ray_index].t == FLT_MAX) { 80 | return; 81 | } 82 | 83 | // Fill speculated ray ID buffer 84 | if (speculation_count > 1) { 85 | for (int i = 0; i < speculation_count; i++) { 86 | ray_ids[ray_offsets[ray_index] * speculation_count + i] = ray_index; 87 | } 88 | } else { 89 | ray_ids[ray_index] = ray_index; 90 | ray_offsets[ray_index] = ray_index; 91 | } 92 | 93 | const uvec3 n_blocks = padded_dims.xyz / uvec3(4); 94 | const ivec3 macrogrid_dims = ivec3(n_blocks); 95 | 96 | // Coarse grid is 1/4 the size of the macrocell grid 97 | const ivec3 coarse_grid_dims = ivec3(ceil(vec3(macrogrid_dims) / vec3(4))); 98 | 99 | // Transform the ray into the dual grid space 100 | vec3 volume_translation = vec3(0) - volume_scale.xyz * 0.5; 101 | vec3 transformed_eye = (eye_pos.xyz - volume_translation) / volume_scale.xyz; 102 | const vec3 ray_org = transformed_eye * volume_dims.xyz - vec3(0.5); 103 | 104 | // Compute ray origin and dir on the macrocell grid 105 | // Note: ray dir is NOT normalized here, because the t values are relative 106 | // to the full volume grid. 
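    // Scaling both the origin and the direction by 0.25 below re-expresses the same ray on
    // the macrocell grid (one macrocell = one 4^3 ZFP block) while keeping its t
    // parameterization in volume-grid units, so t values can be passed between grids.
    // Scaling by 0.25 once more gives the coarse grid, where each coarse cell covers
    // 4^3 macrocells (16^3 voxels).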
107 | const vec3 macrocell_grid_org = ray_org * 0.25; 108 | const vec3 macrocell_grid_ray_dir = rays[ray_index].ray_dir * 0.25; 109 | 110 | // Compute ray origin and dir on the coarse cell grid 111 | const vec3 coarse_grid_org = macrocell_grid_org * 0.25; 112 | const vec3 coarse_grid_ray_dir = macrocell_grid_ray_dir * 0.25; 113 | 114 | bool first_coarse_iter = true; 115 | 116 | // Traverse the coarse grid 117 | GridIterator coarse_grid_iter; 118 | if (current_pass_index == 0) { 119 | coarse_grid_iter = init_grid_iterator(coarse_grid_org, 120 | coarse_grid_ray_dir, rays[ray_index].t, coarse_grid_dims); 121 | } else { 122 | coarse_grid_iter = restore_grid_iterator(coarse_grid_org, 123 | coarse_grid_ray_dir, coarse_grid_dims, iterator_state[ray_index * 2]); 124 | } 125 | 126 | ivec3 coarse_cell_id; 127 | vec2 coarse_cell_t_range; 128 | uint speculated = 0; 129 | while (grid_iterator_get_cell(coarse_grid_iter, coarse_cell_t_range, coarse_cell_id)) { 130 | const uint coarse_cell_index = coarse_cell_id.x 131 | + coarse_grid_dims.x * (coarse_cell_id.y + coarse_grid_dims.y * coarse_cell_id.z); 132 | vec2 coarse_cell_range = coarse_cell_ranges[coarse_cell_index]; 133 | // Skip all bricks in this coarse cell if we know they won't contain the isovalue 134 | if (isovalue < coarse_cell_range.x || isovalue > coarse_cell_range.y) { 135 | first_coarse_iter = false; 136 | grid_iterator_advance(coarse_grid_iter); 137 | continue; 138 | } 139 | 140 | const ivec3 coarse_grid_cell_org = coarse_cell_id * 4; 141 | const ivec3 macrocell_grid_dims = 142 | ivec3(min(coarse_grid_cell_org + vec3(4), macrogrid_dims) - coarse_grid_cell_org); 143 | // Traverse the macrocell grid within this coarse grid 144 | // We also translate the coarse grid cell to be at the origin for the grid iterator 145 | GridIterator grid_iter; 146 | if (current_pass_index == 0 || !first_coarse_iter) { 147 | grid_iter = init_grid_iterator(macrocell_grid_org - coarse_grid_cell_org, 148 | macrocell_grid_ray_dir, coarse_cell_t_range.x, macrocell_grid_dims); 149 | } else { 150 | grid_iter = restore_grid_iterator(macrocell_grid_org - coarse_grid_cell_org, 151 | macrocell_grid_ray_dir, macrocell_grid_dims, 152 | iterator_state[ray_index * 2 + 1]); 153 | grid_iterator_advance(grid_iter); 154 | } 155 | 156 | ivec3 cell_id; 157 | vec2 cell_t_range; 158 | while (grid_iterator_get_cell(grid_iter, cell_t_range, cell_id)) { 159 | // Value range for a block is it's range combined with that of its neighbors to 160 | // the positive side 161 | const uint block_index = coarse_grid_cell_org.x + cell_id.x 162 | + n_blocks.x * (coarse_grid_cell_org.y + cell_id.y 163 | + n_blocks.y * (coarse_grid_cell_org.z + cell_id.z)); 164 | const vec2 cell_range = voxel_ranges[block_index]; 165 | 166 | if (isovalue >= cell_range.x && isovalue <= cell_range.y) { 167 | block_ids[ray_offsets[ray_index] * speculation_count + speculated] = 168 | block_index; 169 | speculated++; 170 | 171 | // We only save the last iterator state to resume from so we can skip these writes 172 | // until we're on the last speculated ray 173 | if (speculated == speculation_count) { 174 | // Note: grid dims is redundant and an easy one to not store 175 | //save_grid_iterator(coarse_grid_iter, iterator_state[ray_index * 2]); 176 | //save_grid_iterator(grid_iter, iterator_state[ray_index * 2 + 1]); 177 | 178 | iterator_state[ray_index * 2].cell_id = 179 | grid_iterator_get_cell_id(coarse_grid_iter); 180 | iterator_state[ray_index * 2].t_max = coarse_grid_iter.t_max; 181 | // Writing it this way seems to 
result in some invalid code generated 182 | // for the SPV -> WGSL step so it fails to compile 183 | iterator_state[ray_index * 2 + 1].cell_id = 184 | grid_iterator_get_cell_id(grid_iter); 185 | iterator_state[ray_index * 2 + 1].t_max = grid_iter.t_max; 186 | return; 187 | } 188 | } 189 | grid_iterator_advance(grid_iter); 190 | } 191 | first_coarse_iter = false; 192 | grid_iterator_advance(coarse_grid_iter); 193 | } 194 | 195 | if (speculated == 0) { 196 | // If we didn't intersect this ray with any blocks (no speculated rays spawned), 197 | // then this ray has terminated. 198 | rays[ray_index].t = FLT_MAX; 199 | } else if (speculated < speculation_count) { 200 | // If we speculated some rays, then this ray is partially terminated. We need 201 | // to finish tracing the speculated rays, then can mark it complete in depth_composite 202 | rays[ray_index].t = -FLT_MAX; 203 | } 204 | } 205 | 206 | -------------------------------------------------------------------------------- /shaders/load_block.comp: -------------------------------------------------------------------------------- 1 | #define BLOCK_NUM_VOXELS 64 2 | 3 | // For ghost voxels, we only need those in the positive dir, 4 | // since verts for triangles ''behind'' us are the job of the neighboring 5 | // block to that side. So our max size is 5^3 elements if we have a ghost 6 | // layer on each side, which is rounded up to 128 7 | shared float volume_block[128]; 8 | 9 | layout(set = 0, binding = 0, std140) uniform VolumeParams 10 | { 11 | uvec4 volume_dims; 12 | uvec4 padded_dims; 13 | vec4 volume_scale; 14 | uint max_bits; 15 | float isovalue; 16 | uint image_width; 17 | }; 18 | 19 | layout(set = 0, binding = 1, std430) buffer Decompressed 20 | { 21 | float decompressed[]; 22 | }; 23 | 24 | layout(set = 0, binding = 2, std430) buffer BlockLocations 25 | { 26 | // Cached item slots in the cache 27 | // this is lruCache.cachedItemSlots 28 | int block_locations[]; 29 | }; 30 | 31 | const ivec3 index_to_vertex[8] = { 32 | ivec3(0, 0, 0), // v000 = 0 33 | ivec3(1, 0, 0), // v100 = 1 34 | ivec3(0, 1, 0), // v010 = 2 35 | ivec3(1, 1, 0), // v110 = 3 36 | ivec3(0, 0, 1), // v001 = 4 37 | ivec3(1, 0, 1), // v101 = 5 38 | ivec3(0, 1, 1), // v011 = 6 39 | ivec3(1, 1, 1) // v111 = 7 40 | }; 41 | 42 | uvec2 ray_id_to_pos(uint id) 43 | { 44 | return uvec2(id % image_width, id / image_width); 45 | } 46 | 47 | uvec3 block_id_to_pos(uint id) 48 | { 49 | uvec3 n_blocks = padded_dims.xyz / uvec3(4); 50 | return uvec3(id % n_blocks.x, 51 | (id / n_blocks.x) % n_blocks.y, 52 | id / (n_blocks.x * n_blocks.y)); 53 | } 54 | 55 | uint compute_block_id(uvec3 block_pos) 56 | { 57 | uvec3 n_blocks = padded_dims.xyz / uvec3(4); 58 | return block_pos.x + n_blocks.x * (block_pos.y + n_blocks.y * block_pos.z); 59 | } 60 | 61 | uvec3 voxel_id_to_voxel(uint id) 62 | { 63 | return uvec3(id % 4, (id / 4) % 4, id / 16); 64 | } 65 | 66 | uint compute_voxel_id(uvec3 voxel_pos, uvec3 block_dims) 67 | { 68 | return voxel_pos.x + block_dims.x * (voxel_pos.y + block_dims.y * voxel_pos.z); 69 | } 70 | 71 | void compute_vertex_values(uvec3 voxel_pos, uvec3 block_dims, out float values[8], out vec2 value_range) 72 | { 73 | value_range.x = 1e20f; 74 | value_range.y = -1e20f; 75 | for (int i = 0; i < 8; ++i) { 76 | const uvec3 v = index_to_vertex[i]; 77 | uint voxel = ((voxel_pos.z + v.z) * block_dims.y + voxel_pos.y + v.y) * block_dims.x 78 | + voxel_pos.x + v.x; 79 | values[i] = volume_block[voxel]; 80 | value_range.x = min(value_range.x, values[i]); 81 | value_range.y = 
max(value_range.y, values[i]); 82 | } 83 | } 84 | 85 | // Compute the dimensions of the block + its ghost voxels 86 | uvec3 compute_block_dims_with_ghost(const uvec3 block_pos) 87 | { 88 | const uvec3 n_blocks = padded_dims.xyz / uvec3(4); 89 | const uvec3[3] face_neighbors = { uvec3(1, 0, 0), uvec3(0, 1, 0), uvec3(0, 0, 1) }; 90 | const uvec3[3] edge_neighbors = { uvec3(1, 1, 0), uvec3(1, 0, 1), uvec3(0, 1, 1) }; 91 | 92 | uvec3 block_dims = uvec3(4); 93 | if (block_pos.x + 1 < n_blocks.x) { 94 | block_dims.x = 5; 95 | } 96 | if (block_pos.y + 1 < n_blocks.y) { 97 | block_dims.y = 5; 98 | } 99 | if (block_pos.z + 1 < n_blocks.z) { 100 | block_dims.z = 5; 101 | } 102 | 103 | if (block_dims == uvec3(5)) { 104 | const uint corner = compute_block_id(block_pos + uvec3(1)); 105 | if (block_locations[corner] == -1) { 106 | block_dims = uvec3(4); 107 | } 108 | } 109 | if (block_dims.xy == uvec2(5)) { 110 | const uint edge = compute_block_id(block_pos + edge_neighbors[0]); 111 | if (block_locations[edge] == -1) { 112 | block_dims.xy = uvec2(4); 113 | } 114 | } 115 | if (block_dims.xz == uvec2(5)) { 116 | const uint edge = compute_block_id(block_pos + edge_neighbors[1]); 117 | if (block_locations[edge] == -1) { 118 | block_dims.xz = uvec2(4); 119 | } 120 | } 121 | if (block_dims.yz == uvec2(5)) { 122 | const uint edge = compute_block_id(block_pos + edge_neighbors[2]); 123 | if (block_locations[edge] == -1) { 124 | block_dims.yz = uvec2(4); 125 | } 126 | } 127 | if (block_dims.x == 5) { 128 | const uint face = compute_block_id(block_pos + face_neighbors[0]); 129 | if (block_locations[face] == -1) { 130 | block_dims.x = 4; 131 | } 132 | } 133 | if (block_dims.y == 5) { 134 | const uint face = compute_block_id(block_pos + face_neighbors[1]); 135 | if (block_locations[face] == -1) { 136 | block_dims.y = 4; 137 | } 138 | } 139 | if (block_dims.z == 5) { 140 | const uint face = compute_block_id(block_pos + face_neighbors[2]); 141 | if (block_locations[face] == -1) { 142 | block_dims.z = 4; 143 | } 144 | } 145 | return block_dims; 146 | } 147 | 148 | void load_voxel(const uint neighbor_id, 149 | const uvec3 ghost_voxel_pos, 150 | const uvec3 neighbor_voxel_pos, 151 | const uvec3 block_dims) 152 | { 153 | const uint neighbor_location = block_locations[neighbor_id]; 154 | const uint ghost_voxel_id = compute_voxel_id(ghost_voxel_pos, block_dims); 155 | const uint neighbor_voxel_id = compute_voxel_id(neighbor_voxel_pos, uvec3(4)); 156 | volume_block[ghost_voxel_id] = decompressed[neighbor_location * BLOCK_NUM_VOXELS + neighbor_voxel_id]; 157 | } 158 | 159 | // Load the volume block and any ghost voxels needed from neighbors to 160 | // compute the dual grid vertex values. 
Returns the block volume dimensions 161 | uvec3 load_block(const uint block_id) 162 | { 163 | volume_block[gl_LocalInvocationID.x * 2] = 0; 164 | volume_block[gl_LocalInvocationID.x * 2 + 1] = 0; 165 | barrier(); 166 | 167 | const uvec3 block_pos = block_id_to_pos(block_id); 168 | const uvec3 n_blocks = padded_dims.xyz / uvec3(4); 169 | 170 | const uvec3[3] face_neighbors = { uvec3(1, 0, 0), uvec3(0, 1, 0), uvec3(0, 0, 1) }; 171 | const uvec3[3] edge_neighbors = { uvec3(1, 1, 0), uvec3(1, 0, 1), uvec3(0, 1, 1) }; 172 | 173 | const uvec3 block_dims = compute_block_dims_with_ghost(block_pos); 174 | 175 | const uvec3 voxel_pos = voxel_id_to_voxel(gl_LocalInvocationID.x); 176 | load_voxel(block_id, voxel_pos, voxel_pos, block_dims); 177 | 178 | // Load the faces of our face neighbors 179 | for (uint i = 0; i < 3; ++i) { 180 | if (block_dims[i] == 5 && voxel_pos[i] == 3) { 181 | const uvec3 ghost_voxel_pos = voxel_pos + face_neighbors[i]; 182 | uvec3 neighbor_voxel_pos = ghost_voxel_pos; 183 | if (face_neighbors[i].x == 1) { 184 | neighbor_voxel_pos.x = 0; 185 | } else if (face_neighbors[i].y == 1) { 186 | neighbor_voxel_pos.y = 0; 187 | } else { 188 | neighbor_voxel_pos.z = 0; 189 | } 190 | 191 | const uvec3 neighbor_block_pos = block_pos + face_neighbors[i]; 192 | const uint neighbor_id = compute_block_id(neighbor_block_pos); 193 | 194 | load_voxel(neighbor_id, ghost_voxel_pos, neighbor_voxel_pos, block_dims); 195 | } 196 | } 197 | 198 | // Load the scanlines of our edge neighbors 199 | for (uint i = 0; i < 3; ++i) { 200 | uvec3 b = block_dims * edge_neighbors[i]; 201 | uvec3 p = voxel_pos * edge_neighbors[i]; 202 | if (b.x + b.y + b.z == 10 && p.x + p.y + p.z == 6) { 203 | const uvec3 ghost_voxel_pos = voxel_pos + edge_neighbors[i]; 204 | uvec3 neighbor_voxel_pos = ghost_voxel_pos; 205 | if (edge_neighbors[i].x == 1) { 206 | neighbor_voxel_pos.x = 0; 207 | } 208 | if (edge_neighbors[i].y == 1) { 209 | neighbor_voxel_pos.y = 0; 210 | } 211 | if (edge_neighbors[i].z == 1) { 212 | neighbor_voxel_pos.z = 0; 213 | } 214 | 215 | const uvec3 neighbor_block_pos = block_pos + edge_neighbors[i]; 216 | const uint neighbor_id = compute_block_id(neighbor_block_pos); 217 | 218 | load_voxel(neighbor_id, ghost_voxel_pos, neighbor_voxel_pos, block_dims); 219 | } 220 | } 221 | 222 | // Load the corner voxel of our corner neighbor 223 | if (block_dims == uvec3(5) && voxel_pos == uvec3(3)) { 224 | const uvec3 ghost_voxel_pos = voxel_pos + uvec3(1); 225 | 226 | const uvec3 neighbor_block_pos = block_pos + uvec3(1); 227 | const uint neighbor_id = compute_block_id(neighbor_block_pos); 228 | 229 | load_voxel(neighbor_id, ghost_voxel_pos, uvec3(0), block_dims); 230 | } 231 | 232 | barrier(); 233 | return block_dims; 234 | } 235 | 236 | -------------------------------------------------------------------------------- /js/stream_compact.js: -------------------------------------------------------------------------------- 1 | var StreamCompact = function(device) { 2 | this.device = device; 3 | 4 | // Not sure how to query this limit, assuming this size based on OpenGL 5 | // In a less naive implementation doing some block-based implementation w/ 6 | // larger group sizes might be better as well 7 | // We also need to make sure the offset we'll end up using for the 8 | // dynamic offsets is aligned to 256 bytes. 
We're offsetting into arrays 9 | // of uint32, so determine the max dispatch size we should use for each 10 | // individual aligned chunk 11 | this.maxDispatchSize = Math.floor(65535 / 256) * 256; 12 | 13 | this.streamCompactBGLayout = device.createBindGroupLayout({ 14 | entries: [ 15 | { 16 | binding: 0, 17 | visibility: GPUShaderStage.COMPUTE, 18 | buffer: { 19 | type: "storage", 20 | } 21 | }, 22 | { 23 | binding: 1, 24 | visibility: GPUShaderStage.COMPUTE, 25 | buffer: { 26 | type: "storage", 27 | } 28 | }, 29 | { 30 | binding: 2, 31 | visibility: GPUShaderStage.COMPUTE, 32 | buffer: { 33 | type: "uniform", 34 | } 35 | }, 36 | { 37 | binding: 3, 38 | visibility: GPUShaderStage.COMPUTE, 39 | buffer: { 40 | type: "storage", 41 | } 42 | }, 43 | ], 44 | }); 45 | this.streamCompactPipeline = device.createComputePipeline({ 46 | layout: device.createPipelineLayout({ 47 | bindGroupLayouts: [this.streamCompactBGLayout], 48 | }), 49 | compute: { 50 | module: device.createShaderModule({code: stream_compact_comp_spv}), 51 | entryPoint: "main", 52 | }, 53 | }); 54 | 55 | this.dataBGLayout = device.createBindGroupLayout({ 56 | entries: [{ 57 | binding: 0, 58 | visibility: GPUShaderStage.COMPUTE, 59 | buffer: { 60 | type: "storage", 61 | } 62 | }] 63 | }); 64 | this.streamCompactDataPipeline = device.createComputePipeline({ 65 | layout: device.createPipelineLayout( 66 | {bindGroupLayouts: [this.streamCompactBGLayout, this.dataBGLayout]}), 67 | compute: { 68 | module: device.createShaderModule({code: stream_compact_data_comp_spv}), 69 | entryPoint: "main", 70 | } 71 | }); 72 | }; 73 | 74 | StreamCompact.prototype.compactActiveIDs = 75 | async function(numElements, isActiveBuffer, offsetsBuffer, outputBuffer) { 76 | // No push constants in the API? This is really a hassle to hack together 77 | // because I also have to obey (at least Dawn's rule is it part of the spec?) 78 | // that the dynamic offsets be 256b aligned 79 | // Please add push constants! 80 | var numChunks = Math.ceil(numElements / this.maxDispatchSize); 81 | var compactPassOffset = this.device.createBuffer({ 82 | size: numChunks * 256, 83 | usage: GPUBufferUsage.UNIFORM, 84 | mappedAtCreation: true, 85 | }); 86 | { 87 | var map = new Uint32Array(compactPassOffset.getMappedRange()); 88 | for (var i = 0; i < numChunks; ++i) { 89 | map[i * 64] = i * this.maxDispatchSize; 90 | } 91 | compactPassOffset.unmap(); 92 | } 93 | var commandEncoder = this.device.createCommandEncoder(); 94 | var pass = commandEncoder.beginComputePass(); 95 | pass.setPipeline(this.streamCompactPipeline); 96 | for (var i = 0; i < numChunks; ++i) { 97 | var numWorkGroups = 98 | Math.min(numElements - i * this.maxDispatchSize, this.maxDispatchSize); 99 | var offset = i * this.maxDispatchSize * 4; 100 | // Have to create bind groups here because dynamic offsets are not allowed 101 | // for security 102 | // TODO: Was this re-enabled? 
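// Note on the branch below: two bind group variants are created, one for full
// chunks of maxDispatchSize elements and one for the final partial chunk, whose
// bound ranges of the active/offset buffers shrink to the remaining
// (numElements % maxDispatchSize) entries.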
103 | var streamCompactBG = null; 104 | if (numWorkGroups === this.maxDispatchSize) { 105 | streamCompactBG = this.device.createBindGroup({ 106 | layout: this.streamCompactBGLayout, 107 | entries: [ 108 | { 109 | binding: 0, 110 | resource: { 111 | buffer: isActiveBuffer, 112 | size: 4 * Math.min(numElements, this.maxDispatchSize), 113 | offset: offset, 114 | }, 115 | }, 116 | { 117 | binding: 1, 118 | resource: { 119 | buffer: offsetsBuffer, 120 | size: 4 * Math.min(numElements, this.maxDispatchSize), 121 | offset: offset, 122 | }, 123 | }, 124 | { 125 | binding: 2, 126 | resource: { 127 | buffer: compactPassOffset, 128 | size: 4, 129 | offset: i * 256, 130 | }, 131 | }, 132 | { 133 | binding: 3, 134 | resource: { 135 | buffer: outputBuffer, 136 | }, 137 | }, 138 | ], 139 | }); 140 | } else { 141 | streamCompactBG = this.device.createBindGroup({ 142 | layout: this.streamCompactBGLayout, 143 | entries: [ 144 | { 145 | binding: 0, 146 | resource: { 147 | buffer: isActiveBuffer, 148 | size: 4 * (numElements % this.maxDispatchSize), 149 | offset: offset, 150 | }, 151 | }, 152 | { 153 | binding: 1, 154 | resource: { 155 | buffer: offsetsBuffer, 156 | size: 4 * (numElements % this.maxDispatchSize), 157 | offset: offset, 158 | }, 159 | }, 160 | { 161 | binding: 2, 162 | resource: { 163 | buffer: compactPassOffset, 164 | size: 4, 165 | offset: i * 256, 166 | }, 167 | }, 168 | { 169 | binding: 3, 170 | resource: { 171 | buffer: outputBuffer, 172 | }, 173 | }, 174 | ], 175 | }); 176 | } 177 | pass.setBindGroup(0, streamCompactBG); 178 | pass.dispatchWorkgroups(Math.ceil(numWorkGroups / 8), 1, 1); 179 | } 180 | pass.end(); 181 | this.device.queue.submit([commandEncoder.finish()]); 182 | await this.device.queue.onSubmittedWorkDone(); 183 | }; 184 | 185 | StreamCompact.prototype.compactActive = 186 | async function(numElements, isActiveBuffer, offsetsBuffer, dataBuffer, outputBuffer) { 187 | // No push constants in the API? This is really a hassle to hack together 188 | // because I also have to obey (at least Dawn's rule is it part of the spec?) 189 | // that the dynamic offsets be 256b aligned 190 | // Please add push constants! 
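// As in compactActiveIDs above, the per-chunk element offsets are packed into a
// uniform buffer at a 256 byte stride to satisfy the offset alignment rule:
// chunk i reads a single uint32 at byte offset i * 256 (map[i * 64] below),
// whose value is i * maxDispatchSize.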
191 | var numChunks = Math.ceil(numElements / this.maxDispatchSize); 192 | var compactPassOffset = this.device.createBuffer({ 193 | size: numChunks * 256, 194 | usage: GPUBufferUsage.UNIFORM, 195 | mappedAtCreation: true, 196 | }); 197 | { 198 | var map = new Uint32Array(compactPassOffset.getMappedRange()); 199 | for (var i = 0; i < numChunks; ++i) { 200 | map[i * 64] = i * this.maxDispatchSize; 201 | } 202 | compactPassOffset.unmap(); 203 | } 204 | 205 | var dataBG = this.device.createBindGroup( 206 | {layout: this.dataBGLayout, entries: [{binding: 0, resource: {buffer: dataBuffer}}]}); 207 | 208 | var commandEncoder = this.device.createCommandEncoder(); 209 | var pass = commandEncoder.beginComputePass(); 210 | pass.setPipeline(this.streamCompactDataPipeline); 211 | for (var i = 0; i < numChunks; ++i) { 212 | var numWorkGroups = 213 | Math.min(numElements - i * this.maxDispatchSize, this.maxDispatchSize); 214 | 215 | // Have to create bind groups here because dynamic offsets are not allowed 216 | var streamCompactBG = this.device.createBindGroup({ 217 | layout: this.streamCompactBGLayout, 218 | entries: [ 219 | { 220 | binding: 0, 221 | resource: { 222 | buffer: isActiveBuffer, 223 | }, 224 | }, 225 | { 226 | binding: 1, 227 | resource: { 228 | buffer: offsetsBuffer, 229 | }, 230 | }, 231 | { 232 | binding: 2, 233 | resource: { 234 | buffer: compactPassOffset, 235 | size: 4, 236 | offset: i * 256, 237 | }, 238 | }, 239 | { 240 | binding: 3, 241 | resource: { 242 | buffer: outputBuffer, 243 | }, 244 | }, 245 | ], 246 | }); 247 | pass.setBindGroup(0, streamCompactBG); 248 | pass.setBindGroup(1, dataBG); 249 | pass.dispatchWorkgroups(Math.ceil(numWorkGroups / 8), 1, 1); 250 | } 251 | pass.end(); 252 | this.device.queue.submit([commandEncoder.finish()]); 253 | await this.device.queue.onSubmittedWorkDone(); 254 | }; 255 | -------------------------------------------------------------------------------- /shaders/zfp_decompress.comp: -------------------------------------------------------------------------------- 1 | #define UINT_MAX uint(0xffffffff) 2 | 3 | struct EmulateUint64 { 4 | uint lo; 5 | uint hi; 6 | }; 7 | 8 | layout(set = 0, binding = 0, std430) buffer Compressed 9 | { 10 | EmulateUint64 compressed[]; 11 | }; 12 | 13 | layout(set = 0, binding = 1, std140) uniform VolumeParams 14 | { 15 | uvec4 volume_dims; 16 | uvec4 padded_dims; 17 | vec4 volume_scale; 18 | uint max_bits; 19 | float isovalue; 20 | uint image_width; 21 | }; 22 | 23 | // Each ZFP block is 4^3 24 | const uint ZFP_BLOCK_SIZE = 64; 25 | 26 | EmulateUint64 make_emulate_uint64(uint hi, uint lo) 27 | { 28 | EmulateUint64 a; 29 | a.lo = lo; 30 | a.hi = hi; 31 | return a; 32 | } 33 | 34 | EmulateUint64 bitwise_and(const EmulateUint64 a, const EmulateUint64 b) 35 | { 36 | EmulateUint64 c; 37 | c.lo = a.lo & b.lo; 38 | c.hi = a.hi & b.hi; 39 | return c; 40 | } 41 | 42 | EmulateUint64 bitwise_or(const EmulateUint64 a, const EmulateUint64 b) 43 | { 44 | EmulateUint64 c; 45 | c.lo = a.lo | b.lo; 46 | c.hi = a.hi | b.hi; 47 | return c; 48 | } 49 | 50 | EmulateUint64 shift_left(const EmulateUint64 a, uint n) 51 | { 52 | // TODO: cleaner implementation? 
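// Sketch of the emulated 64-bit shift below: n == 0 is handled separately since
// a shift by (32 - n) == 32 would be an undefined shift amount. For n < 32 the
// top n bits of the low word are masked off as a carry and OR'd into the shifted
// high word; for n >= 32 the low word moves entirely into the high word
// (shifted by n - 32) and the low word becomes zero.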
53 | if (n == 0) { 54 | return a; 55 | } 56 | EmulateUint64 b; 57 | if (n < 32) { 58 | const uint carry = a.lo & (UINT_MAX << (32 - n)); 59 | b.lo = a.lo << n; 60 | b.hi = (a.hi << n) | (carry >> (32 - n)); 61 | } else { 62 | b.lo = 0; 63 | b.hi = a.lo << (n - 32); 64 | } 65 | return b; 66 | } 67 | 68 | EmulateUint64 shift_right(const EmulateUint64 a, uint n) 69 | { 70 | if (n == 0) { 71 | return a; 72 | } 73 | EmulateUint64 b; 74 | if (n < 32) { 75 | const uint carry = a.hi & (UINT_MAX >> (32 - n)); 76 | b.lo = (a.lo >> n) | (carry << (32 - n)); 77 | b.hi = a.hi >> n; 78 | } else { 79 | b.lo = a.hi >> (n - 32); 80 | b.hi = 0; 81 | } 82 | return b; 83 | } 84 | 85 | EmulateUint64 make_mask(uint n) 86 | { 87 | EmulateUint64 a = make_emulate_uint64(0, 0); 88 | if (n > 0 && n < 65) { 89 | if (n > 32) { 90 | a.lo = UINT_MAX; 91 | a.hi = UINT_MAX >> (64 - n); 92 | } else { 93 | a.lo = UINT_MAX >> (32 - n); 94 | a.hi = 0; 95 | } 96 | } 97 | return a; 98 | } 99 | 100 | 101 | #define zfp_index_3d(x, y, z) ((x) + 4 * ((y) + 4 * (z))) 102 | const uint ZFP_PERM3D[64] = { 103 | zfp_index_3d(0, 0, 0), // 0 : 0 104 | 105 | zfp_index_3d(1, 0, 0), // 1 : 1 106 | zfp_index_3d(0, 1, 0), // 2 : 1 107 | zfp_index_3d(0, 0, 1), // 3 : 1 108 | 109 | zfp_index_3d(0, 1, 1), // 4 : 2 110 | zfp_index_3d(1, 0, 1), // 5 : 2 111 | zfp_index_3d(1, 1, 0), // 6 : 2 112 | 113 | zfp_index_3d(2, 0, 0), // 7 : 2 114 | zfp_index_3d(0, 2, 0), // 8 : 2 115 | zfp_index_3d(0, 0, 2), // 9 : 2 116 | 117 | zfp_index_3d(1, 1, 1), // 10 : 3 118 | 119 | zfp_index_3d(2, 1, 0), // 11 : 3 120 | zfp_index_3d(2, 0, 1), // 12 : 3 121 | zfp_index_3d(0, 2, 1), // 13 : 3 122 | zfp_index_3d(1, 2, 0), // 14 : 3 123 | zfp_index_3d(1, 0, 2), // 15 : 3 124 | zfp_index_3d(0, 1, 2), // 16 : 3 125 | 126 | zfp_index_3d(3, 0, 0), // 17 : 3 127 | zfp_index_3d(0, 3, 0), // 18 : 3 128 | zfp_index_3d(0, 0, 3), // 19 : 3 129 | 130 | zfp_index_3d(2, 1, 1), // 20 : 4 131 | zfp_index_3d(1, 2, 1), // 21 : 4 132 | zfp_index_3d(1, 1, 2), // 22 : 4 133 | 134 | zfp_index_3d(0, 2, 2), // 23 : 4 135 | zfp_index_3d(2, 0, 2), // 24 : 4 136 | zfp_index_3d(2, 2, 0), // 25 : 4 137 | 138 | zfp_index_3d(3, 1, 0), // 26 : 4 139 | zfp_index_3d(3, 0, 1), // 27 : 4 140 | zfp_index_3d(0, 3, 1), // 28 : 4 141 | zfp_index_3d(1, 3, 0), // 29 : 4 142 | zfp_index_3d(1, 0, 3), // 30 : 4 143 | zfp_index_3d(0, 1, 3), // 31 : 4 144 | 145 | zfp_index_3d(1, 2, 2), // 32 : 5 146 | zfp_index_3d(2, 1, 2), // 33 : 5 147 | zfp_index_3d(2, 2, 1), // 34 : 5 148 | 149 | zfp_index_3d(3, 1, 1), // 35 : 5 150 | zfp_index_3d(1, 3, 1), // 36 : 5 151 | zfp_index_3d(1, 1, 3), // 37 : 5 152 | 153 | zfp_index_3d(3, 2, 0), // 38 : 5 154 | zfp_index_3d(3, 0, 2), // 39 : 5 155 | zfp_index_3d(0, 3, 2), // 40 : 5 156 | zfp_index_3d(2, 3, 0), // 41 : 5 157 | zfp_index_3d(2, 0, 3), // 42 : 5 158 | zfp_index_3d(0, 2, 3), // 43 : 5 159 | 160 | zfp_index_3d(2, 2, 2), // 44 : 6 161 | 162 | zfp_index_3d(3, 2, 1), // 45 : 6 163 | zfp_index_3d(3, 1, 2), // 46 : 6 164 | zfp_index_3d(1, 3, 2), // 47 : 6 165 | zfp_index_3d(2, 3, 1), // 48 : 6 166 | zfp_index_3d(2, 1, 3), // 49 : 6 167 | zfp_index_3d(1, 2, 3), // 50 : 6 168 | 169 | zfp_index_3d(0, 3, 3), // 51 : 6 170 | zfp_index_3d(3, 0, 3), // 52 : 6 171 | zfp_index_3d(3, 3, 0), // 53 : 6 172 | 173 | zfp_index_3d(3, 2, 2), // 54 : 7 174 | zfp_index_3d(2, 3, 2), // 55 : 7 175 | zfp_index_3d(2, 2, 3), // 56 : 7 176 | 177 | zfp_index_3d(1, 3, 3), // 57 : 7 178 | zfp_index_3d(3, 1, 3), // 58 : 7 179 | zfp_index_3d(3, 3, 1), // 59 : 7 180 | 181 | zfp_index_3d(2, 3, 3), // 60 : 8 
182 | zfp_index_3d(3, 2, 3), // 61 : 8 183 | zfp_index_3d(3, 3, 2), // 62 : 8 184 | 185 | zfp_index_3d(3, 3, 3), // 63 : 9 186 | }; 187 | #undef zfp_index_3d 188 | 189 | struct BlockReader { 190 | uint current_bit; 191 | // Index of our current word in the Compressed buffer 192 | uint current_word; 193 | EmulateUint64 word_buffer; 194 | }; 195 | 196 | BlockReader create_block_reader(uint block_index) 197 | { 198 | BlockReader reader; 199 | if (max_bits != 64) { 200 | reader.current_word = (block_index * max_bits) / 64; // sizeof(Word) * 8 = 64 201 | reader.current_bit = (block_index * max_bits) % 64; 202 | } else { 203 | // For large datasets we use 1 bit per voxel, and must skip multiplying by 64 to 204 | // avoid overflowing 205 | reader.current_word = block_index; 206 | reader.current_bit = 0; 207 | } 208 | 209 | reader.word_buffer = compressed[reader.current_word]; 210 | reader.word_buffer = shift_right(reader.word_buffer, reader.current_bit); 211 | return reader; 212 | } 213 | 214 | void advance_word(inout BlockReader reader) 215 | { 216 | reader.current_bit = 0; 217 | ++reader.current_word; 218 | reader.word_buffer = compressed[reader.current_word]; 219 | } 220 | 221 | uint read_bit(inout BlockReader reader) 222 | { 223 | uint bit = reader.word_buffer.lo & 1; 224 | ++reader.current_bit; 225 | reader.word_buffer = shift_right(reader.word_buffer, 1); 226 | 227 | // Advance to next bit if we left the current word 228 | if (reader.current_bit >= 64) { 229 | advance_word(reader); 230 | } 231 | return bit; 232 | } 233 | 234 | // Same as ZFP CUDA, assumes n_bits <= 64 235 | EmulateUint64 read_bits(inout BlockReader reader, const uint n_bits) 236 | { 237 | uint rem_bits = 64 - reader.current_bit; 238 | uint first_read = min(rem_bits, n_bits); 239 | 240 | EmulateUint64 mask = make_mask(first_read); 241 | EmulateUint64 bits = bitwise_and(reader.word_buffer, mask); 242 | reader.word_buffer = shift_right(reader.word_buffer, n_bits); 243 | reader.current_bit += first_read; 244 | 245 | // If we're reading more bits than we had in the buffer, we need to 246 | // get the next word and read some bits from it 247 | uint next_read = 0; 248 | if (n_bits >= rem_bits) { 249 | advance_word(reader); 250 | next_read = n_bits - first_read; 251 | } 252 | 253 | mask = make_mask(next_read); 254 | bits = bitwise_or(bits, shift_left(bitwise_and(reader.word_buffer, mask), first_read)); 255 | 256 | reader.word_buffer = shift_right(reader.word_buffer, next_read); 257 | reader.current_bit += next_read; 258 | return bits; 259 | } 260 | 261 | // Map negabinary unsigned int to two's complement int 262 | int uint2int(uint x) 263 | { 264 | return int((x ^ 0xaaaaaaaau) - 0xaaaaaaaau); 265 | } 266 | 267 | void decode_ints(inout BlockReader reader, 268 | const uint block_max_bits, 269 | inout uint block[ZFP_BLOCK_SIZE]) 270 | { 271 | for (uint i = 0; i < ZFP_BLOCK_SIZE; ++i) { 272 | block[i] = 0; 273 | } 274 | 275 | const uint intprec = 32; 276 | EmulateUint64 x = make_emulate_uint64(0, 0); 277 | const EmulateUint64 one = make_emulate_uint64(0, 1); 278 | 279 | uint bits = block_max_bits; 280 | for (uint k = intprec, n = 0; bits != 0 && k-- > 0;) { 281 | uint m = min(n, bits); 282 | bits -= m; 283 | x = read_bits(reader, m); 284 | for (; n < ZFP_BLOCK_SIZE && bits != 0 && (bits--, read_bit(reader) != 0); 285 | x = bitwise_or(x, shift_left(one, n++))) { 286 | for (; n < (ZFP_BLOCK_SIZE - 1) && bits != 0 && (bits--, read_bit(reader) == 0); 287 | ++n) 288 | ; 289 | } 290 | 291 | // Deposit the bit plane 292 | for (uint i = 0; i < 
ZFP_BLOCK_SIZE; ++i, x = shift_right(x, 1)) { 293 | block[i] += (x.lo & 1) << k; 294 | } 295 | } 296 | } 297 | 298 | void inverse_lift(inout int block[ZFP_BLOCK_SIZE], const uint s, const uint idx) 299 | { 300 | ivec4 v; 301 | for (uint i = 0; i < 4; ++i) { 302 | v[i] = block[idx + i * s]; 303 | } 304 | 305 | /* Non-orthogonal transform for ZFP: 306 | * [4 6 -4 -1] [x] 307 | * 1/4 * [4 2 4 5] [y] 308 | * [4 -2 4 -5] [z] 309 | * [4 -6 -4 1] [w] 310 | */ 311 | 312 | v.y += v.w >> 1; 313 | v.w -= v.y >> 1; 314 | 315 | v.y += v.w; 316 | v.w <<= 1; 317 | v.w -= v.y; 318 | 319 | v.z += v.x; 320 | v.x <<= 1; 321 | v.x -= v.z; 322 | 323 | v.y += v.z; 324 | v.z <<= 1; 325 | v.z -= v.y; 326 | 327 | v.w += v.x; 328 | v.x <<= 1; 329 | v.x -= v.w; 330 | 331 | for (uint i = 0; i < 4; ++i) { 332 | block[idx + i * s] = v[i]; 333 | } 334 | } 335 | 336 | void inverse_transform(inout int block[ZFP_BLOCK_SIZE]) 337 | { 338 | // Transform along z 339 | for (uint y = 0; y < 4; ++y) { 340 | for (uint x = 0; x < 4; ++x) { 341 | inverse_lift(block, 16, x + 4 * y); 342 | } 343 | } 344 | // Transform along y 345 | for (uint x = 0; x < 4; ++x) { 346 | for (uint z = 0; z < 4; ++z) { 347 | inverse_lift(block, 4, 16 * z + x); 348 | } 349 | } 350 | // Transform along x 351 | for (uint z = 0; z < 4; ++z) { 352 | for (uint y = 0; y < 4; ++y) { 353 | inverse_lift(block, 1, 4 * y + 16 * z); 354 | } 355 | } 356 | } 357 | 358 | void decompress_block(in BlockReader reader, inout float decompressed_block[ZFP_BLOCK_SIZE]) 359 | { 360 | // Note: not porting over the int decompression support from ZFP CUDA 361 | // and only supporting float32 data 362 | uint s_cont = read_bit(reader); 363 | if (s_cont != 0) { 364 | // Hard-coded for float32 365 | const uint ebits = 9; 366 | const uint ebias = 127; 367 | 368 | const int emax = int(read_bits(reader, ebits - 1).lo - ebias); 369 | uint block_max_bits = max_bits - ebits; 370 | 371 | uint uint_block[ZFP_BLOCK_SIZE]; 372 | decode_ints(reader, block_max_bits, uint_block); 373 | 374 | int int_block[ZFP_BLOCK_SIZE]; 375 | for (uint i = 0; i < ZFP_BLOCK_SIZE; ++i) { 376 | int_block[ZFP_PERM3D[i]] = uint2int(uint_block[i]); 377 | } 378 | 379 | inverse_transform(int_block); 380 | const float inv_w = ldexp(1.f, emax - 30); 381 | 382 | for (uint i = 0; i < ZFP_BLOCK_SIZE; ++i) { 383 | decompressed_block[i] = inv_w * float(int_block[i]); 384 | } 385 | } 386 | } 387 | 388 | 389 | -------------------------------------------------------------------------------- /js/radix_sort_by_key.js: -------------------------------------------------------------------------------- 1 | var RadixSorter = function(device) { 2 | this.device = device; 3 | 4 | this.bgLayout = this.device.createBindGroupLayout({ 5 | entries: [ 6 | { 7 | binding: 0, 8 | visibility: GPUShaderStage.COMPUTE, 9 | buffer: { 10 | type: "uniform", 11 | } 12 | }, 13 | ], 14 | }); 15 | 16 | this.radixSortBGLayout = this.device.createBindGroupLayout({ 17 | entries: [ 18 | { 19 | binding: 0, 20 | visibility: GPUShaderStage.COMPUTE, 21 | buffer: { 22 | type: "storage", 23 | } 24 | }, 25 | { 26 | binding: 1, 27 | visibility: GPUShaderStage.COMPUTE, 28 | buffer: { 29 | type: "storage", 30 | } 31 | }, 32 | ], 33 | }); 34 | 35 | this.mergeBGLayout = this.device.createBindGroupLayout({ 36 | entries: [ 37 | { 38 | binding: 0, 39 | visibility: GPUShaderStage.COMPUTE, 40 | buffer: { 41 | type: "storage", 42 | } 43 | }, 44 | { 45 | binding: 1, 46 | visibility: GPUShaderStage.COMPUTE, 47 | buffer: { 48 | type: "storage", 49 | } 50 | }, 51 | { 52 | binding: 2, 
53 | visibility: GPUShaderStage.COMPUTE, 54 | buffer: { 55 | type: "storage", 56 | } 57 | }, 58 | { 59 | binding: 3, 60 | visibility: GPUShaderStage.COMPUTE, 61 | buffer: { 62 | type: "storage", 63 | } 64 | }, 65 | ], 66 | }); 67 | 68 | this.numWorkGroupsBGLayout = this.device.createBindGroupLayout({ 69 | entries: [ 70 | { 71 | binding: 0, 72 | visibility: GPUShaderStage.COMPUTE, 73 | buffer: { 74 | type: "uniform", 75 | } 76 | }, 77 | ], 78 | }); 79 | 80 | this.reverseBGLayout = this.device.createBindGroupLayout({ 81 | entries: [ 82 | { 83 | binding: 0, 84 | visibility: GPUShaderStage.COMPUTE, 85 | buffer: { 86 | type: "storage", 87 | } 88 | }, 89 | ], 90 | }); 91 | 92 | this.sortPipeline = this.device.createComputePipeline({ 93 | layout: this.device.createPipelineLayout({ 94 | bindGroupLayouts: [this.bgLayout, this.radixSortBGLayout], 95 | }), 96 | compute: { 97 | module: this.device.createShaderModule({ 98 | code: radix_sort_chunk_comp_spv, 99 | }), 100 | entryPoint: "main", 101 | }, 102 | }); 103 | 104 | this.mergePipeline = this.device.createComputePipeline({ 105 | layout: this.device.createPipelineLayout({ 106 | bindGroupLayouts: [ 107 | this.bgLayout, 108 | this.mergeBGLayout, 109 | this.numWorkGroupsBGLayout, 110 | ], 111 | }), 112 | compute: { 113 | module: this.device.createShaderModule({ 114 | code: merge_sorted_chunks_comp_spv, 115 | }), 116 | entryPoint: "main", 117 | }, 118 | }); 119 | 120 | this.reversePipeline = this.device.createComputePipeline({ 121 | layout: this.device.createPipelineLayout({ 122 | bindGroupLayouts: [this.bgLayout, this.reverseBGLayout], 123 | }), 124 | compute: { 125 | module: this.device.createShaderModule({code: reverse_buffer_comp_spv}), 126 | entryPoint: "main", 127 | }, 128 | }); 129 | }; 130 | 131 | var nextPow2 = function(x) { 132 | var a = x - 1; 133 | a |= a >> 1; 134 | a |= a >> 2; 135 | a |= a >> 4; 136 | a |= a >> 8; 137 | a |= a >> 16; 138 | return a + 1; 139 | }; 140 | 141 | RadixSorter.prototype.getAlignedSize = function(size) { 142 | var chunkCount = nextPow2(Math.ceil(size / SortChunkSize)); 143 | return chunkCount * SortChunkSize; 144 | }; 145 | 146 | // Input buffers are assumed to be of size "alignedSize" 147 | RadixSorter.prototype.sort = async function(keys, values, size, reverse) { 148 | // Has to be a pow2 * chunkSize elements, since we do log_2 merge steps up 149 | var chunkCount = nextPow2(Math.ceil(size / SortChunkSize)); 150 | var alignedSize = chunkCount * SortChunkSize; 151 | var numMergeSteps = Math.log2(chunkCount); 152 | 153 | var buffers = { 154 | keys: keys, 155 | values: values, 156 | }; 157 | 158 | var scratch = { 159 | keys: this.device.createBuffer({ 160 | size: alignedSize * 4, 161 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, 162 | }), 163 | values: this.device.createBuffer({ 164 | size: alignedSize * 4, 165 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, 166 | }), 167 | }; 168 | 169 | var arrayInfoBuf = this.device.createBuffer({ 170 | size: 4, 171 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_SRC, 172 | mappedAtCreation: true, 173 | }); 174 | new Uint32Array(arrayInfoBuf.getMappedRange()).set([size]); 175 | arrayInfoBuf.unmap(); 176 | 177 | // We'll send the workgroup count through a UBO w/ dynamic offset, so we need 178 | // to obey the dynamic offset alignment rules as well 179 | var numWorkGroupsBuf = this.device.createBuffer({ 180 | size: Math.max(numMergeSteps, 1) * 256, 181 | usage: GPUBufferUsage.UNIFORM | 
GPUBufferUsage.COPY_SRC, 182 | mappedAtCreation: true, 183 | }); 184 | { 185 | var upload = new Uint32Array(numWorkGroupsBuf.getMappedRange()); 186 | for (var i = 0; i < numMergeSteps; ++i) { 187 | upload[i * 64] = chunkCount / (2 << i); 188 | } 189 | } 190 | numWorkGroupsBuf.unmap(); 191 | 192 | var infoBindGroup = this.device.createBindGroup({ 193 | layout: this.bgLayout, 194 | entries: [ 195 | { 196 | binding: 0, 197 | resource: { 198 | buffer: arrayInfoBuf, 199 | }, 200 | }, 201 | ], 202 | }); 203 | 204 | var radixSortBG = this.device.createBindGroup({ 205 | layout: this.radixSortBGLayout, 206 | entries: [ 207 | { 208 | binding: 0, 209 | resource: { 210 | buffer: buffers.keys, 211 | }, 212 | }, 213 | { 214 | binding: 1, 215 | resource: { 216 | buffer: buffers.values, 217 | }, 218 | }, 219 | ], 220 | }); 221 | 222 | var mergeBindGroups = [ 223 | this.device.createBindGroup({ 224 | layout: this.mergeBGLayout, 225 | entries: [ 226 | { 227 | binding: 0, 228 | resource: { 229 | buffer: buffers.keys, 230 | }, 231 | }, 232 | { 233 | binding: 1, 234 | resource: { 235 | buffer: buffers.values, 236 | }, 237 | }, 238 | { 239 | binding: 2, 240 | resource: { 241 | buffer: scratch.keys, 242 | }, 243 | }, 244 | { 245 | binding: 3, 246 | resource: { 247 | buffer: scratch.values, 248 | }, 249 | }, 250 | ], 251 | }), 252 | this.device.createBindGroup({ 253 | layout: this.mergeBGLayout, 254 | entries: [ 255 | { 256 | binding: 0, 257 | resource: { 258 | buffer: scratch.keys, 259 | }, 260 | }, 261 | { 262 | binding: 1, 263 | resource: { 264 | buffer: scratch.values, 265 | }, 266 | }, 267 | { 268 | binding: 2, 269 | resource: { 270 | buffer: buffers.keys, 271 | }, 272 | }, 273 | { 274 | binding: 3, 275 | resource: { 276 | buffer: buffers.values, 277 | }, 278 | }, 279 | ], 280 | }), 281 | ]; 282 | 283 | var reverseBG = this.device.createBindGroup({ 284 | layout: this.reverseBGLayout, 285 | entries: [ 286 | { 287 | binding: 0, 288 | resource: { 289 | buffer: numMergeSteps % 2 == 0 ? buffers.values : scratch.values, 290 | }, 291 | }, 292 | ], 293 | }); 294 | 295 | var commandEncoder = this.device.createCommandEncoder(); 296 | var pass = commandEncoder.beginComputePass(); 297 | pass.setPipeline(this.sortPipeline); 298 | pass.setBindGroup(0, infoBindGroup); 299 | pass.setBindGroup(1, radixSortBG); 300 | pass.dispatchWorkgroups(chunkCount, 1, 1); 301 | pass.end(); 302 | 303 | // Merge the chunks up 304 | var pass = commandEncoder.beginComputePass(); 305 | pass.setPipeline(this.mergePipeline); 306 | pass.setBindGroup(0, infoBindGroup); 307 | for (var i = 0; i < numMergeSteps; ++i) { 308 | var numWorkGroupsBG = this.device.createBindGroup({ 309 | layout: this.numWorkGroupsBGLayout, 310 | entries: [ 311 | { 312 | binding: 0, 313 | resource: { 314 | buffer: numWorkGroupsBuf, 315 | size: 4, 316 | offset: i * 256, 317 | }, 318 | }, 319 | ], 320 | }); 321 | pass.setBindGroup(1, mergeBindGroups[i % 2]); 322 | pass.setBindGroup(2, numWorkGroupsBG); 323 | pass.dispatchWorkgroups(chunkCount / (2 << i), 1, 1); 324 | } 325 | pass.end(); 326 | this.device.queue.submit([commandEncoder.finish()]); 327 | 328 | var commandEncoder = this.device.createCommandEncoder(); 329 | if (reverse) { 330 | var pass = commandEncoder.beginComputePass(); 331 | pass.setPipeline(this.reversePipeline); 332 | pass.setBindGroup(0, infoBindGroup); 333 | pass.setBindGroup(1, reverseBG); 334 | pass.dispatchWorkgroups(Math.ceil(chunkCount / 2), 1, 1); 335 | pass.end(); 336 | } 337 | 338 | var readbackOffset = reverse ? 
alignedSize - size : 0; 339 | // Copy the sorted real data to the start of the buffer 340 | if (numMergeSteps % 2 == 0) { 341 | commandEncoder.copyBufferToBuffer( 342 | buffers.values, readbackOffset * 4, scratch.values, 0, size * 4); 343 | commandEncoder.copyBufferToBuffer(scratch.values, 0, buffers.values, 0, size * 4); 344 | } else { 345 | commandEncoder.copyBufferToBuffer( 346 | scratch.values, readbackOffset * 4, buffers.values, 0, size * 4); 347 | } 348 | 349 | this.device.queue.submit([commandEncoder.finish()]); 350 | await this.device.queue.onSubmittedWorkDone(); 351 | 352 | scratch.keys.destroy(); 353 | scratch.values.destroy(); 354 | arrayInfoBuf.destroy(); 355 | numWorkGroupsBuf.destroy(); 356 | }; 357 | -------------------------------------------------------------------------------- /js/exclusive_scan.js: -------------------------------------------------------------------------------- 1 | var alignTo = function(val, align) { 2 | return Math.floor((val + align - 1) / align) * align; 3 | }; 4 | 5 | // Serial scan for validation 6 | var serialExclusiveScan = function(array, output) { 7 | output[0] = 0; 8 | for (var i = 1; i < array.length; ++i) { 9 | output[i] = array[i - 1] + output[i - 1]; 10 | } 11 | return output[array.length - 1] + array[array.length - 1]; 12 | }; 13 | 14 | var ExclusiveScanPipeline = function(device) { 15 | this.device = device; 16 | // Each thread in a work group is responsible for 2 elements 17 | this.workGroupSize = ScanBlockSize / 2; 18 | // The max size which can be scanned by a single batch without carry in/out 19 | this.maxScanSize = ScanBlockSize * ScanBlockSize; 20 | console.log(`Block size: ${ScanBlockSize}, max scan size: ${this.maxScanSize}`); 21 | 22 | // Buffer to clear the block sums for each new scan 23 | var clearBlocks = device.createBuffer({ 24 | size: ScanBlockSize * 4, 25 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, 26 | mappedAtCreation: true, 27 | }); 28 | new Uint32Array(clearBlocks.getMappedRange()).fill(0); 29 | clearBlocks.unmap(); 30 | this.clearBuf = clearBlocks; 31 | 32 | this.scanBlocksLayout = device.createBindGroupLayout({ 33 | entries: [ 34 | {binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: {type: "storage"}}, 35 | { 36 | binding: 1, 37 | visibility: GPUShaderStage.COMPUTE, 38 | buffer: { 39 | type: "storage", 40 | } 41 | }, 42 | ], 43 | }); 44 | 45 | this.scanBlockResultsLayout = device.createBindGroupLayout({ 46 | entries: [ 47 | { 48 | binding: 0, 49 | visibility: GPUShaderStage.COMPUTE, 50 | buffer: { 51 | type: "storage", 52 | } 53 | }, 54 | { 55 | binding: 1, 56 | visibility: GPUShaderStage.COMPUTE, 57 | buffer: { 58 | type: "storage", 59 | } 60 | }, 61 | ], 62 | }); 63 | 64 | this.scanBlocksPipeline = device.createComputePipeline({ 65 | layout: device.createPipelineLayout({ 66 | bindGroupLayouts: [this.scanBlocksLayout], 67 | }), 68 | compute: { 69 | module: device.createShaderModule({code: prefix_sum_comp_spv}), 70 | entryPoint: "main", 71 | }, 72 | }); 73 | 74 | this.scanBlockResultsPipeline = device.createComputePipeline({ 75 | layout: device.createPipelineLayout({ 76 | bindGroupLayouts: [this.scanBlockResultsLayout], 77 | }), 78 | compute: { 79 | module: device.createShaderModule({code: block_prefix_sum_comp_spv}), 80 | entryPoint: "main", 81 | }, 82 | }); 83 | 84 | this.addBlockSumsPipeline = device.createComputePipeline({ 85 | layout: device.createPipelineLayout({ 86 | bindGroupLayouts: [this.scanBlocksLayout], 87 | }), 88 | compute: { 89 | module: device.createShaderModule({code: 
add_block_sums_comp_spv}), 90 | entryPoint: "main", 91 | }, 92 | }); 93 | }; 94 | 95 | ExclusiveScanPipeline.prototype.getAlignedSize = function(size) { 96 | return alignTo(size, ScanBlockSize); 97 | }; 98 | 99 | // TODO: refactor to have this return a prepared scanner object? 100 | // Then the pipelines and bind group layouts can be re-used and shared between the scanners 101 | ExclusiveScanPipeline.prototype.prepareInput = function(cpuArray) { 102 | var alignedSize = alignTo(cpuArray.length, ScanBlockSize); 103 | 104 | // Upload input and pad to block size elements 105 | var inputBuf = this.device.createBuffer({ 106 | size: alignedSize * 4, 107 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, 108 | mappedAtCreation: true, 109 | }); 110 | new Uint32Array(inputBuf.getMappedRange()).set(cpuArray); 111 | inputBuf.unmap(); 112 | 113 | return new ExclusiveScanner(this, inputBuf, alignedSize, cpuArray.length); 114 | }; 115 | 116 | ExclusiveScanPipeline.prototype.prepareGPUInput = function(gpuBuffer, alignedSize) { 117 | if (this.getAlignedSize(alignedSize) != alignedSize) { 118 | alert("Error: GPU input must be aligned to getAlignedSize"); 119 | } 120 | 121 | return new ExclusiveScanner(this, gpuBuffer, alignedSize); 122 | }; 123 | 124 | var ExclusiveScanner = function(scanPipeline, gpuBuffer, alignedSize) { 125 | this.scanPipeline = scanPipeline; 126 | this.inputSize = alignedSize; 127 | this.inputBuf = gpuBuffer; 128 | 129 | this.readbackBuf = scanPipeline.device.createBuffer({ 130 | size: 4, 131 | usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST, 132 | }); 133 | 134 | // Block sum buffer 135 | var blockSumBuf = scanPipeline.device.createBuffer({ 136 | size: ScanBlockSize * 4, 137 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, 138 | mappedAtCreation: true, 139 | }); 140 | new Uint32Array(blockSumBuf.getMappedRange()).fill(0); 141 | blockSumBuf.unmap(); 142 | this.blockSumBuf = blockSumBuf; 143 | 144 | var carryBuf = scanPipeline.device.createBuffer({ 145 | size: 8, 146 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, 147 | mappedAtCreation: true, 148 | }); 149 | new Uint32Array(carryBuf.getMappedRange()).fill(0); 150 | carryBuf.unmap(); 151 | this.carryBuf = carryBuf; 152 | 153 | // Can't copy from a buffer to itself so we need an intermediate to move the carry 154 | this.carryIntermediateBuf = scanPipeline.device.createBuffer({ 155 | size: 4, 156 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, 157 | }); 158 | 159 | this.scanBlockResultsBindGroup = scanPipeline.device.createBindGroup({ 160 | layout: this.scanPipeline.scanBlockResultsLayout, 161 | entries: [ 162 | { 163 | binding: 0, 164 | resource: { 165 | buffer: blockSumBuf, 166 | }, 167 | }, 168 | { 169 | binding: 1, 170 | resource: { 171 | buffer: carryBuf, 172 | }, 173 | }, 174 | ], 175 | }); 176 | }; 177 | 178 | ExclusiveScanner.prototype.scan = async function(dataSize) { 179 | // If the data size we're scanning within the larger input array has changed, 180 | // we just need to re-record the scan commands 181 | var numChunks = Math.ceil(dataSize / this.scanPipeline.maxScanSize); 182 | this.offsets = new Uint32Array(numChunks); 183 | for (var i = 0; i < numChunks; ++i) { 184 | this.offsets.set([i * this.scanPipeline.maxScanSize * 4], i); 185 | } 186 | 187 | // Scan through the data in chunks, updating carry in/out at the end to carry 188 | // over the results of the previous chunks 
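// Rough outline of the per-chunk commands recorded below: clear the block sums,
// scan each block of the chunk (prefix_sum), scan the block sums themselves
// (block_prefix_sum, which is also bound to the carry in/out pair), add the
// scanned block sums back onto the chunk (add_block_sums), then copy the chunk's
// carry out into the carry in slot for the next chunk.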
189 | var commandEncoder = this.scanPipeline.device.createCommandEncoder(); 190 | 191 | // Clear the carry buffer and the readback sum entry if it's not scan size aligned 192 | commandEncoder.copyBufferToBuffer(this.scanPipeline.clearBuf, 0, this.carryBuf, 0, 8); 193 | for (var i = 0; i < numChunks; ++i) { 194 | var nWorkGroups = 195 | Math.min((this.inputSize - i * this.scanPipeline.maxScanSize) / ScanBlockSize, 196 | ScanBlockSize); 197 | 198 | var scanBlockBG = null; 199 | if (nWorkGroups === ScanBlockSize) { 200 | scanBlockBG = this.scanPipeline.device.createBindGroup({ 201 | layout: this.scanPipeline.scanBlocksLayout, 202 | entries: [ 203 | { 204 | binding: 0, 205 | resource: { 206 | buffer: this.inputBuf, 207 | size: Math.min(this.scanPipeline.maxScanSize, this.inputSize) * 4, 208 | offset: this.offsets[i], 209 | }, 210 | }, 211 | { 212 | binding: 1, 213 | resource: { 214 | buffer: this.blockSumBuf, 215 | }, 216 | }, 217 | ], 218 | }); 219 | } else { 220 | // Bind groups for processing the remainder if the aligned size isn't 221 | // an even multiple of the max scan size 222 | scanBlockBG = this.scanPipeline.device.createBindGroup({ 223 | layout: this.scanPipeline.scanBlocksLayout, 224 | entries: [ 225 | { 226 | binding: 0, 227 | resource: { 228 | buffer: this.inputBuf, 229 | size: (this.inputSize % this.scanPipeline.maxScanSize) * 4, 230 | offset: this.offsets[i], 231 | }, 232 | }, 233 | { 234 | binding: 1, 235 | resource: { 236 | buffer: this.blockSumBuf, 237 | }, 238 | }, 239 | ], 240 | }); 241 | } 242 | 243 | // Clear the previous block sums 244 | commandEncoder.copyBufferToBuffer( 245 | this.scanPipeline.clearBuf, 0, this.blockSumBuf, 0, ScanBlockSize * 4); 246 | 247 | var computePass = commandEncoder.beginComputePass(); 248 | 249 | computePass.setPipeline(this.scanPipeline.scanBlocksPipeline); 250 | computePass.setBindGroup(0, scanBlockBG); 251 | computePass.dispatchWorkgroups(nWorkGroups, 1, 1); 252 | 253 | computePass.setPipeline(this.scanPipeline.scanBlockResultsPipeline); 254 | computePass.setBindGroup(0, this.scanBlockResultsBindGroup); 255 | computePass.dispatchWorkgroups(1, 1, 1); 256 | 257 | computePass.setPipeline(this.scanPipeline.addBlockSumsPipeline); 258 | computePass.setBindGroup(0, scanBlockBG); 259 | computePass.dispatchWorkgroups(nWorkGroups, 1, 1); 260 | 261 | computePass.end(); 262 | 263 | // Update the carry in value for the next chunk, copy carry out to carry in 264 | commandEncoder.copyBufferToBuffer(this.carryBuf, 4, this.carryIntermediateBuf, 0, 4); 265 | commandEncoder.copyBufferToBuffer(this.carryIntermediateBuf, 0, this.carryBuf, 0, 4); 266 | } 267 | var commandBuffer = commandEncoder.finish(); 268 | 269 | // We need to clear a different element in the input buf for the last item if the data size 270 | // shrinks 271 | if (dataSize < this.inputSize) { 272 | var commandEncoder = this.scanPipeline.device.createCommandEncoder(); 273 | commandEncoder.copyBufferToBuffer( 274 | this.scanPipeline.clearBuf, 0, this.inputBuf, dataSize * 4, 4); 275 | this.scanPipeline.device.queue.submit([commandEncoder.finish()]); 276 | } 277 | 278 | this.scanPipeline.device.queue.submit([commandBuffer]); 279 | 280 | // Readback the the last element to return the total sum as well 281 | var commandEncoder = this.scanPipeline.device.createCommandEncoder(); 282 | if (dataSize < this.inputSize) { 283 | commandEncoder.copyBufferToBuffer(this.inputBuf, dataSize * 4, this.readbackBuf, 0, 4); 284 | } else { 285 | commandEncoder.copyBufferToBuffer(this.carryBuf, 4, this.readbackBuf, 
0, 4); 286 | } 287 | this.scanPipeline.device.queue.submit([commandEncoder.finish()]); 288 | 289 | await this.readbackBuf.mapAsync(GPUMapMode.READ); 290 | var mapping = new Uint32Array(this.readbackBuf.getMappedRange()); 291 | var sum = mapping[0]; 292 | this.readbackBuf.unmap(); 293 | 294 | return sum; 295 | }; 296 | -------------------------------------------------------------------------------- /shaders/raytrace_active_block.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "util.glsl" 4 | #include "load_block.comp" 5 | 6 | layout(local_size_x = BLOCK_NUM_VOXELS, local_size_y = 1, local_size_z = 1) in; 7 | 8 | layout(set = 1, binding = 0, std140) uniform ViewParams 9 | { 10 | mat4 proj_view; 11 | vec4 eye_pos; 12 | vec4 eye_dir; 13 | float near_plane; 14 | uint current_pass_index; 15 | }; 16 | 17 | layout(set = 1, binding = 1, std430) buffer RayInformation 18 | { 19 | RayInfo rays[]; 20 | }; 21 | 22 | layout(set = 1, binding = 2, std430) buffer RayIDs 23 | { 24 | uint ray_ids[]; 25 | }; 26 | 27 | layout(set = 1, binding = 3, std430) buffer BlockInformation 28 | { 29 | BlockInfo blocks[]; 30 | }; 31 | 32 | layout(set = 1, binding = 5, std430) buffer SpeculativeIDs 33 | { 34 | uint spec_ids[]; 35 | }; 36 | 37 | layout(set = 1, binding = 6, std430) buffer RayRGBZ 38 | { 39 | vec2 ray_rgbz[]; 40 | }; 41 | 42 | uniform layout(set = 1, binding = 4, rgba8) writeonly image2D render_target; 43 | 44 | layout(set = 2, binding = 0, std140) uniform BlockIDOffset 45 | { 46 | uint id_offset; 47 | uint total_active_blocks; 48 | }; 49 | 50 | shared BlockInfo block_info; 51 | 52 | // Compute the polynomial for the cell with the given vertex values 53 | vec4 compute_polynomial(const vec3 p, const vec3 dir, const vec3 v000, in float values[8]) { 54 | const vec3 v111 = v000 + vec3(1); 55 | // Note: Grid voxels sizes are 1^3 56 | const vec3 a[2] = {v111 - p, p - v000}; 57 | const vec3 b[2] = {-dir, dir}; 58 | 59 | vec4 poly = vec4(0); 60 | for (int k = 0; k < 2; ++k) { 61 | for (int j = 0; j < 2; ++j) { 62 | for (int i = 0; i < 2; ++i) { 63 | const float val = values[i + 2 * (j + 2 * k)]; 64 | 65 | poly.x += b[i].x * b[j].y * b[k].z * val; 66 | 67 | poly.y += (a[i].x * b[j].y * b[k].z + 68 | b[i].x * a[j].y * b[k].z + 69 | b[i].x * b[j].y * a[k].z) * val; 70 | 71 | poly.z += (b[i].x * a[j].y * a[k].z + 72 | a[i].x * b[j].y * a[k].z + 73 | a[i].x * a[j].y * b[k].z) * val; 74 | 75 | poly.w += a[i].x * a[j].y * a[k].z * val; 76 | } 77 | } 78 | } 79 | 80 | return poly; 81 | } 82 | 83 | float evaluate_polynomial(const vec4 poly, const float t) { 84 | return poly.x * t * t * t + poly.y * t * t + poly.z * t + poly.w; 85 | } 86 | 87 | // Returns true if the quadratic has real roots 88 | bool solve_quadratic(const vec3 poly, out float roots[2]) { 89 | // Check for case when poly is just Bt + c = 0 90 | if (poly.x == 0) { 91 | roots[0] = -poly.z/poly.y; 92 | roots[1] = -poly.z/poly.y; 93 | return true; 94 | } 95 | float discriminant = pow(poly.y, 2.f) - 4.f * poly.x * poly.z; 96 | if (discriminant < 0.f) { 97 | return false; 98 | } 99 | discriminant = sqrt(discriminant); 100 | vec2 r = 0.5f * vec2(-poly.y + discriminant, -poly.y - discriminant) / poly.x; 101 | roots[0] = min(r.x, r.y); 102 | roots[1] = max(r.x, r.y); 103 | return true; 104 | } 105 | 106 | // Trilinear interpolation at the given point within the cell with its origin at v000 107 | // (origin = bottom-left-near point) 108 | float trilinear_interpolate_in_cell(const vec3 p, const 
ivec3 v000, in float values[8]) { 109 | const vec3 diff = clamp(p, vec3(v000), vec3(v000) + 1) - v000; 110 | // Interpolate across x, then y, then z, and return the value normalized between 0 and 1 111 | // WILL note: renamed t0 c00/c11 to match wikipedia notation 112 | const float c00 = values[0] * (1.f - diff.x) + values[1] * diff.x; 113 | const float c01 = values[4] * (1.f - diff.x) + values[5] * diff.x; 114 | const float c10 = values[2] * (1.f - diff.x) + values[3] * diff.x; 115 | const float c11 = values[6] * (1.f - diff.x) + values[7] * diff.x; 116 | const float c0 = c00 * (1.f - diff.y) + c10 * diff.y; 117 | const float c1 = c01 * (1.f - diff.y) + c11 * diff.y; 118 | return c0 * (1.f - diff.z) + c1 * diff.z; 119 | } 120 | 121 | vec3 shading(vec3 N, vec3 V, vec3 L, vec3 base_color) { 122 | // Just a simple Blinn-Phong model for the paper 123 | vec3 H = normalize(V + L); 124 | vec3 c = base_color * 0.2; 125 | c.rgb += 0.6 * clamp(dot(L, N), 0.f, 1.f) * base_color; 126 | c.rgb += 0.1 * pow(clamp(dot(H, N), 0.f, 1.f), 5.f); 127 | return c; 128 | } 129 | 130 | vec3 compute_normal(const ivec3 v000, const vec3 hit_p, const in float vertex_values[8]) { 131 | vec3 N = vec3(0); 132 | const vec3 v111 = v000 + vec3(1); 133 | const vec3 a[2] = {v111 - hit_p, hit_p - v000}; 134 | const float negative_pow[2] = {-1, 1}; 135 | for (int k = 0; k < 2; ++k) { 136 | for (int j = 0; j < 2; ++j) { 137 | for (int i = 0; i < 2; ++i) { 138 | const float val = vertex_values[i + 2 * (j + 2 * k)]; 139 | N.x += negative_pow[i] * a[j].y * a[k].z * val; 140 | N.y += negative_pow[j] * a[i].x * a[k].z * val; 141 | N.z += negative_pow[k] * a[i].x * a[j].y * val; 142 | } 143 | } 144 | } 145 | return normalize(N); 146 | } 147 | 148 | bool marmitt_intersect(const in vec3 vol_eye, 149 | const in vec3 grid_ray_dir, 150 | const in vec3 v000, 151 | const in float vertex_values[8], 152 | const float t_prev, 153 | const float t_next, 154 | out float t_hit) 155 | { 156 | if (t_next <= t_prev) { 157 | return false; 158 | } 159 | // The text seems to not say explicitly, but I think it is required to have 160 | // the ray "origin" within the cell for the cell-local coordinates for a to 161 | // be computed properly. So here I set the cell_p to be at the midpoint of the 162 | // ray's overlap with the cell, which makes it easy to compute t_in/t_out and 163 | // avoid numerical issues with cell_p being right at the edge of the cell. 
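// Added note on the t range computed below: with cell_p placed at the midpoint
// of the [t_prev, t_next] overlap, the entry and exit points lie at -/+ half
// that overlap along the ray. The polynomial is evaluated along the normalized
// direction, so these half-widths are scaled by length(grid_ray_dir) to convert
// grid-space t values into cell-local (unit direction) t values.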
164 | const vec3 cell_p = vol_eye + grid_ray_dir * (t_prev + (t_next - t_prev) * 0.5f); 165 | float t_in = -(t_next - t_prev) * 0.5f * length(grid_ray_dir); 166 | float t_out = (t_next - t_prev) * 0.5f * length(grid_ray_dir); 167 | 168 | const vec3 cell_ray_dir = normalize(grid_ray_dir); 169 | vec4 poly = compute_polynomial(cell_p, cell_ray_dir, v000, vertex_values); 170 | poly.w -= isovalue; 171 | 172 | float f_in = evaluate_polynomial(poly, t_in); 173 | float f_out = evaluate_polynomial(poly, t_out); 174 | float roots[2] = {0.0, 0.0}; 175 | // TODO: Seeming to get some holes in the surface with the Marmitt intersector 176 | if (solve_quadratic(vec3(3.f * poly.x, 2.f * poly.y, poly.z), roots)) { 177 | if (roots[0] >= t_in && roots[0] <= t_out) { 178 | float f_root0 = evaluate_polynomial(poly, roots[0]); 179 | if (sign(f_root0) == sign(f_in)) { 180 | t_in = roots[0]; 181 | f_in = f_root0; 182 | } else { 183 | t_out = roots[0]; 184 | f_out = f_root0; 185 | } 186 | } 187 | if (roots[1] >= t_in && roots[1] <= t_out) { 188 | float f_root1 = evaluate_polynomial(poly, roots[1]); 189 | if (sign(f_root1) == sign(f_in)) { 190 | t_in = roots[1]; 191 | f_in = f_root1; 192 | } else { 193 | t_out = roots[1]; 194 | f_out = f_root1; 195 | } 196 | } 197 | } 198 | // If the signs aren't equal we know there's an intersection in the cell 199 | if (sign(f_in) != sign(f_out)) { 200 | // Find the intersection via repeated linear interpolation 201 | for (int i = 0; i < 3; ++i) { 202 | float t = t_in + (t_out - t_in) * (-f_in) / (f_out - f_in); 203 | float f_t = evaluate_polynomial(poly, t); 204 | if (sign(f_t) == sign(f_in)) { 205 | t_in = t; 206 | f_in = f_t; 207 | } else { 208 | t_out = t; 209 | f_out = f_t; 210 | } 211 | } 212 | const float cell_t_hit = t_in + (t_out - t_in) * (-f_in) / (f_out - f_in); 213 | // Return t_hit relative to vol_eye 214 | vec3 hit_p = cell_p + cell_ray_dir * cell_t_hit; 215 | t_hit = length(hit_p - vol_eye) / length(grid_ray_dir); 216 | return true; 217 | } 218 | return false; 219 | } 220 | 221 | void main(void) 222 | { 223 | // Note: not scale applied to id_offset here, because the 64 threads in each 224 | // work group are responsible for the same block 225 | // if (gl_WorkGroupID.x + id_offset >= total_active_blocks) { 226 | // return; 227 | // } 228 | 229 | if (gl_LocalInvocationID.x == 0) { 230 | block_info = blocks[gl_WorkGroupID.x + id_offset]; 231 | } 232 | 233 | // Note: Barriers must be called from uniform control flow, so can't call these 234 | // conditional return statements before calling barrier or load_block. 235 | barrier(); 236 | uvec3 block_dims = load_block(block_info.id); 237 | if (gl_WorkGroupID.x + id_offset >= total_active_blocks) { 238 | return; 239 | } 240 | if (block_info.num_rays == 0) { 241 | return; 242 | } 243 | 244 | const uvec3 n_blocks = padded_dims.xyz / uvec3(4); 245 | 246 | // Block position in voxels 247 | const uvec3 block_pos = block_id_to_pos(block_info.id) * 4; 248 | 249 | // See note on barriers above. 
250 | // uvec3 block_dims; 251 | // if (block_info.lod == 0) { 252 | // block_dims = load_block(block_info.id); 253 | // } else { 254 | // // Don't load the block data if we're doing LOD 255 | // block_dims = compute_block_dims_with_ghost(block_pos / 4); 256 | // } 257 | 258 | vec3 volume_translation = vec3(0) - volume_scale.xyz * 0.5; 259 | vec3 transformed_eye = (eye_pos.xyz - volume_translation) / volume_scale.xyz; 260 | 261 | // Transform eye into the volume space, and then the block space 262 | vec3 vol_eye = transformed_eye * volume_dims.xyz - vec3(0.5) - block_pos; 263 | 264 | // Process all the rays for this block in parallel, each thread takes one ray 265 | // so we can do 64 rays at a time. For cases where a block projects to a lot more 266 | // than 64 pixels it'd be good to break up the rays to be processed by multiple 267 | // work groups to avoid serializing rendering. However that might not be a common case 268 | // for most large volumes. For the last chunk, threads that don't have a ray to 269 | // process just write an invalid texture coordinate, which results in the write 270 | // being discarded. However, they have to call imageStore collectively, as it's a 271 | // sync point 272 | uint chunks = (block_info.num_rays / gl_WorkGroupSize.x); 273 | if (block_info.num_rays % gl_WorkGroupSize.x != 0) { 274 | ++chunks; 275 | } 276 | 277 | for (uint i = 0; i < chunks; ++i) { 278 | ivec2 pixel_coords = ivec2(-1); 279 | vec4 color = vec4(1); 280 | color.a = 1; 281 | const uint ray_id = i * gl_WorkGroupSize.x + gl_LocalInvocationID.x; 282 | 283 | if (ray_id < block_info.num_rays) { 284 | const uint ray_index = ray_ids[block_info.ray_offset + ray_id]; 285 | pixel_coords = ivec2(ray_id_to_pos(ray_index)); 286 | const uint spec_index = spec_ids[block_info.ray_offset + ray_id]; 287 | 288 | const vec3 grid_ray_dir = rays[ray_index].ray_dir; 289 | 290 | // Traverse the ray through the dual grid 291 | float vertex_values[8]; 292 | bool hit_surface = false; 293 | vec3 hit_p = vec3(0); 294 | float t_hit; 295 | vec2 cell_range = vec2(0); 296 | 297 | const vec2 brick_range = 298 | intersect_box(vol_eye, grid_ray_dir, vec3(0), vec3(4.0)); 299 | 300 | // Make sure we actually intersect this brick, can have some glancing edge cases 301 | if (brick_range.y <= brick_range.x) { 302 | continue; 303 | } 304 | 305 | GridIterator grid_iter = 306 | init_grid_iterator(vol_eye, 307 | grid_ray_dir, 308 | brick_range.x - 0.001, 309 | ivec3(block_dims) - 1); 310 | ivec3 v000 = ivec3(0); 311 | vec2 cell_t_range; 312 | while (grid_iterator_get_cell(grid_iter, cell_t_range, v000)) { 313 | compute_vertex_values(v000, block_dims, vertex_values, cell_range); 314 | 315 | const bool skip_cell = isovalue < cell_range.x || isovalue > cell_range.y; 316 | 317 | if (!skip_cell) { 318 | hit_surface = marmitt_intersect(vol_eye, 319 | grid_ray_dir, v000, vertex_values, cell_t_range.x, cell_t_range.y, t_hit); 320 | 321 | if (hit_surface) { 322 | hit_p = vol_eye + grid_ray_dir * t_hit; 323 | break; 324 | } 325 | } 326 | grid_iterator_advance(grid_iter); 327 | } 328 | 329 | if (hit_surface) { 330 | // color.xyz = vec3(rays[ray_index].t / (padded_dims.x)); 331 | // block_ids[ray_index] = UINT_MAX; 332 | rays[ray_index].t = FLT_MAX; 333 | 334 | vec3 N = compute_normal(v000, hit_p, vertex_values); 335 | vec3 L = normalize(-grid_ray_dir); 336 | vec3 V = normalize(-grid_ray_dir); 337 | // Make sure normal faces back along ray 338 | if (dot(N, grid_ray_dir) > 0.0) { 339 | N = -N; 340 | } 341 | const vec3 base_color = vec3(0.3, 
0.3, 0.9); 342 | // color.xyz = vec3(cell_range.y / 4096.0); 343 | // color.xyz = vec3(v000) / block_pos; 344 | color.xyz = shading(N, L, V, base_color); 345 | // color.xyz = base_color; 346 | // Pack the ray color 347 | ray_rgbz[spec_index] = vec2(intBitsToFloat(pack_color(color.xyz)), t_hit); 348 | } 349 | // imageStore(render_target, pixel_coords, color); 350 | } 351 | } 352 | } 353 | 354 | -------------------------------------------------------------------------------- /js/tri_table.js: -------------------------------------------------------------------------------- 1 | const triTable = new Int32Array([ 2 | -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 3, -1, 0, 0, 0, 3 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 9, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 | 0, 0, 8, 1, 9, 8, 3, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 10, 1, -1, 0, 5 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 3, 1, 2, 10, -1, 0, 0, 0, 0, 0, 6 | 0, 0, 0, 0, 9, 2, 10, 9, 0, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 7 | 3, 10, 8, 8, 10, 9, -1, 0, 0, 0, 0, 0, 0, 2, 3, 11, -1, 0, 0, 0, 0, 0, 0, 8 | 0, 0, 0, 0, 0, 0, 11, 0, 8, 11, 2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 9 | 9, 0, 2, 3, 11, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 9, 2, 9, 11, 11, 9, 10 | 8, -1, 0, 0, 0, 0, 0, 0, 3, 10, 1, 3, 11, 10, -1, 0, 0, 0, 0, 0, 0, 0, 0, 11 | 0, 1, 0, 8, 1, 8, 10, 10, 8, 11, -1, 0, 0, 0, 0, 0, 0, 0, 3, 11, 0, 11, 9, 12 | 9, 11, 10, -1, 0, 0, 0, 0, 0, 0, 11, 10, 9, 11, 9, 8, -1, 0, 0, 0, 0, 0, 0, 13 | 0, 0, 0, 4, 7, 8, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 0, 4, 14 | 7, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 7, 8, 9, 0, 1, -1, 0, 0, 0, 0, 15 | 0, 0, 0, 0, 0, 9, 4, 7, 9, 7, 1, 1, 7, 3, -1, 0, 0, 0, 0, 0, 0, 4, 7, 16 | 8, 1, 2, 10, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 0, 4, 7, 3, 2, 10, 1, 17 | -1, 0, 0, 0, 0, 0, 0, 2, 9, 0, 2, 10, 9, 4, 7, 8, -1, 0, 0, 0, 0, 0, 0, 18 | 3, 2, 7, 7, 9, 4, 7, 2, 9, 9, 2, 10, -1, 0, 0, 0, 8, 4, 7, 3, 11, 2, -1, 19 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 11, 2, 7, 2, 4, 4, 2, 0, -1, 0, 0, 0, 0, 20 | 0, 0, 2, 3, 11, 1, 9, 0, 8, 4, 7, -1, 0, 0, 0, 0, 0, 0, 2, 1, 9, 2, 9, 21 | 4, 2, 4, 11, 11, 4, 7, -1, 0, 0, 0, 10, 3, 11, 10, 1, 3, 8, 4, 7, -1, 0, 0, 22 | 0, 0, 0, 0, 4, 7, 0, 0, 10, 1, 7, 10, 0, 7, 11, 10, -1, 0, 0, 0, 8, 4, 7, 23 | 0, 3, 11, 0, 11, 9, 9, 11, 10, -1, 0, 0, 0, 7, 9, 4, 7, 11, 9, 9, 11, 10, -1, 24 | 0, 0, 0, 0, 0, 0, 4, 9, 5, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 25 | 3, 0, 4, 9, 5, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 0, 1, 5, -1, 0, 26 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 8, 3, 4, 3, 5, 5, 3, 1, -1, 0, 0, 0, 0, 0, 27 | 0, 1, 2, 10, 9, 5, 4, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 9, 5, 8, 3, 0, 28 | 1, 2, 10, -1, 0, 0, 0, 0, 0, 0, 10, 5, 4, 10, 4, 2, 2, 4, 0, -1, 0, 0, 0, 29 | 0, 0, 0, 4, 8, 3, 4, 3, 2, 4, 2, 5, 5, 2, 10, -1, 0, 0, 0, 2, 3, 11, 5, 30 | 4, 9, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 8, 11, 2, 0, 9, 5, 4, -1, 0, 31 | 0, 0, 0, 0, 0, 5, 0, 1, 5, 4, 0, 3, 11, 2, -1, 0, 0, 0, 0, 0, 0, 11, 2, 32 | 8, 8, 5, 4, 2, 5, 8, 2, 1, 5, -1, 0, 0, 0, 3, 10, 1, 3, 11, 10, 5, 4, 9, 33 | -1, 0, 0, 0, 0, 0, 0, 9, 5, 4, 1, 0, 8, 1, 8, 10, 10, 8, 11, -1, 0, 0, 0, 34 | 10, 5, 11, 11, 0, 3, 11, 5, 0, 0, 5, 4, -1, 0, 0, 0, 4, 10, 5, 4, 8, 10, 10, 35 | 8, 11, -1, 0, 0, 0, 0, 0, 0, 7, 9, 5, 7, 8, 9, -1, 0, 0, 0, 0, 0, 0, 0, 36 | 0, 0, 0, 9, 5, 0, 5, 3, 3, 5, 7, -1, 0, 0, 0, 0, 0, 0, 8, 0, 1, 8, 1, 37 | 7, 7, 1, 5, -1, 0, 0, 0, 0, 0, 0, 3, 1, 5, 3, 5, 7, -1, 0, 0, 0, 0, 0, 38 | 0, 0, 0, 0, 7, 9, 5, 7, 8, 9, 1, 2, 10, -1, 0, 0, 0, 0, 0, 0, 1, 2, 10, 39 | 0, 9, 5, 0, 5, 3, 3, 5, 7, -1, 0, 0, 0, 7, 8, 5, 5, 2, 10, 8, 2, 5, 8, 40 | 
0, 2, -1, 0, 0, 0, 10, 3, 2, 10, 5, 3, 3, 5, 7, -1, 0, 0, 0, 0, 0, 0, 9, 41 | 7, 8, 9, 5, 7, 11, 2, 3, -1, 0, 0, 0, 0, 0, 0, 0, 9, 2, 2, 7, 11, 2, 9, 42 | 7, 7, 9, 5, -1, 0, 0, 0, 3, 11, 2, 8, 0, 1, 8, 1, 7, 7, 1, 5, -1, 0, 0, 43 | 0, 2, 7, 11, 2, 1, 7, 7, 1, 5, -1, 0, 0, 0, 0, 0, 0, 11, 1, 3, 11, 10, 1, 44 | 7, 8, 9, 7, 9, 5, -1, 0, 0, 0, 11, 10, 1, 11, 1, 7, 7, 1, 0, 7, 0, 9, 7, 45 | 9, 5, -1, 5, 7, 8, 5, 8, 10, 10, 8, 0, 10, 0, 3, 10, 3, 11, -1, 11, 10, 5, 11, 46 | 5, 7, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 6, 5, -1, 0, 0, 0, 0, 0, 0, 0, 47 | 0, 0, 0, 0, 0, 0, 8, 3, 10, 6, 5, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 48 | 1, 5, 10, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 1, 9, 8, 3, 1, 10, 6, 5, 49 | -1, 0, 0, 0, 0, 0, 0, 6, 1, 2, 6, 5, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50 | 6, 1, 2, 6, 5, 1, 0, 8, 3, -1, 0, 0, 0, 0, 0, 0, 5, 9, 0, 5, 0, 6, 6, 51 | 0, 2, -1, 0, 0, 0, 0, 0, 0, 6, 5, 2, 2, 8, 3, 5, 8, 2, 5, 9, 8, -1, 0, 52 | 0, 0, 2, 3, 11, 10, 6, 5, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 2, 0, 8, 53 | 11, 6, 5, 10, -1, 0, 0, 0, 0, 0, 0, 0, 1, 9, 3, 11, 2, 10, 6, 5, -1, 0, 0, 54 | 0, 0, 0, 0, 10, 6, 5, 2, 1, 9, 2, 9, 11, 11, 9, 8, -1, 0, 0, 0, 11, 6, 5, 55 | 11, 5, 3, 3, 5, 1, -1, 0, 0, 0, 0, 0, 0, 11, 6, 8, 8, 1, 0, 8, 6, 1, 1, 56 | 6, 5, -1, 0, 0, 0, 0, 3, 11, 0, 11, 6, 0, 6, 9, 9, 6, 5, -1, 0, 0, 0, 5, 57 | 11, 6, 5, 9, 11, 11, 9, 8, -1, 0, 0, 0, 0, 0, 0, 7, 8, 4, 6, 5, 10, -1, 0, 58 | 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 7, 3, 0, 4, 5, 10, 6, -1, 0, 0, 0, 0, 0, 59 | 0, 6, 5, 10, 7, 8, 4, 9, 0, 1, -1, 0, 0, 0, 0, 0, 0, 5, 10, 6, 9, 4, 7, 60 | 9, 7, 1, 1, 7, 3, -1, 0, 0, 0, 1, 6, 5, 1, 2, 6, 7, 8, 4, -1, 0, 0, 0, 61 | 0, 0, 0, 7, 0, 4, 7, 3, 0, 6, 5, 1, 6, 1, 2, -1, 0, 0, 0, 4, 7, 8, 5, 62 | 9, 0, 5, 0, 6, 6, 0, 2, -1, 0, 0, 0, 2, 6, 5, 2, 5, 3, 3, 5, 9, 3, 9, 63 | 4, 3, 4, 7, -1, 4, 7, 8, 5, 10, 6, 11, 2, 3, -1, 0, 0, 0, 0, 0, 0, 6, 5, 64 | 10, 7, 11, 2, 7, 2, 4, 4, 2, 0, -1, 0, 0, 0, 4, 7, 8, 9, 0, 1, 6, 5, 10, 65 | 3, 11, 2, -1, 0, 0, 0, 6, 5, 10, 11, 4, 7, 11, 2, 4, 4, 2, 9, 9, 2, 1, -1, 66 | 7, 8, 4, 11, 6, 5, 11, 5, 3, 3, 5, 1, -1, 0, 0, 0, 0, 4, 7, 0, 7, 1, 1, 67 | 7, 11, 1, 11, 6, 1, 6, 5, -1, 4, 7, 8, 9, 6, 5, 9, 0, 6, 6, 0, 11, 11, 0, 68 | 3, -1, 7, 11, 4, 11, 9, 4, 11, 5, 9, 11, 6, 5, -1, 0, 0, 0, 10, 4, 9, 10, 6, 69 | 4, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 4, 9, 10, 6, 4, 8, 3, 0, -1, 0, 0, 70 | 0, 0, 0, 0, 1, 10, 6, 1, 6, 0, 0, 6, 4, -1, 0, 0, 0, 0, 0, 0, 4, 8, 6, 71 | 6, 1, 10, 6, 8, 1, 1, 8, 3, -1, 0, 0, 0, 9, 1, 2, 9, 2, 4, 4, 2, 6, -1, 72 | 0, 0, 0, 0, 0, 0, 0, 8, 3, 9, 1, 2, 9, 2, 4, 4, 2, 6, -1, 0, 0, 0, 0, 73 | 2, 6, 0, 6, 4, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 8, 3, 2, 4, 4, 2, 74 | 6, -1, 0, 0, 0, 0, 0, 0, 4, 10, 6, 4, 9, 10, 2, 3, 11, -1, 0, 0, 0, 0, 0, 75 | 0, 8, 2, 0, 8, 11, 2, 4, 9, 10, 4, 10, 6, -1, 0, 0, 0, 2, 3, 11, 1, 10, 6, 76 | 1, 6, 0, 0, 6, 4, -1, 0, 0, 0, 8, 11, 2, 8, 2, 4, 4, 2, 1, 4, 1, 10, 4, 77 | 10, 6, -1, 3, 11, 1, 1, 4, 9, 11, 4, 1, 11, 6, 4, -1, 0, 0, 0, 6, 4, 9, 6, 78 | 9, 11, 11, 9, 1, 11, 1, 0, 11, 0, 8, -1, 11, 0, 3, 11, 6, 0, 0, 6, 4, -1, 0, 79 | 0, 0, 0, 0, 0, 8, 11, 6, 8, 6, 4, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 80 | 8, 6, 8, 10, 10, 8, 9, -1, 0, 0, 0, 0, 0, 0, 3, 0, 7, 7, 10, 6, 0, 10, 7, 81 | 0, 9, 10, -1, 0, 0, 0, 1, 10, 6, 1, 6, 7, 1, 7, 0, 0, 7, 8, -1, 0, 0, 0, 82 | 6, 1, 10, 6, 7, 1, 1, 7, 3, -1, 0, 0, 0, 0, 0, 0, 9, 1, 8, 8, 6, 7, 8, 83 | 1, 6, 6, 1, 2, -1, 0, 0, 0, 7, 3, 0, 7, 0, 6, 6, 0, 9, 6, 9, 1, 6, 1, 84 | 2, -1, 8, 6, 7, 8, 0, 6, 6, 0, 2, -1, 0, 0, 0, 0, 0, 0, 2, 6, 7, 2, 7, 85 | 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 2, 3, 6, 7, 8, 6, 8, 
10, 10, 8, 9, 86 | -1, 0, 0, 0, 9, 10, 6, 9, 6, 0, 0, 6, 7, 0, 7, 11, 0, 11, 2, -1, 3, 11, 2, 87 | 0, 7, 8, 0, 1, 7, 7, 1, 6, 6, 1, 10, -1, 6, 7, 10, 7, 1, 10, 7, 2, 1, 7, 88 | 11, 2, -1, 0, 0, 0, 1, 3, 11, 1, 11, 9, 9, 11, 6, 9, 6, 7, 9, 7, 8, -1, 6, 89 | 7, 11, 9, 1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 7, 0, 6, 7, 0, 11, 90 | 6, 0, 3, 11, -1, 0, 0, 0, 6, 7, 11, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 91 | 0, 6, 11, 7, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 8, 11, 7, 6, 92 | -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 11, 7, 9, 0, 1, -1, 0, 0, 0, 0, 0, 0, 93 | 0, 0, 0, 1, 8, 3, 1, 9, 8, 7, 6, 11, -1, 0, 0, 0, 0, 0, 0, 11, 7, 6, 2, 94 | 10, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 10, 0, 8, 3, 11, 7, 6, -1, 0, 95 | 0, 0, 0, 0, 0, 9, 2, 10, 9, 0, 2, 11, 7, 6, -1, 0, 0, 0, 0, 0, 0, 11, 7, 96 | 6, 3, 2, 10, 3, 10, 8, 8, 10, 9, -1, 0, 0, 0, 2, 7, 6, 2, 3, 7, -1, 0, 0, 97 | 0, 0, 0, 0, 0, 0, 0, 8, 7, 6, 8, 6, 0, 0, 6, 2, -1, 0, 0, 0, 0, 0, 0, 98 | 7, 2, 3, 7, 6, 2, 1, 9, 0, -1, 0, 0, 0, 0, 0, 0, 8, 7, 9, 9, 2, 1, 9, 99 | 7, 2, 2, 7, 6, -1, 0, 0, 0, 6, 10, 1, 6, 1, 7, 7, 1, 3, -1, 0, 0, 0, 0, 100 | 0, 0, 6, 10, 1, 6, 1, 0, 6, 0, 7, 7, 0, 8, -1, 0, 0, 0, 7, 6, 3, 3, 9, 101 | 0, 6, 9, 3, 6, 10, 9, -1, 0, 0, 0, 6, 8, 7, 6, 10, 8, 8, 10, 9, -1, 0, 0, 102 | 0, 0, 0, 0, 8, 6, 11, 8, 4, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 3, 0, 103 | 11, 0, 6, 6, 0, 4, -1, 0, 0, 0, 0, 0, 0, 6, 8, 4, 6, 11, 8, 0, 1, 9, -1, 104 | 0, 0, 0, 0, 0, 0, 1, 9, 3, 3, 6, 11, 9, 6, 3, 9, 4, 6, -1, 0, 0, 0, 8, 105 | 6, 11, 8, 4, 6, 10, 1, 2, -1, 0, 0, 0, 0, 0, 0, 2, 10, 1, 11, 3, 0, 11, 0, 106 | 6, 6, 0, 4, -1, 0, 0, 0, 11, 4, 6, 11, 8, 4, 2, 10, 9, 2, 9, 0, -1, 0, 0, 107 | 0, 4, 6, 11, 4, 11, 9, 9, 11, 3, 9, 3, 2, 9, 2, 10, -1, 3, 8, 4, 3, 4, 2, 108 | 2, 4, 6, -1, 0, 0, 0, 0, 0, 0, 2, 0, 4, 2, 4, 6, -1, 0, 0, 0, 0, 0, 0, 109 | 0, 0, 0, 0, 1, 9, 3, 8, 4, 3, 4, 2, 2, 4, 6, -1, 0, 0, 0, 9, 2, 1, 9, 110 | 4, 2, 2, 4, 6, -1, 0, 0, 0, 0, 0, 0, 6, 10, 4, 4, 3, 8, 4, 10, 3, 3, 10, 111 | 1, -1, 0, 0, 0, 1, 6, 10, 1, 0, 6, 6, 0, 4, -1, 0, 0, 0, 0, 0, 0, 10, 9, 112 | 0, 10, 0, 6, 6, 0, 3, 6, 3, 8, 6, 8, 4, -1, 10, 9, 4, 10, 4, 6, -1, 0, 0, 113 | 0, 0, 0, 0, 0, 0, 0, 6, 11, 7, 5, 4, 9, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114 | 0, 8, 3, 9, 5, 4, 7, 6, 11, -1, 0, 0, 0, 0, 0, 0, 0, 5, 4, 0, 1, 5, 6, 115 | 11, 7, -1, 0, 0, 0, 0, 0, 0, 7, 6, 11, 4, 8, 3, 4, 3, 5, 5, 3, 1, -1, 0, 116 | 0, 0, 2, 10, 1, 11, 7, 6, 5, 4, 9, -1, 0, 0, 0, 0, 0, 0, 0, 8, 3, 1, 2, 117 | 10, 4, 9, 5, 11, 7, 6, -1, 0, 0, 0, 6, 11, 7, 10, 5, 4, 10, 4, 2, 2, 4, 0, 118 | -1, 0, 0, 0, 6, 11, 7, 5, 2, 10, 5, 4, 2, 2, 4, 3, 3, 4, 8, -1, 2, 7, 6, 119 | 2, 3, 7, 4, 9, 5, -1, 0, 0, 0, 0, 0, 0, 4, 9, 5, 8, 7, 6, 8, 6, 0, 0, 120 | 6, 2, -1, 0, 0, 0, 3, 6, 2, 3, 7, 6, 0, 1, 5, 0, 5, 4, -1, 0, 0, 0, 1, 121 | 5, 4, 1, 4, 2, 2, 4, 8, 2, 8, 7, 2, 7, 6, -1, 5, 4, 9, 6, 10, 1, 6, 1, 122 | 7, 7, 1, 3, -1, 0, 0, 0, 4, 9, 5, 7, 0, 8, 7, 6, 0, 0, 6, 1, 1, 6, 10, 123 | -1, 3, 7, 6, 3, 6, 0, 0, 6, 10, 0, 10, 5, 0, 5, 4, -1, 4, 8, 5, 8, 10, 5, 124 | 8, 6, 10, 8, 7, 6, -1, 0, 0, 0, 5, 6, 11, 5, 11, 9, 9, 11, 8, -1, 0, 0, 0, 125 | 0, 0, 0, 0, 9, 5, 0, 5, 6, 0, 6, 3, 3, 6, 11, -1, 0, 0, 0, 8, 0, 11, 11, 126 | 5, 6, 11, 0, 5, 5, 0, 1, -1, 0, 0, 0, 11, 5, 6, 11, 3, 5, 5, 3, 1, -1, 0, 127 | 0, 0, 0, 0, 0, 10, 1, 2, 5, 6, 11, 5, 11, 9, 9, 11, 8, -1, 0, 0, 0, 2, 10, 128 | 1, 3, 6, 11, 3, 0, 6, 6, 0, 5, 5, 0, 9, -1, 0, 2, 10, 0, 10, 8, 8, 10, 5, 129 | 8, 5, 6, 8, 6, 11, -1, 11, 3, 6, 3, 5, 6, 3, 10, 5, 3, 2, 10, -1, 0, 0, 0, 130 | 2, 3, 6, 6, 9, 5, 3, 9, 6, 3, 8, 9, -1, 0, 0, 0, 5, 0, 9, 5, 6, 0, 0, 131 | 6, 2, 
-1, 0, 0, 0, 0, 0, 0, 6, 2, 3, 6, 3, 5, 5, 3, 8, 5, 8, 0, 5, 0, 132 | 1, -1, 6, 2, 1, 6, 1, 5, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 5, 8, 5, 133 | 3, 3, 5, 6, 3, 6, 10, 3, 10, 1, -1, 1, 0, 10, 0, 6, 10, 0, 5, 6, 0, 9, 5, 134 | -1, 0, 0, 0, 0, 3, 8, 10, 5, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 5, 6, 135 | -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 5, 10, 11, 7, 5, -1, 0, 0, 0, 136 | 0, 0, 0, 0, 0, 0, 5, 11, 7, 5, 10, 11, 3, 0, 8, -1, 0, 0, 0, 0, 0, 0, 11, 137 | 5, 10, 11, 7, 5, 9, 0, 1, -1, 0, 0, 0, 0, 0, 0, 9, 3, 1, 9, 8, 3, 5, 10, 138 | 11, 5, 11, 7, -1, 0, 0, 0, 2, 11, 7, 2, 7, 1, 1, 7, 5, -1, 0, 0, 0, 0, 0, 139 | 0, 3, 0, 8, 2, 11, 7, 2, 7, 1, 1, 7, 5, -1, 0, 0, 0, 2, 11, 0, 0, 5, 9, 140 | 0, 11, 5, 5, 11, 7, -1, 0, 0, 0, 9, 8, 3, 9, 3, 5, 5, 3, 2, 5, 2, 11, 5, 141 | 11, 7, -1, 10, 2, 3, 10, 3, 5, 5, 3, 7, -1, 0, 0, 0, 0, 0, 0, 5, 10, 7, 7, 142 | 0, 8, 10, 0, 7, 10, 2, 0, -1, 0, 0, 0, 1, 9, 0, 10, 2, 3, 10, 3, 5, 5, 3, 143 | 7, -1, 0, 0, 0, 7, 5, 10, 7, 10, 8, 8, 10, 2, 8, 2, 1, 8, 1, 9, -1, 7, 5, 144 | 1, 7, 1, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 1, 0, 8, 7, 1, 1, 7, 5, 145 | -1, 0, 0, 0, 0, 0, 0, 0, 5, 9, 0, 3, 5, 5, 3, 7, -1, 0, 0, 0, 0, 0, 0, 146 | 7, 5, 9, 7, 9, 8, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 10, 4, 10, 8, 8, 147 | 10, 11, -1, 0, 0, 0, 0, 0, 0, 11, 3, 10, 10, 4, 5, 10, 3, 4, 4, 3, 0, -1, 0, 148 | 0, 0, 9, 0, 1, 4, 5, 10, 4, 10, 8, 8, 10, 11, -1, 0, 0, 0, 3, 1, 9, 3, 9, 149 | 11, 11, 9, 4, 11, 4, 5, 11, 5, 10, -1, 8, 4, 11, 11, 1, 2, 4, 1, 11, 4, 5, 1, 150 | -1, 0, 0, 0, 5, 1, 2, 5, 2, 4, 4, 2, 11, 4, 11, 3, 4, 3, 0, -1, 11, 8, 4, 151 | 11, 4, 2, 2, 4, 5, 2, 5, 9, 2, 9, 0, -1, 2, 11, 3, 5, 9, 4, -1, 0, 0, 0, 152 | 0, 0, 0, 0, 0, 0, 4, 5, 10, 4, 10, 2, 4, 2, 8, 8, 2, 3, -1, 0, 0, 0, 10, 153 | 4, 5, 10, 2, 4, 4, 2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 1, 9, 8, 2, 3, 8, 4, 154 | 2, 2, 4, 10, 10, 4, 5, -1, 10, 2, 5, 2, 4, 5, 2, 9, 4, 2, 1, 9, -1, 0, 0, 155 | 0, 4, 3, 8, 4, 5, 3, 3, 5, 1, -1, 0, 0, 0, 0, 0, 0, 0, 4, 5, 0, 5, 1, 156 | -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 9, 3, 5, 9, 3, 4, 5, 3, 8, 4, -1, 157 | 0, 0, 0, 4, 5, 9, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 4, 9, 7, 158 | 9, 11, 11, 9, 10, -1, 0, 0, 0, 0, 0, 0, 8, 3, 0, 7, 4, 9, 7, 9, 11, 11, 9, 159 | 10, -1, 0, 0, 0, 0, 1, 4, 4, 11, 7, 1, 11, 4, 1, 10, 11, -1, 0, 0, 0, 10, 11, 160 | 7, 10, 7, 1, 1, 7, 4, 1, 4, 8, 1, 8, 3, -1, 2, 11, 7, 2, 7, 4, 2, 4, 1, 161 | 1, 4, 9, -1, 0, 0, 0, 0, 8, 3, 1, 4, 9, 1, 2, 4, 4, 2, 7, 7, 2, 11, -1, 162 | 7, 2, 11, 7, 4, 2, 2, 4, 0, -1, 0, 0, 0, 0, 0, 0, 7, 4, 11, 4, 2, 11, 4, 163 | 3, 2, 4, 8, 3, -1, 0, 0, 0, 7, 4, 3, 3, 10, 2, 3, 4, 10, 10, 4, 9, -1, 0, 164 | 0, 0, 2, 0, 8, 2, 8, 10, 10, 8, 7, 10, 7, 4, 10, 4, 9, -1, 4, 0, 1, 4, 1, 165 | 7, 7, 1, 10, 7, 10, 2, 7, 2, 3, -1, 4, 8, 7, 1, 10, 2, -1, 0, 0, 0, 0, 0, 166 | 0, 0, 0, 0, 9, 7, 4, 9, 1, 7, 7, 1, 3, -1, 0, 0, 0, 0, 0, 0, 8, 7, 0, 167 | 7, 1, 0, 7, 9, 1, 7, 4, 9, -1, 0, 0, 0, 4, 0, 3, 4, 3, 7, -1, 0, 0, 0, 168 | 0, 0, 0, 0, 0, 0, 4, 8, 7, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 169 | 9, 10, 8, 10, 11, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 3, 0, 9, 11, 11, 9, 170 | 10, -1, 0, 0, 0, 0, 0, 0, 1, 8, 0, 1, 10, 8, 8, 10, 11, -1, 0, 0, 0, 0, 0, 171 | 0, 3, 1, 10, 3, 10, 11, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 9, 1, 2, 11, 9, 172 | 9, 11, 8, -1, 0, 0, 0, 0, 0, 0, 0, 9, 3, 9, 11, 3, 9, 2, 11, 9, 1, 2, -1, 173 | 0, 0, 0, 11, 8, 0, 11, 0, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 11, 3, -1, 174 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 10, 2, 3, 8, 10, 10, 8, 9, -1, 0, 175 | 0, 0, 0, 0, 0, 9, 10, 2, 9, 2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 8, 176 | 
2, 8, 10, 2, 8, 1, 10, 8, 0, 1, -1, 0, 0, 0, 2, 1, 10, -1, 0, 0, 0, 0, 0, 177 | 0, 0, 0, 0, 0, 0, 0, 8, 9, 1, 8, 1, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 178 | 1, 0, 9, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 8, -1, 0, 0, 0, 179 | 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180 | 0, 0 181 | ]); 182 | -------------------------------------------------------------------------------- /js/render.js: -------------------------------------------------------------------------------- 1 | (async () => { 2 | var adapter = await navigator.gpu.requestAdapter(); 3 | console.log(adapter.limits); 4 | 5 | var gpuDeviceDesc = { 6 | requiredLimits: { 7 | maxStorageBuffersPerShaderStage: adapter.limits.maxStorageBuffersPerShaderStage, 8 | maxStorageBufferBindingSize: adapter.limits.maxStorageBufferBindingSize, 9 | maxBufferSize: adapter.limits.maxBufferSize 10 | }, 11 | }; 12 | var device = await adapter.requestDevice(gpuDeviceDesc); 13 | 14 | var canvas = document.getElementById("webgpu-canvas"); 15 | var context = canvas.getContext("webgpu"); 16 | 17 | var dataset = datasets.skull; 18 | if (window.location.hash) { 19 | var name = decodeURI(window.location.hash.substr(1)); 20 | console.log(`Linked to data set ${name}`); 21 | dataset = datasets[name]; 22 | } 23 | 24 | var volumeDims = getVolumeDimensions(dataset.name); 25 | var zfpDataName = dataset.name + ".zfp"; 26 | var volumeURL = null; 27 | if (window.location.hostname == "www.willusher.io") { 28 | volumeURL = "https://cdn.willusher.io/bcmc-demo-data/" + zfpDataName; 29 | } else { 30 | volumeURL = "/models/" + zfpDataName; 31 | } 32 | var compressedData = 33 | await fetch(volumeURL).then((res) => res.arrayBuffer().then(function(arr) { 34 | return new Uint8Array(arr); 35 | })); 36 | 37 | if (compressedData == null) { 38 | alert(`Failed to load compressed data`); 39 | return; 40 | } 41 | var imageBuffer = device.createBuffer({ 42 | size: canvas.width * canvas.height * 4, 43 | usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, 44 | }); 45 | var resolutionBuffer = device.createBuffer({ 46 | size: 2 * 4, 47 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, 48 | }); 49 | var commandEncoder = device.createCommandEncoder(); 50 | var uploadResolution = device.createBuffer( 51 | {size: 2 * 4, usage: GPUBufferUsage.COPY_SRC, mappedAtCreation: true}); 52 | new Uint32Array(uploadResolution.getMappedRange()).set([canvas.width, canvas.height]); 53 | uploadResolution.unmap(); 54 | commandEncoder.copyBufferToBuffer(uploadResolution, 0, resolutionBuffer, 0, 2 * 4); 55 | device.queue.submit([commandEncoder.finish()]); 56 | var renderBGLayout = device.createBindGroupLayout({ 57 | entries: [ 58 | {binding: 0, visibility: GPUShaderStage.FRAGMENT, texture: {viewDimension: "2d"}}, 59 | {binding: 1, visibility: GPUShaderStage.FRAGMENT, buffer: {type: "uniform"}}, 60 | {binding: 2, visibility: GPUShaderStage.FRAGMENT, sampler: {type: "filtering"}} 61 | ] 62 | }); 63 | const sampler = device.createSampler({ 64 | magFilter: 'linear', 65 | minFilter: 'linear', 66 | }); 67 | 68 | var enableSpeculationUI = document.getElementById("enableSpeculation"); 69 | enableSpeculationUI.checked = true; 70 | 71 | var recordVisibleBlocksUI = document.getElementById("recordVisibleBlocks") 72 | var resolution = document.getElementById("resolution"); 73 | var resolutionToDivisor = {"full": 1, "half": 2, "quarter": 4}; 74 | var width = canvas.width / resolutionToDivisor[resolution.value]; 75 | var height = canvas.height / 
resolutionToDivisor[resolution.value]; 76 | this.volumeRC = 77 | new VolumeRaycaster(device, width, height, recordVisibleBlocksUI, enableSpeculationUI); 78 | var render = this; 79 | resolution.onchange = async () => { 80 | var width = canvas.width / resolutionToDivisor[resolution.value]; 81 | var height = canvas.height / resolutionToDivisor[resolution.value]; 82 | console.log(`Changed resolution to ${width}x${height}`); 83 | render.volumeRC = new VolumeRaycaster( 84 | device, width, height, recordVisibleBlocksUI, enableSpeculationUI); 85 | await render.volumeRC.setCompressedVolume( 86 | compressedData, dataset.compressionRate, volumeDims, dataset.scale); 87 | recomputeSurface = true; 88 | render.renderPipelineBG = device.createBindGroup({ 89 | layout: renderBGLayout, 90 | entries: [ 91 | {binding: 0, resource: render.volumeRC.renderTarget.createView()}, 92 | {binding: 1, resource: {buffer: resolutionBuffer}}, 93 | {binding: 2, resource: sampler} 94 | ] 95 | }); 96 | }; 97 | await this.volumeRC.setCompressedVolume( 98 | compressedData, dataset.compressionRate, volumeDims, dataset.scale); 99 | 100 | var totalMemDisplay = document.getElementById("totalMemDisplay"); 101 | var mcMemDisplay = document.getElementById("mcMemDisplay"); 102 | var cacheMemDisplay = document.getElementById("cacheMemDisplay"); 103 | var fpsDisplay = document.getElementById("fps"); 104 | var camDisplay = document.getElementById("camDisplay"); 105 | 106 | var enableCache = document.getElementById("enableCache"); 107 | enableCache.checked = true; 108 | 109 | var isovalueSlider = document.getElementById("isovalue"); 110 | isovalueSlider.min = dataset.range[0]; 111 | isovalueSlider.max = dataset.range[1]; 112 | if (dataset.step !== undefined) { 113 | isovalueSlider.step = dataset.step; 114 | } else { 115 | isovalueSlider.step = (isovalueSlider.max - isovalueSlider.min) / 255.0; 116 | } 117 | isovalueSlider.value = (dataset.range[0] + dataset.range[1]) / 2.0; 118 | var currentIsovalue = isovalueSlider.value; 119 | 120 | var displayCacheInfo = function() { 121 | var percentActive = (this.volumeRC.numVisibleBlocks / this.volumeRC.totalBlocks) * 100; 122 | cacheInfo.innerHTML = `Cache Space: ${ 123 | this.volumeRC.lruCache.cacheSize 124 | } blocks 125 | (${( 126 | (this.volumeRC.lruCache.cacheSize / this.volumeRC.totalBlocks) * 127 | 100 128 | ).toFixed(2)} % 129 | of ${this.volumeRC.totalBlocks} total blocks)
130 | # Cache Slots Available ${ 131 | this.volumeRC.lruCache.displayNumSlotsAvailable}<br>
132 | For this Pass:<br>
133 | # Newly Decompressed: ${this.volumeRC.newDecompressed}<br>
134 | # Visible Blocks: ${this.volumeRC.numVisibleBlocks} 135 | (${percentActive.toFixed(2)}%)<br>
`; 136 | }; 137 | displayCacheInfo(); 138 | 139 | const defaultEye = vec3.set(vec3.create(), 0.0, 0.0, -1.5); 140 | const center = vec3.set(vec3.create(), 0.0, 0.0, 0.0); 141 | const up = vec3.set(vec3.create(), 0.0, 1.0, 0.0); 142 | /* 143 | // For matching benchmark configurations 144 | var benchmarkEye = { 145 | "eyePos": [-1.012491226196289, 0.7122936248779297, 0.8317527174949646], 146 | "eyeDir": [0.6625354886054993, -0.5211779475212097, -0.537977933883667], 147 | "upDir": [0.4094274640083313, 0.8534227609634399, -0.3225504457950592], 148 | }; 149 | const defaultEye = vec3.set(vec3.create(), 150 | benchmarkEye["eyePos"][0], 151 | benchmarkEye["eyePos"][1], 152 | benchmarkEye["eyePos"][2]); 153 | const center = vec3.add(vec3.create(), 154 | defaultEye, 155 | vec3.set(vec3.create(), 156 | benchmarkEye["eyeDir"][0], 157 | benchmarkEye["eyeDir"][1], 158 | benchmarkEye["eyeDir"][2])); 159 | const up = vec3.set(vec3.create(), 160 | benchmarkEye["upDir"][0], 161 | benchmarkEye["upDir"][1], 162 | benchmarkEye["upDir"][2]); 163 | */ 164 | 165 | var camera = new ArcballCamera(defaultEye, center, up, 4, [ 166 | canvas.width, 167 | canvas.height, 168 | ]); 169 | const nearPlane = 0.1; 170 | var proj = mat4.perspective( 171 | mat4.create(), (50 * Math.PI) / 180.0, canvas.width / canvas.height, nearPlane, 1000); 172 | var projView = mat4.create(); 173 | 174 | var numFrames = 0; 175 | var totalTimeMS = 0; 176 | var cameraChanged = true; 177 | 178 | var controller = new Controller(); 179 | controller.mousemove = function(prev, cur, evt) { 180 | if (evt.buttons == 1) { 181 | cameraChanged = true; 182 | camera.rotate(prev, cur); 183 | numFrames = 0; 184 | totalTimeMS = 0; 185 | } else if (evt.buttons == 2) { 186 | cameraChanged = true; 187 | camera.pan([cur[0] - prev[0], prev[1] - cur[1]]); 188 | numFrames = 0; 189 | totalTimeMS = 0; 190 | } 191 | }; 192 | controller.wheel = function(amt) { 193 | cameraChanged = true; 194 | camera.zoom(amt * 0.05); 195 | numFrames = 0; 196 | totalTimeMS = 0; 197 | }; 198 | controller.pinch = controller.wheel; 199 | controller.twoFingerDrag = function(drag) { 200 | cameraChanged = true; 201 | camera.pan(drag); 202 | numFrames = 0; 203 | totalTimeMS = 0; 204 | }; 205 | controller.registerForCanvas(canvas); 206 | 207 | var animationFrame = function() { 208 | var resolve = null; 209 | var promise = new Promise((r) => (resolve = r)); 210 | window.requestAnimationFrame(resolve); 211 | return promise; 212 | }; 213 | 214 | requestAnimationFrame(animationFrame); 215 | 216 | var upload = device.createBuffer({ 217 | // mat4, 2 vec4's and a float + some extra to align 218 | size: 32 * 4, 219 | usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC, 220 | }); 221 | 222 | /* We need a render pass to blit the image that is computed by the volume 223 | * raycaster to the screen. 
This just draws a quad to the screen and loads 224 | * the corresponding texel from the render to show on the screen 225 | */ 226 | var swapChainFormat = "bgra8unorm"; 227 | context.configure( 228 | {device: device, format: swapChainFormat, usage: GPUTextureUsage.RENDER_ATTACHMENT}); 229 | 230 | var vertModule = device.createShaderModule({code: display_render_vert_spv}); 231 | var fragModule = device.createShaderModule({code: display_render_frag_spv}); 232 | 233 | var renderPipeline = device.createRenderPipeline({ 234 | layout: device.createPipelineLayout({bindGroupLayouts: [renderBGLayout]}), 235 | vertex: { 236 | module: vertModule, 237 | entryPoint: "main", 238 | }, 239 | fragment: 240 | {module: fragModule, entryPoint: "main", targets: [{format: swapChainFormat}]} 241 | }); 242 | 243 | this.renderPipelineBG = device.createBindGroup({ 244 | layout: renderBGLayout, 245 | entries: [ 246 | {binding: 0, resource: this.volumeRC.renderTarget.createView()}, 247 | {binding: 1, resource: {buffer: resolutionBuffer}}, 248 | {binding: 2, resource: sampler} 249 | ] 250 | }); 251 | 252 | var renderPassDesc = { 253 | colorAttachments: [{ 254 | view: undefined, 255 | loadOp: "clear", 256 | clearValue: [0.3, 0.3, 0.3, 1], 257 | storeOp: "store" 258 | }], 259 | }; 260 | 261 | var currentBenchmark = null; 262 | var cameraBenchmark = null; 263 | 264 | var perfStats = []; 265 | 266 | var recomputeSurface = true; 267 | var surfaceDone = false; 268 | var averageComputeTime = 0; 269 | while (true) { 270 | await animationFrame(); 271 | var start = performance.now(); 272 | 273 | if (requestBenchmark && !currentBenchmark) { 274 | perfStats = []; 275 | await this.volumeRC.lruCache.reset(); 276 | if (requestBenchmark == "random") { 277 | var valueBenchmark = 278 | new RandomIsovalueBenchmark(isovalueSlider, dataset.range); 279 | cameraBenchmark = new CameraOrbitBenchmark(1.5); 280 | currentBenchmark = new NestedBenchmark(valueBenchmark, cameraBenchmark); 281 | } else if (requestBenchmark == "sweepUp") { 282 | var valueBenchmark = 283 | new SweepIsovalueBenchmark(isovalueSlider, dataset.range, true); 284 | cameraBenchmark = new CameraOrbitBenchmark(1.5); 285 | currentBenchmark = new NestedBenchmark(valueBenchmark, cameraBenchmark); 286 | } else if (requestBenchmark == "sweepDown") { 287 | var valueBenchmark = 288 | new SweepIsovalueBenchmark(isovalueSlider, dataset.range, false); 289 | cameraBenchmark = new CameraOrbitBenchmark(1.5); 290 | currentBenchmark = new NestedBenchmark(valueBenchmark, cameraBenchmark); 291 | } else if (requestBenchmark == "manualSingle") { 292 | currentBenchmark = new ManualSingleBenchmark(); 293 | recomputeSurface = true; 294 | } else { 295 | cameraBenchmark = new CameraOrbitBenchmark(1.5); 296 | currentBenchmark = cameraBenchmark; 297 | } 298 | requestBenchmark = null; 299 | } 300 | 301 | if (currentBenchmark && surfaceDone) { 302 | if (!currentBenchmark.run()) { 303 | var blob = new Blob([JSON.stringify(perfStats)], {type: "text/plain"}); 304 | saveAs(blob, `perf-${dataset.name}-${currentBenchmark.name}.json`); 305 | 306 | currentBenchmark = null; 307 | } else if (currentBenchmark.name.includes("cameraOrbit")) { 308 | camera = new ArcballCamera(cameraBenchmark.currentPoint, center, up, 4, [ 309 | canvas.width, 310 | canvas.height, 311 | ]); 312 | cameraChanged = true; 313 | } 314 | } 315 | 316 | projView = mat4.mul(projView, proj, camera.camera); 317 | await upload.mapAsync(GPUMapMode.WRITE); 318 | var uploadArray = new Float32Array(upload.getMappedRange()); 319 | 
uploadArray.set(projView); 320 | uploadArray.set(camera.eyePos(), 16); 321 | uploadArray.set(camera.eyeDir(), 20); 322 | uploadArray.set([nearPlane], 24); 323 | upload.unmap(); 324 | 325 | if (cameraChanged) { 326 | cameraChanged = false; 327 | recomputeSurface = true; 328 | 329 | var eyePos = camera.eyePos(); 330 | var eyeDir = camera.eyeDir(); 331 | var upDir = camera.upDir(); 332 | camDisplay.innerHTML = `eye = ${eyePos[0].toFixed(4)}, ${eyePos[1].toFixed( 333 | 4 334 | )}, ${eyePos[2].toFixed(4)}
335 | dir = ${eyeDir[0].toFixed(4)}, ${eyeDir[1].toFixed( 336 | 4 337 | )}, ${eyeDir[2].toFixed(4)}<br>
338 | up = ${upDir[0].toFixed(4)}, ${upDir[1].toFixed( 339 | 4 340 | )}, ${upDir[2].toFixed(4)}`; 341 | } 342 | 343 | if (!enableCache.checked) { 344 | await this.volumeRC.lruCache.reset(); 345 | } 346 | 347 | if (isovalueSlider.value != currentIsovalue) { 348 | recomputeSurface = true; 349 | currentIsovalue = parseFloat(isovalueSlider.value); 350 | } 351 | 352 | if (recomputeSurface || !surfaceDone) { 353 | var eyePos = camera.eyePos(); 354 | var eyeDir = camera.eyeDir(); 355 | var upDir = camera.upDir(); 356 | 357 | var start = performance.now(); 358 | surfaceDone = await this.volumeRC.renderSurface( 359 | currentIsovalue, 1, upload, recomputeSurface, eyePos, eyeDir, upDir); 360 | var end = performance.now(); 361 | 362 | if (surfaceDone) { 363 | perfStats.push( 364 | {"isovalue": currentIsovalue, "stats": this.volumeRC.surfacePerfStats}); 365 | } 366 | 367 | averageComputeTime = 368 | Math.round(this.volumeRC.totalPassTime / this.volumeRC.numPasses); 369 | recomputeSurface = false; 370 | 371 | displayCacheInfo(); 372 | var memUse = this.volumeRC.reportMemoryUse(); 373 | mcMemDisplay.innerHTML = memUse[0]; 374 | cacheMemDisplay.innerHTML = memUse[1]; 375 | totalMemDisplay.innerHTML = `Total Memory: ${memUse[2]}`; 376 | 377 | if (document.getElementById("outputImages").checked) { 378 | await takeScreenshot( 379 | device, 380 | `${dataset.name.substring(0, 5)}_pass_${this.volumeRC.numPasses}`, 381 | this.volumeRC.renderTarget, 382 | imageBuffer, 383 | document.getElementById('out-canvas')); 384 | } 385 | } 386 | if (saveScreenshot) { 387 | saveScreenshot = false; 388 | await takeScreenshot(device, 389 | `${dataset.name}_prog_iso`, 390 | this.volumeRC.renderTarget, 391 | imageBuffer, 392 | document.getElementById('out-canvas')); 393 | } 394 | 395 | // Blit the image rendered by the raycaster onto the screen 396 | var commandEncoder = device.createCommandEncoder(); 397 | 398 | renderPassDesc.colorAttachments[0].view = context.getCurrentTexture().createView(); 399 | var renderPass = commandEncoder.beginRenderPass(renderPassDesc); 400 | 401 | renderPass.setPipeline(renderPipeline); 402 | renderPass.setBindGroup(0, this.renderPipelineBG); 403 | // Draw a full screen quad 404 | renderPass.draw(6, 1, 0, 0); 405 | renderPass.end(); 406 | device.queue.submit([commandEncoder.finish()]); 407 | 408 | // Measure render time by waiting for the work done 409 | await device.queue.onSubmittedWorkDone(); 410 | var end = performance.now(); 411 | numFrames += 1; 412 | totalTimeMS += end - start; 413 | fpsDisplay.innerHTML = `Avg. FPS ${Math.round((1000.0 * numFrames) / totalTimeMS)}
414 | Avg. pass time: ${averageComputeTime}ms<br>
415 | Pass # ${this.volumeRC.numPasses}<br>
416 | Speculation Count: ${this.volumeRC.speculationCount}<br>
417 | Total pipeline time: ${Math.round(this.volumeRC.totalPassTime)}ms`; 418 | } 419 | })(); 420 | --------------------------------------------------------------------------------