├── README.md ├── index.html ├── media └── demo.gif └── scripts ├── bvh ├── build-bvh.js └── kernels │ ├── aabb-z-idx.js │ ├── bvh-up-pass.js │ ├── radix-sort.js │ ├── radix-tree.js │ └── rearrange.js ├── main.js ├── obj ├── obj-file-parser.js └── parse-obj.js └── pathtracer └── pathtracer.js /README.md: -------------------------------------------------------------------------------- 1 | # WEBGPU LBVH demo 2 | 3 | This demo builds a linear BVH on the GPU using compute shaders based on the algorithm in [this paper](https://research.nvidia.com/sites/default/files/pubs/2012-06_Maximizing-Parallelism-in/karras2012hpg_paper.pdf). 4 | 5 | ![demo](media/demo.gif) 6 | 7 | You can try the demo [here](https://addisonprairie.github.io/WebGPU-LVBH-demo/) - just drag and drop an .obj model. It uses [this repository](https://github.com/WesUnwin/obj-file-parser) for parsing .obj files. -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 33 | 34 | 35 |
36 | 37 |
38 |
39 | WEBGPU BVH DEMO
40 |

Uses WebGPU compute shaders to build a BVH and renders the result with path tracing. Drag and drop an .obj file to begin.

41 | Triangles : { 2,000,000 }

42 | Parse File: { 0.000001s }
43 | Build BVH : { 0.000001s }

44 |
[ Rotate View ]
45 |
46 |
47 | 48 | -------------------------------------------------------------------------------- /media/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AddisonPrairie/WebGPU-LVBH-demo/afd0b8f946f56827b41a86c34f6222e137777027/media/demo.gif -------------------------------------------------------------------------------- /scripts/bvh/build-bvh.js: -------------------------------------------------------------------------------- 1 | function initBVHBuild(device) { 2 | 3 | // initialize all other shaders 4 | const radixSortKernel = initRadixSortKernel(device) 5 | const radixTreeKernel = initRadixTreeKernel(device) 6 | const aabb_ZidxKernel = initAABB_ZidxKernel(device) 7 | const bvhUpPassKernel = initBVHUpPassKernel(device) 8 | const rearrangeKernel = initRearrangeKernel(device) 9 | 10 | return { build } 11 | 12 | async function build(TRI_ARRAY, NUM_TRIS, MODEL_BOUNDS) { 13 | // create GPU triangle buffer and copy values to it 14 | const I_TRIANGE_BUFFER = device.createBuffer({ 15 | size: NUM_TRIS * 48, 16 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, 17 | mappedAtCreation: true 18 | }) 19 | 20 | new Float32Array(I_TRIANGE_BUFFER.getMappedRange()).set(TRI_ARRAY) 21 | I_TRIANGE_BUFFER.unmap() 22 | 23 | // compute AABB and morton code for each triangle 24 | const { AABB_BUFFER, Z_IDX_BUFFER } = await aabb_ZidxKernel.execute( 25 | I_TRIANGE_BUFFER, 26 | NUM_TRIS, 27 | MODEL_BOUNDS 28 | ) 29 | 30 | // sort the morton code buffer and store how indices change 31 | const { IDX_BUFFER } = await radixSortKernel.execute( 32 | Z_IDX_BUFFER, 33 | NUM_TRIS 34 | ) 35 | 36 | // compute the radix tree over the morton codes 37 | const { PARENT_BUFFER } = await radixTreeKernel.execute( 38 | Z_IDX_BUFFER, 39 | NUM_TRIS 40 | ) 41 | 42 | // combine all information from previous passes into BVH 43 | const { BVH_BUFFER } = await bvhUpPassKernel.execute( 44 | IDX_BUFFER, 45 | AABB_BUFFER, 46 | PARENT_BUFFER, 47 
| NUM_TRIS 48 | ) 49 | 50 | 51 | // rearrange the triangles 52 | const { O_TRIANGLE_BUFFER } = await rearrangeKernel.execute( 53 | I_TRIANGE_BUFFER, 54 | IDX_BUFFER, 55 | NUM_TRIS 56 | ) 57 | 58 | // free all buffers that are not input/output 59 | AABB_BUFFER.destroy() 60 | Z_IDX_BUFFER.destroy() 61 | PARENT_BUFFER.destroy() 62 | IDX_BUFFER.destroy() 63 | I_TRIANGE_BUFFER.destroy() 64 | 65 | return { BVH_BUFFER, O_TRIANGLE_BUFFER } 66 | } 67 | } -------------------------------------------------------------------------------- /scripts/bvh/kernels/aabb-z-idx.js: -------------------------------------------------------------------------------- 1 | 2 | function initAABB_ZidxKernel(device) { 3 | // shader parameters 4 | const WG_SIZE = 64 5 | 6 | // create bind group layout, shader module and pipeline 7 | const BG_LAYOUT = device.createBindGroupLayout({ 8 | entries: [ 9 | { 10 | binding: 0, 11 | visibility: GPUShaderStage.COMPUTE, 12 | buffer: { 13 | type: "storage" 14 | } 15 | }, 16 | { 17 | binding: 1, 18 | visibility: GPUShaderStage.COMPUTE, 19 | buffer: { 20 | type: "storage" 21 | } 22 | }, 23 | { 24 | binding: 2, 25 | visibility: GPUShaderStage.COMPUTE, 26 | buffer: { 27 | type: "storage" 28 | } 29 | }, 30 | { 31 | binding: 3, 32 | visibility: GPUShaderStage.COMPUTE, 33 | buffer: { 34 | type: "uniform" 35 | } 36 | } 37 | ] 38 | }) 39 | 40 | const SM = device.createShaderModule({ 41 | code: SRC(), 42 | label: "AABB/Z-index shader module" 43 | }) 44 | 45 | const PIPELINE = device.createComputePipeline({ 46 | layout: device.createPipelineLayout({ 47 | bindGroupLayouts: [BG_LAYOUT] 48 | }), 49 | compute: { 50 | module: SM, 51 | entryPoint: "compute_aabb_z_idx" 52 | } 53 | }) 54 | 55 | return { execute } 56 | 57 | async function execute(TRIANGLE_BUFFER, size, bounds) { 58 | if (TRIANGLE_BUFFER.size != 48 * size) { 59 | console.warn(`in AABB/Z-index: buffer size [ ${TRIANGLE_BUFFER.size} ] does not match requested size [ ${size} ]`) 60 | return 61 | } 62 | 63 | // create 
all the necessary buffers 64 | const AABB_BUFFER = device.createBuffer({ 65 | size: size * 32, 66 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, 67 | }) 68 | const Z_IDX_BUFFER = device.createBuffer({ 69 | size: size * 4, 70 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC 71 | }) 72 | const UNIFORM_BUFFER = device.createBuffer({ 73 | size: 32, 74 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST 75 | }) 76 | 77 | // create the bind group 78 | const BG = device.createBindGroup({ 79 | layout: BG_LAYOUT, 80 | entries: [ 81 | { 82 | binding: 0, 83 | visibility: GPUShaderStage.COMPUTE, 84 | resource: { 85 | buffer: TRIANGLE_BUFFER 86 | } 87 | }, 88 | { 89 | binding: 1, 90 | visibility: GPUShaderStage.COMPUTE, 91 | resource: { 92 | buffer: AABB_BUFFER 93 | } 94 | }, 95 | { 96 | binding: 2, 97 | visibility: GPUShaderStage.COMPUTE, 98 | resource: { 99 | buffer: Z_IDX_BUFFER 100 | } 101 | }, 102 | { 103 | binding: 3, 104 | visibility: GPUShaderStage.COMPUTE, 105 | resource: { 106 | buffer: UNIFORM_BUFFER 107 | } 108 | } 109 | ] 110 | }) 111 | 112 | {// send work to GPU 113 | const BUFF = new ArrayBuffer(32) 114 | const DV = new DataView(BUFF) 115 | 116 | DV.setFloat32( 0, bounds.min[0], true) 117 | DV.setFloat32( 4, bounds.min[1], true) 118 | DV.setFloat32( 8, bounds.min[2], true) 119 | 120 | DV.setFloat32(16, bounds.max[0], true) 121 | DV.setFloat32(20, bounds.max[1], true) 122 | DV.setFloat32(24, bounds.max[2], true) 123 | 124 | DV.setInt32(12, size, true) 125 | 126 | device.queue.writeBuffer( 127 | UNIFORM_BUFFER, 128 | 0, 129 | BUFF, 130 | 0, 131 | 32 132 | ) 133 | 134 | const CE = device.createCommandEncoder() 135 | const P = CE.beginComputePass() 136 | 137 | P.setPipeline(PIPELINE) 138 | P.setBindGroup(0, BG) 139 | P.dispatchWorkgroups(Math.ceil(size / WG_SIZE)) 140 | P.end() 141 | 142 | device.queue.submit([CE.finish()]) 143 | } 144 | 145 | await device.queue.onSubmittedWorkDone() 146 | 147 | return { AABB_BUFFER, Z_IDX_BUFFER } 148 | } 
149 | 150 | function SRC() { 151 | return /* wgsl */ ` 152 | 153 | struct Triangle { 154 | v0 : vec3f, 155 | v1 : vec3f, 156 | v2 : vec3f 157 | }; 158 | 159 | struct AABB { 160 | min : vec3f, 161 | max : vec3f 162 | }; 163 | 164 | struct Uniforms { 165 | min : vec3f, 166 | num : i32, 167 | max : vec3f, 168 | f_1 : i32 169 | }; 170 | 171 | @group(0) @binding(0) var triangles : array; 172 | @group(0) @binding(1) var aabbs : array; 173 | @group(0) @binding(2) var z_indexes : array; 174 | @group(0) @binding(3) var uniforms : Uniforms; 175 | 176 | @compute @workgroup_size(${WG_SIZE}) 177 | fn compute_aabb_z_idx(@builtin(global_invocation_id) global_id : vec3u) { 178 | var idx : i32 = i32(global_id.x); 179 | if (idx >= uniforms.num) { 180 | return; 181 | } 182 | 183 | var tri : Triangle = triangles[idx]; 184 | 185 | var box : AABB; 186 | box.min = min(tri.v0, min(tri.v1, tri.v2)); 187 | box.max = max(tri.v0, max(tri.v1, tri.v2)); 188 | 189 | aabbs[idx] = box; 190 | 191 | var cen : vec3f = (box.max + box.min) * .5f; 192 | var rel : vec3f = (cen - uniforms.min) / (uniforms.max - uniforms.min); 193 | 194 | z_indexes[idx] = morton_code(vec3u(rel * 1023.99f)); 195 | } 196 | 197 | fn morton_code(upos : vec3u) -> u32 { 198 | return split_3(upos.x) | (split_3(upos.y) << 1) | (split_3(upos.z) << 2); 199 | } 200 | 201 | // from: https://stackoverflow.com/questions/1024754/how-to-compute-a-3d-morton-number-interleave-the-bits-of-3-ints 202 | fn split_3(u : u32) -> u32 { 203 | var x : u32 = u; 204 | x = (x | (x << 16)) & 0x030000FFu; 205 | x = (x | (x << 8)) & 0x0300F00Fu; 206 | x = (x | (x << 4)) & 0x030C30C3u; 207 | x = (x | (x << 2)) & 0x09249249u; 208 | return x; 209 | }` 210 | } 211 | } -------------------------------------------------------------------------------- /scripts/bvh/kernels/bvh-up-pass.js: -------------------------------------------------------------------------------- 1 | 2 | function initBVHUpPassKernel(device) { 3 | // shader parameters 4 | const WG_SIZE = 64 5 
| 6 | // create bind group layouts, shader module and pipeline 7 | const BG_LAYOUTS = [ 8 | device.createBindGroupLayout({ 9 | entries: [ 10 | { 11 | binding: 0, 12 | visibility: GPUShaderStage.COMPUTE, 13 | buffer: { 14 | type: "storage" 15 | } 16 | }, 17 | { 18 | binding: 1, 19 | visibility: GPUShaderStage.COMPUTE, 20 | buffer: { 21 | type: "storage" 22 | } 23 | }, 24 | { 25 | binding: 2, 26 | visibility: GPUShaderStage.COMPUTE, 27 | buffer: { 28 | type: "storage" 29 | } 30 | } 31 | ] 32 | }), 33 | device.createBindGroupLayout({ 34 | entries: [ 35 | { 36 | binding: 0, 37 | visibility: GPUShaderStage.COMPUTE, 38 | buffer: { 39 | type: "storage" 40 | } 41 | }, 42 | { 43 | binding: 1, 44 | visibility: GPUShaderStage.COMPUTE, 45 | buffer: { 46 | type: "uniform" 47 | } 48 | }, 49 | ] 50 | }) 51 | ] 52 | 53 | const SM = device.createShaderModule({ 54 | code: SRC(), 55 | label: "radix tree shader module" 56 | }) 57 | 58 | const PIPELINE = device.createComputePipeline({ 59 | layout: device.createPipelineLayout({ 60 | bindGroupLayouts: BG_LAYOUTS 61 | }), 62 | compute: { 63 | module: SM, 64 | entryPoint: "bvh_upward_pass" 65 | } 66 | }) 67 | 68 | return { execute } 69 | 70 | async function execute(IDX_BUFFER, AABB_BUFFER, PARENT_BUFFER, size) { 71 | // create all the necessary buffers 72 | const BVH_BUFFER = device.createBuffer({ 73 | size: size * 64, 74 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC 75 | }) 76 | const UNIFORM_BUFFER = device.createBuffer({ 77 | size: 16, 78 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST 79 | }) 80 | 81 | const BGS = [ 82 | device.createBindGroup({ 83 | layout: BG_LAYOUTS[0], 84 | entries: [ 85 | { 86 | binding: 0, 87 | visibility: GPUShaderStage.COMPUTE, 88 | resource: { 89 | buffer: IDX_BUFFER 90 | } 91 | }, 92 | { 93 | binding: 1, 94 | visibility: GPUShaderStage.COMPUTE, 95 | resource: { 96 | buffer: AABB_BUFFER 97 | } 98 | }, 99 | { 100 | binding: 2, 101 | visibility: GPUShaderStage.COMPUTE, 102 | resource: { 103 
| buffer: PARENT_BUFFER 104 | } 105 | } 106 | ] 107 | }), 108 | device.createBindGroup({ 109 | layout: BG_LAYOUTS[1], 110 | entries: [ 111 | { 112 | binding: 0, 113 | visibility: GPUShaderStage.COMPUTE, 114 | resource: { 115 | buffer: BVH_BUFFER 116 | } 117 | }, 118 | { 119 | binding: 1, 120 | visibility: GPUShaderStage.COMPUTE, 121 | resource: { 122 | buffer: UNIFORM_BUFFER 123 | } 124 | } 125 | ] 126 | }) 127 | ] 128 | 129 | {// send work to GPU 130 | device.queue.writeBuffer( 131 | UNIFORM_BUFFER, 132 | 0, 133 | new Int32Array([ 134 | size, 135 | 0, 136 | 0, 137 | 0 138 | ]) 139 | ) 140 | 141 | const CE = device.createCommandEncoder() 142 | const P = CE.beginComputePass() 143 | 144 | P.setPipeline(PIPELINE) 145 | P.setBindGroup(0, BGS[0]) 146 | P.setBindGroup(1, BGS[1]) 147 | P.dispatchWorkgroups(Math.ceil(size / WG_SIZE)) 148 | P.end() 149 | 150 | device.queue.submit([CE.finish()]) 151 | } 152 | 153 | await device.queue.onSubmittedWorkDone() 154 | 155 | return { BVH_BUFFER } 156 | } 157 | 158 | function SRC() { 159 | return /* wgsl */ ` 160 | 161 | struct BVHNode { 162 | aabb_l_min_x : atomic, 163 | aabb_l_min_y : atomic, 164 | aabb_l_min_z : atomic, 165 | l_child : atomic, 166 | aabb_l_max_x : atomic, 167 | aabb_l_max_y : atomic, 168 | aabb_l_max_z : atomic, 169 | f_1 : atomic, // Used for synchronization 170 | aabb_r_min_x : atomic, 171 | aabb_r_min_y : atomic, 172 | aabb_r_min_z : atomic, 173 | r_child : atomic, 174 | aabb_r_max_x : atomic, 175 | aabb_r_max_y : atomic, 176 | aabb_r_max_z : atomic, 177 | f_2 : atomic 178 | }; 179 | 180 | struct AABB { 181 | min : vec3f, 182 | max : vec3f 183 | }; 184 | 185 | struct Uniforms { 186 | num : i32, 187 | f_1 : i32, 188 | f_2 : i32, 189 | f_3 : i32 190 | }; 191 | 192 | @group(0) @binding(0) var idx_arr : array; 193 | @group(0) @binding(1) var aabb_arr : array; 194 | @group(0) @binding(2) var par_arr : array; 195 | 196 | @group(1) @binding(0) var bvh : array; 197 | @group(1) @binding(1) var uniforms : Uniforms; 198 | 
199 | @compute @workgroup_size(${WG_SIZE}) 200 | fn bvh_upward_pass(@builtin(global_invocation_id) global_id : vec3u) { 201 | var idx : i32 = i32(global_id.x); 202 | if (idx >= uniforms.num) { 203 | return; 204 | } 205 | 206 | var bbox : AABB = aabb_arr[idx_arr[idx]]; 207 | 208 | // slightly perturb the bounding box position for check on line ~266 209 | bbox.min -= vec3f(bbox.min == vec3f(0.)) * vec3f(1e-8f); 210 | bbox.max += vec3f(bbox.max == vec3f(0.)) * vec3f(1e-8f); 211 | 212 | var c_idx : i32 = idx; 213 | var w_idx : i32 = -(idx + 1); 214 | var level : i32 = 0; 215 | 216 | var bSkipped : bool = false; 217 | 218 | while ((w_idx != 0 || level == 0) && !bSkipped) { 219 | var p_idx : i32; 220 | if (level == 0) { 221 | p_idx = par_arr[c_idx + uniforms.num]; 222 | } else { 223 | p_idx = par_arr[c_idx]; 224 | } 225 | 226 | if (!bSkipped) { 227 | var sibling : i32; 228 | 229 | if (!bSkipped) { 230 | sibling = atomicAdd(&bvh[p_idx].f_1, 1); 231 | } 232 | 233 | if (sibling == 0 && !bSkipped) { 234 | atomicStore(&bvh[p_idx].aabb_l_min_x, bitcast(bbox.min.x)); 235 | atomicStore(&bvh[p_idx].aabb_l_min_y, bitcast(bbox.min.y)); 236 | atomicStore(&bvh[p_idx].aabb_l_min_z, bitcast(bbox.min.z)); 237 | atomicStore(&bvh[p_idx].aabb_l_max_x, bitcast(bbox.max.x)); 238 | atomicStore(&bvh[p_idx].aabb_l_max_y, bitcast(bbox.max.y)); 239 | atomicStore(&bvh[p_idx].aabb_l_max_z, bitcast(bbox.max.z)); 240 | atomicStore(&bvh[p_idx].l_child, w_idx); 241 | 242 | bSkipped = true; 243 | } 244 | 245 | if (sibling != 0 && !bSkipped) { 246 | atomicStore(&bvh[p_idx].aabb_r_min_x, bitcast(bbox.min.x)); 247 | atomicStore(&bvh[p_idx].aabb_r_min_y, bitcast(bbox.min.y)); 248 | atomicStore(&bvh[p_idx].aabb_r_min_z, bitcast(bbox.min.z)); 249 | atomicStore(&bvh[p_idx].aabb_r_max_x, bitcast(bbox.max.x)); 250 | atomicStore(&bvh[p_idx].aabb_r_max_y, bitcast(bbox.max.y)); 251 | atomicStore(&bvh[p_idx].aabb_r_max_z, bitcast(bbox.max.z)); 252 | atomicStore(&bvh[p_idx].r_child, w_idx); 253 | 254 | var l_min : 
vec3f = vec3f( 255 | bitcast(atomicLoad(&bvh[p_idx].aabb_l_min_x)), 256 | bitcast(atomicLoad(&bvh[p_idx].aabb_l_min_y)), 257 | bitcast(atomicLoad(&bvh[p_idx].aabb_l_min_z)) 258 | ); 259 | var l_max : vec3f = vec3f( 260 | bitcast(atomicLoad(&bvh[p_idx].aabb_l_max_x)), 261 | bitcast(atomicLoad(&bvh[p_idx].aabb_l_max_y)), 262 | bitcast(atomicLoad(&bvh[p_idx].aabb_l_max_z)) 263 | ); 264 | 265 | // don't do anything if the other is not loaded yet 266 | if (any(l_min == vec3f(0.)) || any(l_max == vec3f(0.))) { 267 | continue; 268 | } 269 | 270 | bbox.min = min(bbox.min, l_min); 271 | bbox.max = max(bbox.max, l_max); 272 | 273 | // Move to parent 274 | c_idx = p_idx; 275 | w_idx = p_idx; 276 | level += 1; 277 | } 278 | } 279 | } 280 | }` 281 | } 282 | } -------------------------------------------------------------------------------- /scripts/bvh/kernels/radix-sort.js: -------------------------------------------------------------------------------- 1 | 2 | function initRadixSortKernel(device) { 3 | // create bind group layouts 4 | const SCAN_UP_BG_LAYOUTS = [ 5 | device.createBindGroupLayout({ 6 | entries: [ 7 | { 8 | binding: 0, 9 | visibility: GPUShaderStage.COMPUTE, 10 | buffer: { 11 | type: "storage" 12 | } 13 | }, 14 | { 15 | binding: 1, 16 | visibility: GPUShaderStage.COMPUTE, 17 | buffer: { 18 | type: "storage" 19 | } 20 | } 21 | ] 22 | }), 23 | device.createBindGroupLayout({ 24 | entries: [ 25 | { 26 | binding: 0, 27 | visibility: GPUShaderStage.COMPUTE, 28 | buffer: { 29 | type: "uniform" 30 | } 31 | } 32 | ] 33 | }) 34 | ] 35 | 36 | const INPUT_L_BG_LAYOUTS = [ 37 | device.createBindGroupLayout({ 38 | entries: [ 39 | { 40 | binding: 0, 41 | visibility: GPUShaderStage.COMPUTE, 42 | buffer: { 43 | type: "storage" 44 | } 45 | }, 46 | { 47 | binding: 1, 48 | visibility: GPUShaderStage.COMPUTE, 49 | buffer: { 50 | type: "storage" 51 | } 52 | }, 53 | { 54 | binding: 2, 55 | visibility: GPUShaderStage.COMPUTE, 56 | buffer: { 57 | type: "storage" 58 | } 59 | }, 60 | { 61 
| binding: 3, 62 | visibility: GPUShaderStage.COMPUTE, 63 | buffer: { 64 | type: "storage" 65 | } 66 | } 67 | ] 68 | }), 69 | device.createBindGroupLayout({ 70 | entries: [ 71 | { 72 | binding: 0, 73 | visibility: GPUShaderStage.COMPUTE, 74 | buffer: { 75 | type: "storage" 76 | } 77 | }, 78 | { 79 | binding: 1, 80 | visibility: GPUShaderStage.COMPUTE, 81 | buffer: { 82 | type: "storage" 83 | } 84 | }, 85 | { 86 | binding: 2, 87 | visibility: GPUShaderStage.COMPUTE, 88 | buffer: { 89 | type: "storage" 90 | } 91 | } 92 | ] 93 | }), 94 | device.createBindGroupLayout({ 95 | entries: [ 96 | { 97 | binding: 0, 98 | visibility: GPUShaderStage.COMPUTE, 99 | buffer: { 100 | type: "uniform" 101 | } 102 | } 103 | ] 104 | }) 105 | ] 106 | 107 | // compile shaders 108 | const SCAN_UP_SM = device.createShaderModule({ 109 | code: SCAN_UP_SRC(), 110 | label: "scan up shader module" 111 | }) 112 | 113 | const INPUT_L_SM = device.createShaderModule({ 114 | code: INPUT_L_SRC(), 115 | label: "input level shader module" 116 | }) 117 | 118 | // create pipelines 119 | const INIT_IDX_PIPELINE = device.createComputePipeline({ 120 | layout: device.createPipelineLayout({ 121 | bindGroupLayouts: INPUT_L_BG_LAYOUTS 122 | }), 123 | compute: { 124 | module: INPUT_L_SM, 125 | entryPoint: "init_idx" 126 | } 127 | }) 128 | 129 | const INIT_OFF_PIPELINE = device.createComputePipeline({ 130 | layout: device.createPipelineLayout({ 131 | bindGroupLayouts: INPUT_L_BG_LAYOUTS 132 | }), 133 | compute: { 134 | module: INPUT_L_SM, 135 | entryPoint: "init_off" 136 | } 137 | }) 138 | 139 | const L_SCAN_PIPELINE = device.createComputePipeline({ 140 | layout: device.createPipelineLayout({ 141 | bindGroupLayouts: INPUT_L_BG_LAYOUTS 142 | }), 143 | compute: { 144 | module: INPUT_L_SM, 145 | entryPoint: "scan_and_sort" 146 | } 147 | }) 148 | 149 | const SCAN_UP_PIPELINE = device.createComputePipeline({ 150 | layout: device.createPipelineLayout({ 151 | bindGroupLayouts: SCAN_UP_BG_LAYOUTS 152 | }), 153 | compute: { 
154 | module: SCAN_UP_SM, 155 | entryPoint: "scan_up" 156 | } 157 | }) 158 | 159 | return { execute } 160 | 161 | // takes as input a buffer of u32's returns a buffer with keys rearranged - is destructive to the buffer! 162 | async function execute(valBuffer, size) { 163 | if (valBuffer.size != size * 4) { 164 | console.warning(`in radix sort: buffer size [ ${valBuffer.size} ] does not match requested size [ ${size} ]`) 165 | return 166 | } 167 | 168 | // create all necessary buffers 169 | 170 | const valBuffers = [ 171 | valBuffer, 172 | device.createBuffer({ 173 | size: valBuffer.size, 174 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC 175 | }) 176 | ] 177 | 178 | const idxBuffers = [ 179 | device.createBuffer({ 180 | size: valBuffer.size, 181 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC 182 | }), 183 | device.createBuffer({ 184 | size: valBuffer.size, 185 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC 186 | }) 187 | ] 188 | 189 | const l1OffsetsBuffer = device.createBuffer({ 190 | size: 256 * 256 * 16, 191 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC 192 | }) 193 | 194 | const l2OffsetsBuffer = device.createBuffer({ 195 | size: 256 * 16, 196 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC 197 | }) 198 | 199 | const l3OffsetsBuffer = device.createBuffer({ 200 | size: 16, 201 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC 202 | }) 203 | 204 | const uniformBuffer = device.createBuffer({ 205 | size: 16, 206 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST 207 | }) 208 | 209 | // create necessary bind groups 210 | 211 | const SCAN_UP_BGS = [ 212 | device.createBindGroup({ 213 | layout: SCAN_UP_BG_LAYOUTS[0], 214 | entries: [ 215 | { 216 | binding: 0, 217 | visibility: GPUShaderStage.COMPUTE, 218 | resource: { 219 | buffer: l1OffsetsBuffer 220 | } 221 | }, 222 | { 223 | binding: 1, 224 | visibility: GPUShaderStage.COMPUTE, 225 | resource: { 226 | buffer: l2OffsetsBuffer 227 | } 228 | } 229 | 
] 230 | }), 231 | device.createBindGroup({ 232 | layout: SCAN_UP_BG_LAYOUTS[0], 233 | entries: [ 234 | { 235 | binding: 0, 236 | visibility: GPUShaderStage.COMPUTE, 237 | resource: { 238 | buffer: l2OffsetsBuffer 239 | } 240 | }, 241 | { 242 | binding: 1, 243 | visibility: GPUShaderStage.COMPUTE, 244 | resource: { 245 | buffer: l3OffsetsBuffer 246 | } 247 | } 248 | ] 249 | }), 250 | device.createBindGroup({ 251 | layout: SCAN_UP_BG_LAYOUTS[1], 252 | entries: [ 253 | { 254 | binding: 0, 255 | visibility: GPUShaderStage.COMPUTE, 256 | resource: { 257 | buffer: uniformBuffer 258 | } 259 | } 260 | ] 261 | }) 262 | ] 263 | 264 | const INPUT_L_BGS = [ 265 | device.createBindGroup({ 266 | layout: INPUT_L_BG_LAYOUTS[0], 267 | entries: [ 268 | { 269 | binding: 0, 270 | visibility: GPUShaderStage.COMPUTE, 271 | resource: { 272 | buffer: idxBuffers[0] 273 | } 274 | }, 275 | { 276 | binding: 1, 277 | visibility: GPUShaderStage.COMPUTE, 278 | resource: { 279 | buffer: valBuffers[0] 280 | } 281 | }, 282 | { 283 | binding: 2, 284 | visibility: GPUShaderStage.COMPUTE, 285 | resource: { 286 | buffer: idxBuffers[1] 287 | } 288 | }, 289 | { 290 | binding: 3, 291 | visibility: GPUShaderStage.COMPUTE, 292 | resource: { 293 | buffer: valBuffers[1] 294 | } 295 | }, 296 | ] 297 | }), 298 | device.createBindGroup({ 299 | layout: INPUT_L_BG_LAYOUTS[0], 300 | entries: [ 301 | { 302 | binding: 0, 303 | visibility: GPUShaderStage.COMPUTE, 304 | resource: { 305 | buffer: idxBuffers[1] 306 | } 307 | }, 308 | { 309 | binding: 1, 310 | visibility: GPUShaderStage.COMPUTE, 311 | resource: { 312 | buffer: valBuffers[1] 313 | } 314 | }, 315 | { 316 | binding: 2, 317 | visibility: GPUShaderStage.COMPUTE, 318 | resource: { 319 | buffer: idxBuffers[0] 320 | } 321 | }, 322 | { 323 | binding: 3, 324 | visibility: GPUShaderStage.COMPUTE, 325 | resource: { 326 | buffer: valBuffers[0] 327 | } 328 | }, 329 | ] 330 | }), 331 | device.createBindGroup({ 332 | layout: INPUT_L_BG_LAYOUTS[1], 333 | entries: [ 334 | 
{ 335 | binding: 0, 336 | visibility: GPUShaderStage.COMPUTE, 337 | resource: { 338 | buffer: l1OffsetsBuffer 339 | } 340 | }, 341 | { 342 | binding: 1, 343 | visibility: GPUShaderStage.COMPUTE, 344 | resource: { 345 | buffer: l2OffsetsBuffer 346 | } 347 | }, 348 | { 349 | binding: 2, 350 | visibility: GPUShaderStage.COMPUTE, 351 | resource: { 352 | buffer: l3OffsetsBuffer 353 | } 354 | } 355 | ] 356 | }), 357 | ] 358 | 359 | // initialize the index array 360 | { 361 | device.queue.writeBuffer( 362 | uniformBuffer, 363 | 0, 364 | new Uint32Array([ 365 | size, 366 | 0, 367 | 0, 368 | 0 369 | ]) 370 | ) 371 | 372 | const CE = device.createCommandEncoder() 373 | const P = CE.beginComputePass() 374 | 375 | P.setPipeline(INIT_IDX_PIPELINE) 376 | P.setBindGroup(0, INPUT_L_BGS[0]) 377 | P.setBindGroup(1, INPUT_L_BGS[2]) 378 | P.setBindGroup(2, SCAN_UP_BGS[2]) 379 | P.dispatchWorkgroups(Math.ceil(size / 256)) 380 | P.end() 381 | 382 | device.queue.submit([CE.finish()]) 383 | } 384 | 385 | // sort the given array based on the 2k, 2k + 1-th bits 386 | async function sortKthBits(k) { 387 | {// first pass - update the offsets from the first layer 388 | const CE = device.createCommandEncoder() 389 | 390 | device.queue.writeBuffer( 391 | uniformBuffer, 392 | 0, 393 | new Uint32Array([ 394 | size, 395 | k, 396 | 0, 397 | 0 398 | ]) 399 | ) 400 | 401 | const P = CE.beginComputePass() 402 | P.setPipeline(INIT_OFF_PIPELINE) 403 | P.setBindGroup(0, INPUT_L_BGS[k % 2]) 404 | P.setBindGroup(1, INPUT_L_BGS[2]) 405 | P.setBindGroup(2, SCAN_UP_BGS[2]) 406 | P.dispatchWorkgroups(Math.ceil(size / 256)) 407 | P.end() 408 | 409 | device.queue.submit([CE.finish()]) 410 | } 411 | {// second pass - scan the level 1 offsets 412 | const CE = device.createCommandEncoder() 413 | 414 | device.queue.writeBuffer( 415 | uniformBuffer, 416 | 0, 417 | new Uint32Array([ 418 | Math.ceil(size / 256), 419 | k, 420 | 1, 421 | 0 422 | ]) 423 | ) 424 | 425 | const P = CE.beginComputePass() 426 | 
P.setPipeline(SCAN_UP_PIPELINE) 427 | P.setBindGroup(0, SCAN_UP_BGS[0]) 428 | P.setBindGroup(1, SCAN_UP_BGS[2]) 429 | P.dispatchWorkgroups(Math.ceil(size / (256 * 256))) 430 | P.end() 431 | 432 | device.queue.submit([CE.finish()]) 433 | } 434 | {// third pass - scan the level 2 offsets 435 | const CE = device.createCommandEncoder() 436 | 437 | device.queue.writeBuffer( 438 | uniformBuffer, 439 | 0, 440 | new Uint32Array([ 441 | Math.ceil(size / (256 * 256)), 442 | k, 443 | 2, 444 | 0 445 | ]) 446 | ) 447 | 448 | const P = CE.beginComputePass() 449 | P.setPipeline(SCAN_UP_PIPELINE) 450 | P.setBindGroup(0, SCAN_UP_BGS[1]) 451 | P.setBindGroup(1, SCAN_UP_BGS[2]) 452 | P.dispatchWorkgroups(1) 453 | P.end() 454 | 455 | device.queue.submit([CE.finish()]) 456 | } 457 | {// final pass - scan and write at the first level 458 | const CE = device.createCommandEncoder() 459 | 460 | device.queue.writeBuffer( 461 | uniformBuffer, 462 | 0, 463 | new Uint32Array([ 464 | size, 465 | k, 466 | 0, 467 | 0 468 | ]) 469 | ) 470 | 471 | const P = CE.beginComputePass() 472 | P.setPipeline(L_SCAN_PIPELINE) 473 | P.setBindGroup(0, INPUT_L_BGS[k % 2]) 474 | P.setBindGroup(1, INPUT_L_BGS[2]) 475 | P.setBindGroup(2, SCAN_UP_BGS[2]) 476 | P.dispatchWorkgroups(Math.ceil(size / 256)) 477 | P.end() 478 | 479 | device.queue.submit([CE.finish()]) 480 | } 481 | 482 | await device.queue.onSubmittedWorkDone() 483 | } 484 | 485 | // run the 2-bit radix sort 16 times 486 | for (var k = 0; k < 16; k++) { 487 | await sortKthBits(k); 488 | } 489 | 490 | // destroy remaining, unused buffers 491 | uniformBuffer.destroy() 492 | valBuffers[1].destroy() 493 | idxBuffers[1].destroy() 494 | l1OffsetsBuffer.destroy() 495 | l2OffsetsBuffer.destroy() 496 | l3OffsetsBuffer.destroy() 497 | 498 | // return the two key buffers 499 | return { IDX_BUFFER : idxBuffers[0] } 500 | } 501 | 502 | function INPUT_L_SRC() { 503 | return /* wgsl */ ` 504 | // bindgroup specific to interactions with the actual input 505 | @group(0) 
@binding(0) var idxs : array; 506 | @group(0) @binding(1) var vals : array; 507 | @group(0) @binding(2) var n_idxs : array; 508 | @group(0) @binding(3) var n_vals : array; 509 | 510 | // bindgroup with counts from intermediate steps 511 | @group(1) @binding(0) var l1_offsets : array; 512 | @group(1) @binding(1) var l2_offsets : array; 513 | @group(1) @binding(2) var l3_offsets : array; 514 | 515 | struct Uniforms { 516 | num : u32, 517 | win : u32, 518 | lvl : u32, 519 | xtr : u32 520 | }; 521 | 522 | // bindgroup which stores the uniforms 523 | @group(2) @binding(0) var uniforms : Uniforms; 524 | 525 | // set idx in the buffer to just count 0, 1, 2, ... 526 | @compute @workgroup_size(64) 527 | fn init_idx(@builtin(global_invocation_id) global_id : vec3u) { 528 | for (var i : u32 = 0u; i < 4; i++) { 529 | var idx : u32 = 4u * global_id.x + i; 530 | if (idx < uniforms.num) { 531 | idxs[idx] = i32(idx); 532 | } 533 | } 534 | } 535 | 536 | var wg_count : array, 4>; 537 | 538 | // get the number of each element within each group 539 | @compute @workgroup_size(64) 540 | fn init_off( 541 | @builtin(global_invocation_id) global_id : vec3u, 542 | @builtin(local_invocation_id) local_id : vec3u 543 | ) { 544 | // loop over all of this thread's entries and tally how many are of each type 545 | var l_count : array; 546 | for (var i : u32 = 0u; i < 4; i++) { 547 | var idx : u32 = 4u * global_id.x + i; 548 | if (idx < uniforms.num) { 549 | var value : u32 = vals[idx]; 550 | l_count[(value >> (2u * uniforms.win)) & 3u]++; 551 | } 552 | } 553 | 554 | // send this to workgroup memory 555 | atomicAdd(&wg_count[0], l_count[0]); 556 | atomicAdd(&wg_count[1], l_count[1]); 557 | atomicAdd(&wg_count[2], l_count[2]); 558 | atomicAdd(&wg_count[3], l_count[3]); 559 | 560 | // the last thread writes the resulting vector to global memory 561 | workgroupBarrier(); 562 | if (local_id.x == 63u) { 563 | l1_offsets[global_id.x / 64u] = vec4u( 564 | atomicLoad(&wg_count[0]), 565 | 
atomicLoad(&wg_count[1]), 566 | atomicLoad(&wg_count[2]), 567 | atomicLoad(&wg_count[3]) 568 | ); 569 | } 570 | } 571 | 572 | var scan_arr : array; 573 | 574 | // scan across the workgroup locally, then reorder everything globally 575 | @compute @workgroup_size(64) 576 | fn scan_and_sort( 577 | @builtin(global_invocation_id) global_id : vec3u, 578 | @builtin(local_invocation_id) local_id : vec3u 579 | ) { 580 | var l_idx : u32 = local_id.x; 581 | var g_idx : u32 = global_id.x; 582 | 583 | // each thread reads four values from memory and performs a local scan 584 | var thread_vals : array; 585 | 586 | 587 | 588 | for (var i : u32 = 0u; i < 4; i++) { 589 | var c_idx : u32 = 4u * g_idx + i; 590 | if (c_idx < uniforms.num) { 591 | thread_vals[i] = vals[c_idx]; 592 | } 593 | } 594 | 595 | // compute the offsets across the workgroup 596 | scan_arr[l_idx] = get_val_vec(thread_vals[0]) 597 | + get_val_vec(thread_vals[1]) 598 | + get_val_vec(thread_vals[2]) 599 | + get_val_vec(thread_vals[3]); 600 | workgroupBarrier(); 601 | 602 | workgroup_scan(l_idx); 603 | 604 | // compute the offsets for each element & write to memory 605 | var thread_offs : array; 606 | thread_offs[0] = scan_arr[l_idx]; 607 | thread_offs[1] = thread_offs[0] + get_val_vec(thread_vals[0]); 608 | thread_offs[2] = thread_offs[1] + get_val_vec(thread_vals[1]); 609 | thread_offs[3] = thread_offs[2] + get_val_vec(thread_vals[2]); 610 | 611 | var global_offsets : vec4u; 612 | global_offsets[0u] = dot(vec4u(0u, 0u, 0u, 0u), l3_offsets[0u]); 613 | global_offsets[1u] = dot(vec4u(1u, 0u, 0u, 0u), l3_offsets[0u]); 614 | global_offsets[2u] = dot(vec4u(1u, 1u, 0u, 0u), l3_offsets[0u]); 615 | global_offsets[3u] = dot(vec4u(1u, 1u, 1u, 0u), l3_offsets[0u]); 616 | 617 | global_offsets += l1_offsets[g_idx / 64u]; 618 | global_offsets += l2_offsets[g_idx / (64u * 256u)]; 619 | 620 | for (var i : u32 = 0u; i < 4; i++) { 621 | var c_idx : u32 = 4u * g_idx + i; 622 | if (c_idx < uniforms.num) { 623 | var n_idx : u32 = 
(global_offsets + thread_offs[i])[get_val_u32(thread_vals[i])]; 624 | 625 | n_idxs[n_idx] = idxs[c_idx]; 626 | n_vals[n_idx] = thread_vals[i]; 627 | } 628 | } 629 | } 630 | 631 | // returns which radix index this input is 632 | fn get_val_u32(input : u32) -> u32 { 633 | return (input >> (2u * uniforms.win)) & 3u; 634 | } 635 | // likewise, but for vector 636 | fn get_val_vec(input : u32) -> vec4u { 637 | var shifted = get_val_u32(input); 638 | 639 | if (shifted == 0u) { 640 | return vec4u(1u, 0u, 0u, 0u); 641 | } 642 | if (shifted == 1u) { 643 | return vec4u(0u, 1u, 0u, 0u); 644 | } 645 | if (shifted == 2u) { 646 | return vec4u(0u, 0u, 1u, 0u); 647 | } 648 | 649 | return vec4u(0u, 0u, 0u, 1u); 650 | } 651 | 652 | // performs a 256-wide scan on vec4u in scan_arr 653 | fn workgroup_scan(idx : u32) { 654 | // upsweep pass 655 | if ((1u & idx) == 1u) { 656 | scan_arr[idx] += scan_arr[idx - 1u]; 657 | } 658 | workgroupBarrier(); 659 | 660 | if ((3u & idx) == 3u) { 661 | scan_arr[idx] += scan_arr[idx - 2u]; 662 | } 663 | workgroupBarrier(); 664 | 665 | if ((7u & idx) == 7u) { 666 | scan_arr[idx] += scan_arr[idx - 4u]; 667 | } 668 | workgroupBarrier(); 669 | 670 | if ((15u & idx) == 15u) { 671 | scan_arr[idx] += scan_arr[idx - 8u]; 672 | } 673 | workgroupBarrier(); 674 | 675 | if ((31u & idx) == 31u) { 676 | scan_arr[idx] += scan_arr[idx - 16u]; 677 | } 678 | workgroupBarrier(); 679 | 680 | // two special cases in transition from upsweep to downsweep 681 | if (idx == 63u) { 682 | scan_arr[idx] = scan_arr[31u]; 683 | } 684 | workgroupBarrier(); 685 | 686 | if (idx == 31u) { 687 | scan_arr[idx] = vec4u(0u); 688 | } 689 | workgroupBarrier(); 690 | 691 | // downsweep pass 692 | if ((15u & idx) == 15u && (idx & 16u) != 0u) { 693 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 16u]; 694 | } 695 | workgroupBarrier(); 696 | 697 | if ((15u & idx) == 15u && (idx & 16u) == 0u) { 698 | scan_arr[idx] = scan_arr[idx + 16u] - scan_arr[idx]; 699 | } 700 | workgroupBarrier(); 701 | 702 | 
if ((7u & idx) == 7u && (idx & 8u) != 0u) { 703 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 8u]; 704 | } 705 | workgroupBarrier(); 706 | 707 | if ((7u & idx) == 7u && (idx & 8u) == 0u) { 708 | scan_arr[idx] = scan_arr[idx + 8u] - scan_arr[idx]; 709 | } 710 | workgroupBarrier(); 711 | 712 | if ((3u & idx) == 3u && (idx & 4u) != 0u) { 713 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 4u]; 714 | } 715 | workgroupBarrier(); 716 | 717 | if ((3u & idx) == 3u && (idx & 4u) == 0u) { 718 | scan_arr[idx] = scan_arr[idx + 4u] - scan_arr[idx]; 719 | } 720 | workgroupBarrier(); 721 | 722 | if ((1u & idx) == 1u && (idx & 2u) != 0u) { 723 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 2u]; 724 | } 725 | workgroupBarrier(); 726 | 727 | if ((1u & idx) == 1u && (idx & 2u) == 0u) { 728 | scan_arr[idx] = scan_arr[idx + 2u] - scan_arr[idx]; 729 | } 730 | workgroupBarrier(); 731 | 732 | if ((idx & 1u) != 0u) { 733 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 1u]; 734 | } 735 | workgroupBarrier(); 736 | 737 | if ((idx & 1u) == 0u) { 738 | scan_arr[idx] = scan_arr[idx + 1u] - scan_arr[idx]; 739 | } 740 | workgroupBarrier(); 741 | }` 742 | } 743 | 744 | function SCAN_UP_SRC() { 745 | return /* wgsl */ ` 746 | // bindgroup specific to the intermediate scans 747 | @group(0) @binding(0) var low_count : array; 748 | @group(0) @binding(1) var nex_count : array; 749 | 750 | struct Uniforms { 751 | num : u32, 752 | win : u32, 753 | lvl : u32, 754 | xtr : u32 755 | }; 756 | 757 | // bindgroup which stores the uniforms 758 | @group(1) @binding(0) var uniforms : Uniforms; 759 | 760 | // the LDS copy used in the workgroup-wide prefix scan 761 | var scan_arr : array; 762 | 763 | @compute @workgroup_size(64) 764 | fn scan_up( 765 | @builtin(global_invocation_id) global_id : vec3u, 766 | @builtin(local_invocation_id) local_id : vec3u 767 | ) { 768 | var l_idx : u32 = local_id.x; 769 | var g_idx : u32 = global_id.x; 770 | 771 | // each thread reads four values from memory and performs a 
local scan 772 | var thread_vals : array; 773 | var thread_offs : array; 774 | 775 | for (var i : u32 = 0u; i < 4; i++) { 776 | var c_idx : u32 = 4u * g_idx + i; 777 | 778 | if (c_idx < uniforms.num) { 779 | thread_vals[i] = low_count[4u * g_idx + i]; 780 | } 781 | } 782 | 783 | thread_offs[0] = vec4u(0u, 0u, 0u, 0u); 784 | thread_offs[1] = thread_vals[0]; 785 | thread_offs[2] = thread_offs[1] + thread_vals[1]; 786 | thread_offs[3] = thread_offs[2] + thread_vals[2]; 787 | 788 | // perform the workgroup-wide prefix scan 789 | scan_arr[l_idx] = thread_vals[0] + thread_vals[1] + thread_vals[2] + thread_vals[3]; 790 | workgroupBarrier(); 791 | 792 | workgroup_scan(l_idx); 793 | 794 | // complete the local scan and send it back to storage 795 | for (var i : u32 = 0u; i < 4; i++) { 796 | low_count[4u * g_idx + i] = scan_arr[l_idx] + thread_offs[i]; 797 | } 798 | 799 | // if we are the last thread in the group, send the total # to the next layer 800 | if (l_idx == 63u) { 801 | nex_count[g_idx / 64u] = scan_arr[63u] + thread_offs[3] + thread_vals[3]; 802 | } 803 | } 804 | 805 | // performs a 256-wide scan on vec4u in scan_arr 806 | fn workgroup_scan(idx : u32) { 807 | // upsweep pass 808 | if ((1u & idx) == 1u) { 809 | scan_arr[idx] += scan_arr[idx - 1u]; 810 | } 811 | workgroupBarrier(); 812 | 813 | if ((3u & idx) == 3u) { 814 | scan_arr[idx] += scan_arr[idx - 2u]; 815 | } 816 | workgroupBarrier(); 817 | 818 | if ((7u & idx) == 7u) { 819 | scan_arr[idx] += scan_arr[idx - 4u]; 820 | } 821 | workgroupBarrier(); 822 | 823 | if ((15u & idx) == 15u) { 824 | scan_arr[idx] += scan_arr[idx - 8u]; 825 | } 826 | workgroupBarrier(); 827 | 828 | if ((31u & idx) == 31u) { 829 | scan_arr[idx] += scan_arr[idx - 16u]; 830 | } 831 | workgroupBarrier(); 832 | 833 | // two special cases in transition from upsweep to downsweep 834 | if (idx == 63u) { 835 | scan_arr[idx] = scan_arr[31u]; 836 | } 837 | workgroupBarrier(); 838 | 839 | if (idx == 31u) { 840 | scan_arr[idx] = vec4u(0u); 841 | } 
/**
 * Creates the compute kernel that writes parent pointers for a binary radix
 * tree built over a buffer of sorted u32 keys.
 *
 * The bind group layout, shader module and pipeline are created once here;
 * `execute` can then be called for each build.
 *
 * @param {GPUDevice} device - device used to create all GPU objects
 * @returns {{ execute: (KEY_BUFFER: GPUBuffer, SIZE: number) => Promise<{ PARENT_BUFFER: GPUBuffer } | undefined> }}
 */
function initRadixTreeKernel(device) {
    // shader parameters
    const WG_SIZE = 64

    // create bind group layout, shader module and pipeline
    const BG_LAYOUT = device.createBindGroupLayout({
        entries: [
            {
                // keys
                binding: 0,
                visibility: GPUShaderStage.COMPUTE,
                buffer: { type: "storage" }
            },
            {
                // uniforms
                binding: 1,
                visibility: GPUShaderStage.COMPUTE,
                buffer: { type: "uniform" }
            },
            {
                // parent pointers (output)
                binding: 2,
                visibility: GPUShaderStage.COMPUTE,
                buffer: { type: "storage" }
            }
        ]
    })

    const SM = device.createShaderModule({
        code: SRC(),
        label: "radix tree shader module"
    })

    const PIPELINE = device.createComputePipeline({
        layout: device.createPipelineLayout({
            bindGroupLayouts: [BG_LAYOUT]
        }),
        compute: {
            module: SM,
            entryPoint: "compute_radix_tree_pointers"
        }
    })

    return { execute }

    /**
     * Runs the kernel over SIZE keys and resolves once the GPU work is done.
     *
     * @param {GPUBuffer} KEY_BUFFER - key buffer; must be exactly 4 * SIZE bytes
     * @param {number} SIZE - number of keys
     * @returns {Promise<{ PARENT_BUFFER: GPUBuffer } | undefined>} the parent
     *   buffer (2 * SIZE i32 entries: the shader writes internal-node parents at
     *   [0, SIZE) and leaf parents at SIZE + leaf index), or undefined (after a
     *   console warning) when the buffer size does not match SIZE
     */
    async function execute(KEY_BUFFER, SIZE) {
        if (KEY_BUFFER.size != 4 * SIZE) {
            console.warn(`in radix tree: buffer size [ ${KEY_BUFFER.size} ] does not match requested size [ ${SIZE} ]`)
            return
        }

        // create all the necessary buffers
        const PARENT_BUFFER = device.createBuffer({
            size: SIZE * 8,
            usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
        })
        const UNIFORM_BUFFER = device.createBuffer({
            size: 16,
            usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
        })

        try {
            // create the necessary bind group; entries only take `binding` and
            // `resource` (visibility belongs to the layout, not the bind group)
            const BG = device.createBindGroup({
                layout: BG_LAYOUT,
                entries: [
                    { binding: 0, resource: { buffer: KEY_BUFFER } },
                    { binding: 1, resource: { buffer: UNIFORM_BUFFER } },
                    { binding: 2, resource: { buffer: PARENT_BUFFER } }
                ]
            })

            // send work to GPU
            device.queue.writeBuffer(
                UNIFORM_BUFFER,
                0,
                new Int32Array([SIZE, 0, 0, 0])
            )

            const CE = device.createCommandEncoder()
            const P = CE.beginComputePass()

            P.setPipeline(PIPELINE)
            P.setBindGroup(0, BG)
            P.dispatchWorkgroups(Math.ceil(SIZE / WG_SIZE))
            P.end()

            device.queue.submit([CE.finish()])

            await device.queue.onSubmittedWorkDone()
        } catch (err) {
            // the caller never receives the output buffer on failure, so free it here
            PARENT_BUFFER.destroy()
            throw err
        } finally {
            // the uniform buffer is only needed for this one dispatch
            UNIFORM_BUFFER.destroy()
        }

        return { PARENT_BUFFER }
    }

    /** Returns the WGSL source of the radix tree kernel. */
    function SRC() {
        return /* wgsl */ `

        struct Uniforms {
            num : i32,
            f_1 : i32,
            f_2 : i32,
            f_3 : i32
        };

        @group(0) @binding(0) var<storage, read_write> keys : array<u32>;
        @group(0) @binding(1) var<uniform> uniforms : Uniforms;
        @group(0) @binding(2) var<storage, read_write> parents : array<i32>;

        @compute @workgroup_size(${WG_SIZE})
        fn compute_radix_tree_pointers(@builtin(global_invocation_id) global_id : vec3u) {
            // one invocation per internal node; there are num - 1 of them
            var idx : i32 = i32(global_id.x);
            if (idx >= uniforms.num - 1) {
                return;
            }

            var pointers : vec2i = compute_child_index(idx);

            // write parent pointer to child nodes, accounting for leaf nodes as well
            // (a negative pointer p encodes leaf -(p + 1), stored after the first num entries)
            if (pointers.x >= 0) {
                parents[pointers.x] = idx;
            } else {
                parents[uniforms.num - (pointers.x + 1)] = idx;
            }

            if (pointers.y >= 0) {
                parents[pointers.y] = idx;
            } else {
                parents[uniforms.num - (pointers.y + 1)] = idx;
            }
        }

        // computes the first bit (from the most significant) that the two keys differ on
        // returns -1 when the keys are identical
        fn dif(key_1 : u32, key_2 : u32) -> i32 {
            for (var i = 0u; i < 32u; i++) {
                var mask : u32 = 1u << (31u - i);

                if ((key_1 & mask) != (key_2 & mask)) {
                    return i32(i);
                }
            }
            return -1;
        }

        // computes the length of the common prefix between the keys at idx_1 and idx_2
        fn del(idx_1 : i32, idx_2 : i32) -> i32 {
            // if either index is out of bounds, del() = -1
            if (idx_1 >= uniforms.num || idx_2 >= uniforms.num || idx_1 < 0 || idx_2 < 0) {
                return -1;
            }

            var key_dif : i32 = dif(keys[idx_1], keys[idx_2]);

            // identical keys: fall back to comparing the indices themselves
            if (key_dif == -1) {
                key_dif = 32 + dif(u32(idx_1), u32(idx_2));
            }

            return key_dif;
        }

        // computes the index of the left and right child of a given node
        fn compute_child_index(i : i32) -> vec2i {
            // determine the direction of the child range
            var d : i32 = sign(del(i, i + 1) - del(i, i - 1));

            // compute a bound on the size of the range
            var del_min : i32 = del(i, i - d);
            var l_max : i32 = 2;
            while (del(i, i + l_max * d) > del_min) {
                l_max *= 2;
            }

            // given this bound, find the true size using binary search
            var l : i32 = 0;
            {
                var t : i32 = l_max / 2;
                while (t > 0) {
                    if (del(i, i + (l + t) * d) > del_min) {
                        l += t;
                    }
                    t /= 2;
                }
            }
            var j : i32 = i + l * d;

            // find the split position using binary search
            var del_node : i32 = del(i, j);
            var s : i32 = 0;
            {
                var v : i32 = 2;
                var t : i32 = (l - 1 + v) / v;
                while (t > 0) {
                    if (del(i, i + (s + t) * d) > del_node) {
                        s += t;
                    }
                    v *= 2;
                    t = (l - 1 + v) / v;
                }
            }
            var gamma : i32 = i + s * d + min(d, 0);

            // output (signed) child pointers, where negative indicates leaf node
            var returned : vec2i = vec2i(gamma, gamma + 1);
            if (min(i, j) == gamma) {
                returned.x = -returned.x - 1;
            }
            if (max(i, j) == gamma + 1) {
                returned.y = -returned.y - 1;
            }

            return returned;
        }`
    }
}
8 | entries: [ 9 | { 10 | binding: 0, 11 | visibility: GPUShaderStage.COMPUTE, 12 | buffer: { 13 | type: "storage" 14 | } 15 | }, 16 | { 17 | binding: 1, 18 | visibility: GPUShaderStage.COMPUTE, 19 | buffer: { 20 | type: "storage" 21 | } 22 | }, 23 | { 24 | binding: 2, 25 | visibility: GPUShaderStage.COMPUTE, 26 | buffer: { 27 | type: "storage" 28 | } 29 | }, 30 | { 31 | binding: 3, 32 | visibility: GPUShaderStage.COMPUTE, 33 | buffer: { 34 | type: "uniform" 35 | } 36 | } 37 | ] 38 | }) 39 | 40 | const SM = device.createShaderModule({ 41 | code: SRC(), 42 | label: "triangle rearrange shader module" 43 | }) 44 | 45 | const PIPELINE = device.createComputePipeline({ 46 | layout: device.createPipelineLayout({ 47 | bindGroupLayouts: [BG_LAYOUT] 48 | }), 49 | compute: { 50 | module: SM, 51 | entryPoint: "rearrange_triangles" 52 | } 53 | }) 54 | 55 | return { execute } 56 | 57 | async function execute(I_TRIANGLE_BUFFER, INDEX_BUFFER, size) { 58 | if (I_TRIANGLE_BUFFER.size != 48 * size) { 59 | console.warn(`in rearrange: triangle buffer size [ ${I_TRIANGLE_BUFFER.size} ] does not match requested size [ ${size} ]`) 60 | return 61 | } 62 | if (INDEX_BUFFER.size != 4 * size) { 63 | console.warn(`in rearrange: index buffer size [ ${INDEX_BUFFER.size} ] does not match requested size [ ${size} ]`) 64 | return 65 | } 66 | 67 | // create all the necessary buffers 68 | const O_TRIANGLE_BUFFER = device.createBuffer({ 69 | size: I_TRIANGLE_BUFFER.size, 70 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC 71 | }) 72 | const UNIFORM_BUFFER = device.createBuffer({ 73 | size: 16, 74 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST 75 | }) 76 | 77 | // create the bind group 78 | const BG = device.createBindGroup({ 79 | layout: BG_LAYOUT, 80 | entries: [ 81 | { 82 | binding: 0, 83 | visibility: GPUShaderStage.COMPUTE, 84 | resource: { 85 | buffer: I_TRIANGLE_BUFFER 86 | } 87 | }, 88 | { 89 | binding: 1, 90 | visibility: GPUShaderStage.COMPUTE, 91 | resource: { 92 | 
buffer: O_TRIANGLE_BUFFER 93 | } 94 | }, 95 | { 96 | binding: 2, 97 | visibility: GPUShaderStage.COMPUTE, 98 | resource: { 99 | buffer: INDEX_BUFFER 100 | } 101 | }, 102 | { 103 | binding: 3, 104 | visibility: GPUShaderStage.COMPUTE, 105 | resource: { 106 | buffer: UNIFORM_BUFFER 107 | } 108 | } 109 | ] 110 | }) 111 | 112 | {// send work to GPU 113 | device.queue.writeBuffer( 114 | UNIFORM_BUFFER, 115 | 0, 116 | new Int32Array([ 117 | size, 118 | 0, 119 | 0, 120 | 0 121 | ]), 122 | 0 123 | ) 124 | 125 | const CE = device.createCommandEncoder() 126 | const P = CE.beginComputePass() 127 | 128 | P.setPipeline(PIPELINE) 129 | P.setBindGroup(0, BG) 130 | P.dispatchWorkgroups(Math.ceil(size / WG_SIZE)) 131 | P.end() 132 | 133 | device.queue.submit([CE.finish()]) 134 | } 135 | 136 | await device.queue.onSubmittedWorkDone() 137 | 138 | return { O_TRIANGLE_BUFFER } 139 | } 140 | 141 | function SRC() { 142 | return /* wgsl */ ` 143 | 144 | struct Triangle { 145 | v0 : vec3f, 146 | v1 : vec3f, 147 | v2 : vec3f 148 | }; 149 | 150 | struct Uniforms { 151 | num : i32, 152 | f_1 : i32, 153 | f_2 : i32, 154 | f_3 : i32 155 | }; 156 | 157 | @group(0) @binding(0) var i_triangles : array; 158 | @group(0) @binding(1) var o_triangles : array; 159 | @group(0) @binding(2) var new_indices : array; 160 | @group(0) @binding(3) var uniforms : Uniforms; 161 | 162 | @compute @workgroup_size(${WG_SIZE}) 163 | fn rearrange_triangles(@builtin(global_invocation_id) global_id : vec3u) { 164 | var idx : i32 = i32(global_id.x); 165 | if (idx >= uniforms.num) { 166 | return; 167 | } 168 | o_triangles[idx] = i_triangles[new_indices[idx]]; 169 | }` 170 | } 171 | } -------------------------------------------------------------------------------- /scripts/main.js: -------------------------------------------------------------------------------- 1 | window.onload = async () => { 2 | const { adapter, device } = await initWebGPU() 3 | if (!adapter || !device) return 4 | 5 | setBuildTime() 6 | setParseTime() 7 
| setTriangles() 8 | 9 | const BVH = initBVHBuild(device) 10 | let PT = null 11 | 12 | let queuedRotate = 0 13 | 14 | async function frame() { 15 | if (PT) { 16 | while (queuedRotate > 0) { 17 | PT.rotateView() 18 | queuedRotate-- 19 | } 20 | await PT.sample() 21 | await PT.sample() 22 | await PT.sample() 23 | await PT.draw() 24 | } 25 | 26 | window.requestAnimationFrame(frame) 27 | } 28 | 29 | frame() 30 | 31 | // bind all user inputs & UI 32 | document.querySelector("#rotate-view").addEventListener("mouseup", () => { 33 | if (PT) queuedRotate++ 34 | }) 35 | 36 | function setTriangles(count) { 37 | let str = "" 38 | if (count == null) { 39 | str = "----------" 40 | } else { 41 | str = count.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",") 42 | } 43 | document.querySelector("#triangle-count").textContent = str 44 | } 45 | 46 | function setParseTime(time) { 47 | let str = "" 48 | if (time == null) { 49 | str = "----------" 50 | } else { 51 | str = time.toString().slice(0, Math.max(time.toString().length, 9)) + "s" 52 | } 53 | document.querySelector("#parse-time").textContent = str 54 | } 55 | 56 | function setBuildTime(time) { 57 | let str = "" 58 | if (time == null) { 59 | str = "----------" 60 | } else { 61 | str = time.toString().slice(0, Math.max(time.toString().length, 9)) + "s" 62 | } 63 | document.querySelector("#build-time").textContent = str 64 | } 65 | 66 | { 67 | async function readFiles(contents) { 68 | setBuildTime() 69 | setParseTime() 70 | setTriangles() 71 | let s, e 72 | s = Date.now() 73 | const { NUM_TRIS, TRI_ARR, BOUNDS } = parseObj(contents[0]) 74 | 75 | if (NUM_TRIS > 2_100_000) { 76 | alert("Warning: Model is too large. Try < 2,000,000 triangles.") 77 | return 78 | } 79 | 80 | e = Date.now() 81 | setParseTime((e - s) / 1000.) 
/**
 * Requests a WebGPU adapter and device.
 *
 * Always resolves to an object so callers can destructure the result: on
 * success both fields are set; when WebGPU is unavailable or the request
 * fails, the user is alerted and both fields are null.
 *
 * @returns {Promise<{ adapter: GPUAdapter | null, device: GPUDevice | null }>}
 */
async function initWebGPU() {
    let adapter = null
    let device = null

    try {
        adapter = await navigator.gpu?.requestAdapter()
        device = await adapter?.requestDevice()
    } catch (err) {
        // a rejected request is handled the same as missing support, but keep
        // the cause visible for debugging
        console.error(err)
        device = null
    }

    if (!device) {
        alert("browser does not support webGPU!")
        return { adapter: null, device: null }
    }

    return { adapter, device }
}
-------------------------------------------------------------------------------- /scripts/obj/obj-file-parser.js: -------------------------------------------------------------------------------- 1 | // from : https://github.com/WesUnwin/obj-file-parser 2 | class OBJFile { 3 | constructor(fileContents, defaultModelName) { 4 | this._reset(); 5 | this.fileContents = fileContents; 6 | this.defaultModelName = (defaultModelName || 'untitled'); 7 | } 8 | 9 | _reset() { 10 | this.result = { 11 | models: [], 12 | materialLibraries: [] 13 | }; 14 | this.currentMaterial = ''; 15 | this.currentGroup = ''; 16 | this.smoothingGroup = 0; 17 | } 18 | 19 | parse() { 20 | this._reset(); 21 | 22 | const _stripComments = (lineString) => { 23 | const commentIndex = lineString.indexOf('#'); 24 | if (commentIndex > -1) { return lineString.substring(0, commentIndex); } 25 | return lineString; 26 | }; 27 | 28 | const lines = this.fileContents.split('\n'); 29 | for (let i = 0; i < lines.length; i += 1) { 30 | const line = _stripComments(lines[i]); 31 | 32 | const lineItems = line.replace(/\s+/g, ' ').trim().split(' '); 33 | 34 | switch (lineItems[0].toLowerCase()) { 35 | case 'o': // Start A New Model 36 | this._parseObject(lineItems); 37 | break; 38 | case 'g': // Start a new polygon group 39 | this._parseGroup(lineItems); 40 | break; 41 | case 'v': // Define a vertex for the current model 42 | this._parseVertexCoords(lineItems); 43 | break; 44 | case 'vt': // Texture Coords 45 | this._parseTextureCoords(lineItems); 46 | break; 47 | case 'vn': // Define a vertex normal for the current model 48 | this._parseVertexNormal(lineItems); 49 | break; 50 | case 'l': // Define a line for the current model 51 | this._parseLine(lineItems); 52 | break; 53 | case 's': // Smooth shading statement 54 | this._parseSmoothShadingStatement(lineItems); 55 | break; 56 | case 'f': // Define a Face/Polygon 57 | this._parsePolygon(lineItems); 58 | break; 59 | case 'mtllib': // Reference to a material library file 
(.mtl) 60 | this._parseMtlLib(lineItems); 61 | break; 62 | case 'usemtl': // Sets the current material to be applied to polygons defined from this point forward 63 | this._parseUseMtl(lineItems); 64 | break; 65 | } 66 | } 67 | 68 | return this.result; 69 | } 70 | 71 | _createNewModel(name = this.defaultModelName) { 72 | return { 73 | name, 74 | vertices: [], 75 | textureCoords: [], 76 | vertexNormals: [], 77 | faces: [], 78 | lines: [] 79 | }; 80 | } 81 | 82 | _currentModel() { 83 | if (this.result.models.length == 0) { 84 | const defaultModel = this._createNewModel(); 85 | this.result.models.push(defaultModel); 86 | this.currentGroup = ''; 87 | this.smoothingGroup = 0; 88 | } 89 | 90 | return this.result.models[this.result.models.length - 1]; 91 | } 92 | 93 | _parseObject(lineItems) { 94 | const modelName = lineItems.length >= 2 ? lineItems[1] : this.defaultModelName; 95 | const model = this._createNewModel(modelName); 96 | this.result.models.push(model); 97 | this.currentGroup = ''; 98 | this.smoothingGroup = 0; 99 | } 100 | 101 | _parseGroup(lineItems) { 102 | if (lineItems.length != 2) { throw 'Group statements must have exactly 1 argument (eg. g group_1)'; } 103 | 104 | this.currentGroup = lineItems[1]; 105 | } 106 | 107 | _parseVertexCoords(lineItems) { 108 | const x = lineItems.length >= 2 ? parseFloat(lineItems[1]) : 0.0; 109 | const y = lineItems.length >= 3 ? parseFloat(lineItems[2]) : 0.0; 110 | const z = lineItems.length >= 4 ? parseFloat(lineItems[3]) : 0.0; 111 | 112 | this._currentModel().vertices.push({ x, y, z }); 113 | } 114 | 115 | _parseTextureCoords(lineItems) { 116 | const u = lineItems.length >= 2 ? parseFloat(lineItems[1]) : 0.0; 117 | const v = lineItems.length >= 3 ? parseFloat(lineItems[2]) : 0.0; 118 | const w = lineItems.length >= 4 ? parseFloat(lineItems[3]) : 0.0; 119 | 120 | this._currentModel().textureCoords.push({ u, v, w }); 121 | } 122 | 123 | _parseVertexNormal(lineItems) { 124 | const x = lineItems.length >= 2 ? 
parseFloat(lineItems[1]) : 0.0; 125 | const y = lineItems.length >= 3 ? parseFloat(lineItems[2]) : 0.0; 126 | const z = lineItems.length >= 4 ? parseFloat(lineItems[3]) : 0.0; 127 | 128 | this._currentModel().vertexNormals.push({ x, y, z }); 129 | } 130 | 131 | _parseLine(lineItems) { 132 | const totalVertices = (lineItems.length - 1); 133 | if (totalVertices < 2) { throw (`Line statement has less than 2 vertices${this.filePath}${this.lineNumber}`); } 134 | 135 | const line = []; 136 | 137 | for (let i = 0; i < totalVertices; i += 1) { 138 | const vertexString = lineItems[i + 1]; 139 | const vertexValues = vertexString.split('/'); 140 | 141 | if (vertexValues.length < 1 || vertexValues.length > 2) { throw (`Too many values (separated by /) for a single vertex${this.filePath}${this.lineNumber}`); } 142 | 143 | let vertexIndex = 0; 144 | let textureCoordsIndex = 0; 145 | vertexIndex = parseInt(vertexValues[0]); 146 | if (vertexValues.length > 1 && (vertexValues[1] != '')) { textureCoordsIndex = parseInt(vertexValues[1]); } 147 | 148 | line.push({ 149 | vertexIndex, 150 | textureCoordsIndex 151 | }); 152 | } 153 | this._currentModel().lines.push(line); 154 | } 155 | 156 | _parsePolygon(lineItems) { 157 | const totalVertices = (lineItems.length - 1); 158 | if (totalVertices < 3) { throw (`Face statement has less than 3 vertices${this.filePath}${this.lineNumber}`); } 159 | 160 | const face = { 161 | material: this.currentMaterial, 162 | group: this.currentGroup, 163 | smoothingGroup: this.smoothingGroup, 164 | vertices: [] 165 | }; 166 | 167 | for (let i = 0; i < totalVertices; i += 1) { 168 | const vertexString = lineItems[i + 1]; 169 | const vertexValues = vertexString.split('/'); 170 | 171 | if (vertexValues.length < 1 || vertexValues.length > 3) { throw (`Too many values (separated by /) for a single vertex${this.filePath}${this.lineNumber}`); } 172 | 173 | let vertexIndex = 0; 174 | let textureCoordsIndex = 0; 175 | let vertexNormalIndex = 0; 176 | vertexIndex = 
parseInt(vertexValues[0]); 177 | if (vertexValues.length > 1 && (vertexValues[1] != '')) { textureCoordsIndex = parseInt(vertexValues[1]); } 178 | if (vertexValues.length > 2) { vertexNormalIndex = parseInt(vertexValues[2]); } 179 | 180 | if (vertexIndex == 0) { throw 'Faces uses invalid vertex index of 0'; } 181 | 182 | // Negative vertex indices refer to the nth last defined vertex 183 | // convert these to postive indices for simplicity 184 | if (vertexIndex < 0) { vertexIndex = this._currentModel().vertices.length + 1 + vertexIndex; } 185 | 186 | face.vertices.push({ 187 | vertexIndex, 188 | textureCoordsIndex, 189 | vertexNormalIndex 190 | }); 191 | } 192 | this._currentModel().faces.push(face); 193 | } 194 | 195 | _parseMtlLib(lineItems) { 196 | if (lineItems.length >= 2) { this.result.materialLibraries.push(lineItems[1]); } 197 | } 198 | 199 | _parseUseMtl(lineItems) { 200 | if (lineItems.length >= 2) { this.currentMaterial = lineItems[1]; } 201 | } 202 | 203 | _parseSmoothShadingStatement(lineItems) { 204 | if (lineItems.length != 2) { throw 'Smoothing group statements must have exactly 1 argument (eg. s )'; } 205 | 206 | const groupNumber = (lineItems[1].toLowerCase() == 'off') ? 
/**
 * Parses OBJ text into a flat triangle list plus the model's bounding box.
 *
 * Only the first model of the parsed file is read. Polygons are fan-triangulated
 * around their first vertex. The OBJ y and z coordinates are swapped on the way out.
 * Every vertex occupies four floats (x, y, z, padding), so a triangle is 12 floats.
 * Two large floor triangles are appended at z_min + 0.01; they count towards
 * NUM_TRIS but are not included in BOUNDS.
 *
 * @param {string} file - contents of the .obj file.
 * @returns {{NUM_TRIS: number, TRI_ARR: number[], BOUNDS: {min: number[], max: number[]}}}
 * @throws {Error} if the file has no model, no faces, or a face references a vertex
 *   that does not exist.
 */
function parseObj(file) {
    // filler written into the 4th float of every vertex
    const PAD = 3.1415

    const output = new OBJFile(file).parse()

    // NOTE(review): models after the first one are ignored - confirm this is intended
    const model = output.models[0]
    if (!model) {
        throw new Error("parseObj: OBJ file contains no model data")
    }

    const { vertices, faces } = model
    if (faces.length === 0) {
        // without geometry the bounds would stay inverted (+-1e30) and the floor would sit at 1e30
        throw new Error("parseObj: OBJ file contains no faces")
    }

    const min = [ 1e30,  1e30,  1e30]
    const max = [-1e30, -1e30, -1e30]

    const trisArr = []
    let numTris = 0

    // returns a face corner as [x, y, z] with the OBJ y/z axes swapped and grows the bounds
    const corner = ({ vertexIndex }) => {
        const v = vertices[vertexIndex - 1]
        if (!v) {
            throw new Error(`parseObj: face references missing vertex index ${vertexIndex}`)
        }

        const p = [v.x, v.z, v.y]
        for (let axis = 0; axis < 3; axis++) {
            min[axis] = Math.min(min[axis], p[axis])
            max[axis] = Math.max(max[axis], p[axis])
        }
        return p
    }

    for (const face of faces) {
        const corners = face.vertices.map(corner)

        // fan triangulation: (0, i, i + 1)
        for (let i = 1; i < corners.length - 1; i++) {
            trisArr.push(
                ...corners[0], PAD,
                ...corners[i], PAD,
                ...corners[i + 1], PAD
            )
            numTris++
        }
    }

    // add a floor to the model
    const floorHeight = min[2] + .01
    const floorSize   = 10000.

    trisArr.push(
        -floorSize, -floorSize, floorHeight, PAD,
         floorSize, -floorSize, floorHeight, PAD,
        -floorSize,  floorSize, floorHeight, PAD,
         floorSize, -floorSize, floorHeight, PAD,
         floorSize,  floorSize, floorHeight, PAD,
        -floorSize,  floorSize, floorHeight, PAD
    )
    numTris += 2

    return {
        NUM_TRIS: numTris,
        TRI_ARR: trisArr,
        BOUNDS: { min, max }
    }
}
format: "rgba32float", dimension: "2d", usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING}) 45 | ] 46 | 47 | const DRAW_SM = device.createShaderModule({ 48 | code: VS + FS 49 | }) 50 | 51 | const DRAW_BG_LAYOUT = device.createBindGroupLayout({ 52 | entries: [ 53 | { 54 | binding: 0, 55 | visibility: GPUShaderStage.FRAGMENT, 56 | texture: { 57 | sampleType: "unfilterable-float", 58 | viewDimension: "2d", 59 | multisampled: false 60 | } 61 | } 62 | ] 63 | }) 64 | 65 | const DRAW_BGS = [ 66 | device.createBindGroup({ 67 | layout: DRAW_BG_LAYOUT, 68 | entries: [ 69 | { 70 | binding: 0, 71 | resource: tTextures[1].createView() 72 | } 73 | ] 74 | }), 75 | device.createBindGroup({ 76 | layout: DRAW_BG_LAYOUT, entries: [ 77 | { 78 | binding: 0, 79 | resource: tTextures[0].createView() 80 | } 81 | ] 82 | }) 83 | ] 84 | 85 | const DRAW_PIPELINE = device.createRenderPipeline({ 86 | layout: device.createPipelineLayout({bindGroupLayouts: [DRAW_BG_LAYOUT]}), 87 | vertex: { 88 | module: DRAW_SM, 89 | entryPoint: "vs" 90 | }, 91 | fragment: { 92 | module: DRAW_SM, 93 | entryPoint: "fs", 94 | targets: [ 95 | { 96 | format: CANVAS.presentationFormat 97 | } 98 | ] 99 | } 100 | }) 101 | 102 | const PT_I_BG_LAYOUT = device.createBindGroupLayout({ 103 | entries: [ 104 | { 105 | binding: 0, 106 | visibility: GPUShaderStage.COMPUTE, 107 | texture: { 108 | sampleType: "unfilterable-float", 109 | viewDimension: "2d", 110 | multisampled: false 111 | } 112 | }, 113 | { 114 | binding: 1, 115 | visibility: GPUShaderStage.COMPUTE, 116 | texture: { 117 | sampleType: "unfilterable-float", 118 | viewDimension: "2d", 119 | multisampled: false 120 | } 121 | }, 122 | { 123 | binding: 2, 124 | visibility: GPUShaderStage.COMPUTE, 125 | texture: { 126 | sampleType: "unfilterable-float", 127 | viewDimension: "2d", 128 | multisampled: false 129 | } 130 | }, 131 | { 132 | binding: 3, 133 | visibility: GPUShaderStage.COMPUTE, 134 | texture: { 135 | sampleType: "unfilterable-float", 136 
| viewDimension: "2d", 137 | multisampled: false 138 | } 139 | } 140 | ], 141 | label: "PT_I_BG_LAYOUT" 142 | }) 143 | 144 | const PT_I_BGS = [ 145 | device.createBindGroup({ 146 | layout: PT_I_BG_LAYOUT, 147 | entries: [ 148 | { 149 | binding: 0, 150 | resource: oTextures[0].createView() 151 | }, 152 | { 153 | binding: 1, 154 | resource: dTextures[0].createView() 155 | }, 156 | { 157 | binding: 2, 158 | resource: tTextures[0].createView() 159 | }, 160 | { 161 | binding: 3, 162 | resource: bTextures[0].createView() 163 | } 164 | ] 165 | }), 166 | device.createBindGroup({ 167 | layout: PT_I_BG_LAYOUT, 168 | entries: [ 169 | { 170 | binding: 0, 171 | resource: oTextures[1].createView() 172 | }, 173 | { 174 | binding: 1, 175 | resource: dTextures[1].createView() 176 | }, 177 | { 178 | binding: 2, 179 | resource: tTextures[1].createView() 180 | }, 181 | { 182 | binding: 3, 183 | resource: bTextures[1].createView() 184 | } 185 | ] 186 | }), 187 | ] 188 | 189 | const PT_O_BG_LAYOUT = device.createBindGroupLayout({ 190 | entries: [ 191 | { 192 | binding: 0, 193 | visibility: GPUShaderStage.COMPUTE, 194 | storageTexture: { 195 | format: "rgba32float", 196 | viewDimension: "2d" 197 | } 198 | }, 199 | { 200 | binding: 1, 201 | visibility: GPUShaderStage.COMPUTE, 202 | storageTexture: { 203 | format: "rgba32float", 204 | viewDimension: "2d" 205 | } 206 | }, 207 | { 208 | binding: 2, 209 | visibility: GPUShaderStage.COMPUTE, 210 | storageTexture: { 211 | format: "rgba32float", 212 | viewDimension: "2d" 213 | } 214 | }, 215 | { 216 | binding: 3, 217 | visibility: GPUShaderStage.COMPUTE, 218 | storageTexture: { 219 | format: "rgba32float", 220 | viewDimension: "2d" 221 | } 222 | } 223 | ], 224 | label: "PT_O_BG_LAYOUTs" 225 | }) 226 | 227 | const PT_O_BGS = [ 228 | device.createBindGroup({ 229 | layout: PT_O_BG_LAYOUT, 230 | entries: [ 231 | { 232 | binding: 0, 233 | resource: oTextures[1].createView() 234 | }, 235 | { 236 | binding: 1, 237 | resource: dTextures[1].createView() 
238 | }, 239 | { 240 | binding: 2, 241 | resource: tTextures[1].createView() 242 | }, 243 | { 244 | binding: 3, 245 | resource: bTextures[1].createView() 246 | } 247 | ] 248 | }), 249 | device.createBindGroup({ 250 | layout: PT_O_BG_LAYOUT, 251 | entries: [ 252 | { 253 | binding: 0, 254 | resource: oTextures[0].createView() 255 | }, 256 | { 257 | binding: 1, 258 | resource: dTextures[0].createView() 259 | }, 260 | { 261 | binding: 2, 262 | resource: tTextures[0].createView() 263 | }, 264 | { 265 | binding: 3, 266 | resource: bTextures[0].createView() 267 | } 268 | ] 269 | }) 270 | ] 271 | 272 | const PT_BVH_BG_LAYOUT = device.createBindGroupLayout({ 273 | entries: [ 274 | { 275 | binding: 0, 276 | visibility: GPUShaderStage.COMPUTE, 277 | buffer: { 278 | type: "storage" 279 | } 280 | }, 281 | { 282 | binding: 1, 283 | visibility: GPUShaderStage.COMPUTE, 284 | buffer: { 285 | type: "storage" 286 | } 287 | } 288 | ] 289 | }) 290 | 291 | const PT_BVH_BG = device.createBindGroup({ 292 | layout: PT_BVH_BG_LAYOUT, 293 | entries: [ 294 | { 295 | binding: 0, 296 | resource: { 297 | buffer: bvh.BVH_BUFFER 298 | } 299 | }, 300 | { 301 | binding: 1, 302 | resource: { 303 | buffer: bvh.O_TRIANGLE_BUFFER 304 | } 305 | } 306 | ] 307 | }) 308 | 309 | const PT_UNI_BG_LAYOUT = device.createBindGroupLayout({ 310 | entries: [ 311 | { 312 | binding: 0, 313 | visibility: GPUShaderStage.COMPUTE, 314 | buffer: { 315 | type: "uniform" 316 | } 317 | } 318 | ] 319 | }) 320 | 321 | const UNIFORM_BUFFER = device.createBuffer({ 322 | size: 32, 323 | usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM 324 | }) 325 | 326 | const PT_UNI_BG = device.createBindGroup({ 327 | layout: PT_UNI_BG_LAYOUT, 328 | entries: [ 329 | { 330 | binding: 0, 331 | resource: { 332 | buffer: UNIFORM_BUFFER 333 | } 334 | } 335 | ] 336 | }) 337 | 338 | const PT_SM = device.createShaderModule({ 339 | code: CS 340 | }) 341 | 342 | const PT_PIPELINE = device.createComputePipeline({ 343 | layout: 
device.createPipelineLayout({ 344 | bindGroupLayouts: [PT_I_BG_LAYOUT, PT_O_BG_LAYOUT, PT_BVH_BG_LAYOUT, PT_UNI_BG_LAYOUT] 345 | }), 346 | compute: { 347 | module: PT_SM, 348 | entryPoint: "main" 349 | } 350 | }) 351 | 352 | // some variables needed by the methods below 353 | let ctr = 0 354 | 355 | return { draw, sample, rotateView } 356 | 357 | async function sample() { 358 | if (bReset) ctr = 0 359 | if (ctr > 1024) return 360 | 361 | const PP_IDX = ctr++ % 2 362 | 363 | device.queue.writeBuffer( 364 | UNIFORM_BUFFER, 365 | 0, 366 | new Float32Array([ 367 | position[0], position[1], position[2], bReset ? 1 : 0, 368 | lookAt[0], lookAt[1], lookAt[2], 0. 369 | ]), 370 | 0 371 | ) 372 | 373 | // set reset flag to false so that we don't perpetually re-render 374 | bReset = false 375 | 376 | const CE = device.createCommandEncoder() 377 | const P = CE.beginComputePass() 378 | P.setPipeline(PT_PIPELINE) 379 | P.setBindGroup(0, PT_I_BGS[PP_IDX]) 380 | P.setBindGroup(1, PT_O_BGS[PP_IDX]) 381 | P.setBindGroup(2, PT_BVH_BG) 382 | P.setBindGroup(3, PT_UNI_BG) 383 | P.dispatchWorkgroups(Math.ceil(CANVAS.w / 8), Math.ceil(CANVAS.h / 8)) 384 | P.end() 385 | 386 | device.queue.submit([CE.finish()]) 387 | 388 | await device.queue.onSubmittedWorkDone() 389 | 390 | return 391 | } 392 | 393 | async function draw() { 394 | const PP_IDX = ctr % 2 395 | 396 | const CE = device.createCommandEncoder() 397 | const P = CE.beginRenderPass({ 398 | colorAttachments: [ 399 | { 400 | view: CANVAS.ctx.getCurrentTexture().createView(), 401 | clearValue: {r: 1., g: 0., b: 0., a: 1.}, 402 | loadOp: "clear", 403 | storeOp: "store" 404 | } 405 | ] 406 | }) 407 | P.setPipeline(DRAW_PIPELINE) 408 | P.setBindGroup(0, DRAW_BGS[PP_IDX]) 409 | P.draw(6) 410 | P.end() 411 | 412 | device.queue.submit([CE.finish()]) 413 | 414 | await device.queue.onSubmittedWorkDone() 415 | 416 | return 417 | } 418 | 419 | function rotateView() { 420 | bReset = true 421 | rot += Math.PI / 4 422 | position = [ 423 | lookAt[0] 
+ Math.cos(rot) * dist, 424 | lookAt[1] + Math.sin(rot) * dist, 425 | lookAt[2] 426 | ] 427 | } 428 | 429 | function SRC() { 430 | let CS = /* wgsl */ ` 431 | @group(0) @binding(0) var otex : texture_2d; 432 | @group(0) @binding(1) var dtex : texture_2d; 433 | @group(0) @binding(2) var ttex : texture_2d; 434 | @group(0) @binding(3) var btex : texture_2d; 435 | 436 | @group(1) @binding(0) var oout : texture_storage_2d; 437 | @group(1) @binding(1) var dout : texture_storage_2d; 438 | @group(1) @binding(2) var tout : texture_storage_2d; 439 | @group(1) @binding(3) var bout : texture_storage_2d; 440 | 441 | struct BVHNode { 442 | aabb_l_min : vec3f, 443 | l_child : i32, 444 | aabb_l_max : vec3f, 445 | f_1 : i32, 446 | aabb_r_min : vec3f, 447 | r_child : i32, 448 | aabb_r_max : vec3f, 449 | f_2 : i32 450 | }; 451 | 452 | struct Triangle { 453 | v0 : vec3f, 454 | v1 : vec3f, 455 | v2 : vec3f 456 | }; 457 | 458 | @group(2) @binding(0) var bvh : array; 459 | @group(2) @binding(1) var tri : array; 460 | 461 | struct Uniforms { 462 | pos : vec3f, 463 | rst : f32, 464 | lat : vec3f, 465 | }; 466 | 467 | @group(3) @binding(0) var uniforms : Uniforms; 468 | 469 | const Pi = 3.14159265358979323846; 470 | const InvPi = 0.31830988618379067154; 471 | const Inv2Pi = 0.15915494309189533577; 472 | const Inv4Pi = 0.07957747154594766788; 473 | const PiOver2 = 1.57079632679489661923; 474 | const PiOver4 = 0.78539816339744830961; 475 | const Sqrt2 = 1.41421356237309504880; 476 | 477 | const sw_f : vec2f = vec2f(${CANVAS.w}., ${CANVAS.h}.); 478 | const sw_u : vec2u = vec2u(${CANVAS.w}u, ${CANVAS.h}u); 479 | 480 | const fov : f32 = 60.f; 481 | const sinfov : f32 = sin(.5 * fov * Pi / 180.f); 482 | const aspect : f32 = ${CANVAS.w / CANVAS.h}f; 483 | 484 | const eps : f32 = .0001; 485 | 486 | const mbounce : f32 = 5.; 487 | 488 | struct RayHit { 489 | norm : vec3f, 490 | dist : f32 491 | }; 492 | 493 | var stack : array; 494 | 495 | fn intersect_bvh(o_in : vec3f, d_in : vec3f) -> RayHit { 496 
| 497 | var o : vec3f = o_in; 498 | var d : vec3f = d_in; 499 | 500 | // (lazy) fix for divide by zero errors - change later 501 | d += vec3f(abs(d) < vec3f(.00001)) * vec3f(.00001); 502 | 503 | var dist : f32 = 1e30f; 504 | var norm : vec3f = vec3f(0.f); 505 | 506 | var stack_ptr : i32 = 0; 507 | var node_idx : i32 = 0; 508 | 509 | while (stack_ptr >= 0) { 510 | // we are testing against a leaf node 511 | if (node_idx < 0) { 512 | var tr : Triangle = tri[-(node_idx + 1)]; 513 | 514 | var n_dis : vec4f = tri_intersect(o, d, tr); 515 | 516 | if (n_dis.w > 0.f && n_dis.w < dist) { 517 | norm = n_dis.xyz; 518 | dist = min(n_dis.w, dist); 519 | } 520 | 521 | stack_ptr -= 1; 522 | node_idx = stack[stack_ptr]; 523 | } else { 524 | var node : BVHNode = bvh[node_idx]; 525 | 526 | var l_dist : f32 = aabb_intersect( 527 | node.aabb_l_min, 528 | node.aabb_l_max, 529 | o, d 530 | ); 531 | 532 | var r_dist : f32 = aabb_intersect( 533 | node.aabb_r_min, 534 | node.aabb_r_max, 535 | o, d 536 | ); 537 | 538 | var l_valid : bool = l_dist != -1e30f && l_dist < dist; 539 | var r_valid : bool = r_dist != -1e30f && r_dist < dist; 540 | 541 | if (l_valid && r_valid) { 542 | var f_idx : i32; 543 | var c_idx : i32; 544 | 545 | if (l_dist < r_dist) { 546 | c_idx = node.l_child; 547 | f_idx = node.r_child; 548 | } else { 549 | c_idx = node.r_child; 550 | f_idx = node.l_child; 551 | } 552 | 553 | stack[stack_ptr] = f_idx; 554 | stack_ptr += 1; 555 | node_idx = c_idx; 556 | } else 557 | if (l_valid) { 558 | node_idx = node.l_child; 559 | } else 560 | if (r_valid) { 561 | node_idx = node.r_child; 562 | } else { 563 | stack_ptr -= 1; 564 | node_idx = stack[stack_ptr]; 565 | } 566 | } 567 | } 568 | 569 | var returned : RayHit; 570 | 571 | returned.dist = dist; 572 | 573 | if (dot(d, -norm) > 0.) 
{ 574 | returned.norm = norm; 575 | } else { 576 | returned.norm = -norm; 577 | } 578 | 579 | if (returned.dist == 1e30f) { 580 | returned.dist = -1.f; 581 | } 582 | 583 | return returned; 584 | } 585 | 586 | // from: https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/moller-trumbore-ray-triangle-intersection.html 587 | fn tri_intersect(o : vec3f, d : vec3f, tri : Triangle) -> vec4f { 588 | var v0v1 : vec3f = tri.v1 - tri.v0; 589 | var v0v2 : vec3f = tri.v2 - tri.v0; 590 | var pvec : vec3f = cross(d, v0v2); 591 | 592 | var det : f32 = dot(v0v1, pvec); 593 | 594 | if (abs(det) < 1e-10) { 595 | return vec4f(-1.f); 596 | } 597 | 598 | var i_det : f32 = 1.f / det; 599 | 600 | var tvec : vec3f = o - tri.v0; 601 | 602 | var u : f32 = dot(tvec, pvec) * i_det; 603 | 604 | if (u < 0.f || u > 1.f) { 605 | return vec4f(-1.f); 606 | } 607 | 608 | var qvec : vec3f = cross(tvec, v0v1); 609 | 610 | var v : f32 = dot(d, qvec) * i_det; 611 | if (v < 0.f || u + v > 1.f) { 612 | return vec4f(-1.f); 613 | } 614 | 615 | return vec4f( 616 | normalize(cross(v0v1, v0v2)), 617 | dot(v0v2, qvec) * i_det 618 | ); 619 | } 620 | 621 | fn aabb_intersect(low : vec3f, high : vec3f, o : vec3f, d : vec3f) -> f32 { 622 | var iDir = 1. / d; 623 | var f = (high - o) * iDir; var n = (low - o) * iDir; 624 | var tmax = max(f, n); var tmin = min(f, n); 625 | var t0 = max(tmin.x, max(tmin.y, tmin.z)); 626 | var t1 = min(tmax.x, min(tmax.y, tmax.z)); 627 | return select(-1e30, select(t0, -1e30, t1 < 0.), t1 >= t0); 628 | } 629 | 630 | @compute @workgroup_size(8, 8, 1) 631 | fn main(@builtin(global_invocation_id) global_id : vec3u) { 632 | if (any(global_id.xy >= sw_u)) {return;} 633 | var coord : vec2i = vec2i(global_id.xy); 634 | 635 | var o : vec4f; 636 | var d : vec4f; 637 | var t : vec4f; 638 | var b : vec4f; 639 | 640 | if (uniforms.rst == 0.) 
{ 641 | o = textureLoad(otex, coord, 0); 642 | d = textureLoad(dtex, coord, 0); 643 | t = textureLoad(ttex, coord, 0); 644 | b = textureLoad(btex, coord, 0); 645 | } 646 | 647 | ptStep(coord, &o, &d, &b, &t); 648 | 649 | textureStore(oout, coord, o); 650 | textureStore(dout, coord, d); 651 | textureStore(tout, coord, t); 652 | textureStore(bout, coord, b); 653 | } 654 | 655 | fn ptStep(coord : vec2i, oin : ptr, din : ptr, bin : ptr, tin : ptr) { 656 | var o : vec3f = (*oin).xyz; 657 | var d : vec3f = (*din).xyz; 658 | var b : vec3f = (*bin).xyz; 659 | 660 | var seed : f32 = (*oin).a; 661 | var bounces : f32 = (*din).a; 662 | 663 | var bNewPath : bool = all(b == vec3f(0.)); 664 | var frame0 : bool = bNewPath && ((*tin).a == 0.); 665 | if (frame0) { 666 | seed = f32(baseHash(vec2u(coord))) / f32(0xffffffffu) + .008; 667 | } 668 | 669 | if (bNewPath) { 670 | getCameraRay(vec2f(coord) + rand2(seed), &o, &d); seed += 2.; 671 | b = vec3f(1.); 672 | } 673 | 674 | var res : RayHit = intersect_bvh(o, d); 675 | if (res.dist >= 0.) { 676 | var o1 : vec3f = normalize(ortho(res.norm)); 677 | var o2 : vec3f = normalize(cross(o1, res.norm)); 678 | 679 | var wo : vec3f = toLocal(o1, o2, res.norm, -d); 680 | var wi : vec3f; 681 | var c : vec3f; 682 | 683 | o = o + d * res.dist; 684 | 685 | c = lambertDiffuse(&seed, &wi, wo, vec3f(.3f)); 686 | //c = ggxSmith(&seed, &wi, wo, vec3f(.33f), .1); 687 | //c = perfectMirror(&wi, wo, vec3f(.2)); 688 | 689 | b *= c; 690 | o += res.norm * 1.01 * eps; 691 | d = toWorld(o1, o2, res.norm, wi); 692 | 693 | if (bounces > 3) { 694 | var q : f32 = max(.05f, 1. - b.y); 695 | if (rand2(seed).x < q) { 696 | b = vec3f(0.); 697 | } else { 698 | b /= 1. 
- q; 699 | } seed += 2.; 700 | } 701 | 702 | if (all(b == vec3f(0.))) { 703 | *tin += vec4f(0., 0., 0., 1.); 704 | bounces = -1.; 705 | } 706 | } else { 707 | *tin += vec4f(b * 8., 1.); 708 | bounces = -1.; 709 | b = vec3f(0.); 710 | } 711 | 712 | *oin = vec4f(o, seed); 713 | *din = vec4f(d, bounces + 1.); 714 | *bin = vec4f(b, 1.); 715 | } 716 | 717 | fn lambertDiffuse(seed : ptr, wi : ptr, wo : vec3f, c : vec3f) -> vec3f { 718 | *wi = cosineSampleHemisphere(rand2(*seed)); *seed += 2.; 719 | return pow(c, vec3f(2.2)); 720 | } 721 | 722 | fn getCameraRay(coord : vec2f, o : ptr, d : ptr) { 723 | var sspace : vec2f = coord / sw_f; sspace = sspace * 2. - vec2f(1.); sspace.y *= -1.; 724 | var local : vec3f = vec3f( 725 | aspect * sspace.x * sinfov, 726 | 1., 727 | sspace.y * sinfov 728 | ); 729 | var forward : vec3f = normalize(vec3f(uniforms.lat - uniforms.pos)); 730 | var right : vec3f = normalize(vec3f(forward.y, -forward.x, 0.)); 731 | var up : vec3f = cross(right, forward); 732 | 733 | *o = uniforms.pos; 734 | *d = toWorld(right, forward, up, normalize(local)); 735 | } 736 | 737 | fn ortho(v : vec3) -> vec3 { 738 | if (abs(v.x) > abs(v.y)) { 739 | return vec3(-v.y, v.x, 0.); 740 | } 741 | return vec3(0., -v.z, v.y); 742 | } 743 | 744 | fn toLocal(v_x : vec3f, v_y : vec3f, v_z : vec3f, w : vec3f) -> vec3f { 745 | return vec3f(dot(v_x, w), dot(v_y, w), dot(v_z, w)); 746 | } 747 | 748 | fn toWorld(v_x : vec3f, v_y : vec3f, v_z : vec3f, w : vec3f) -> vec3f { 749 | return v_x * w.x + v_y * w.y + v_z * w.z; 750 | } 751 | 752 | //GPU hashes from: https://www.shadertoy.com/view/XlycWh 753 | fn baseHash(p : vec2u) -> u32 { 754 | var p2 : vec2u = 1103515245u*((p >> vec2u(1u))^(p.yx)); 755 | var h32 : u32 = 1103515245u*((p2.x)^(p2.y>>3u)); 756 | return h32^(h32 >> 16u); 757 | } 758 | fn rand2(seed : f32) -> vec2f { 759 | var n : u32 = baseHash(bitcast(vec2f(seed + 1., seed + 2.))); 760 | var rz : vec2u = vec2u(n, n * 48271u); 761 | return vec2f(rz.xy & 
vec2u(0x7fffffffu))/f32(0x7fffffff); 762 | } 763 | 764 | //from: pbrt 765 | fn cosineSampleHemisphere(r2 : vec2f) -> vec3f { 766 | var d : vec2f = uniformSampleDisk(r2); 767 | var z : f32 = sqrt(max(0., 1. - d.x * d.x - d.y * d.y)); 768 | return vec3f(d.xy, z); 769 | } 770 | fn uniformSampleDisk(r2 : vec2f) -> vec2f { 771 | var r : f32 = sqrt(max(r2.x, 0.)); 772 | var theta : f32 = 2. * Pi * r2.y; 773 | return vec2f(r * cos(theta), r * sin(theta)); 774 | }` 775 | 776 | let VS = /* wgsl */ ` 777 | @vertex 778 | fn vs(@builtin(vertex_index) vertexIndex : u32) -> @builtin(position) vec4f { 779 | switch(vertexIndex) { 780 | case 0u: { 781 | return vec4f(1., 1., 0., 1.);} 782 | case 1u: { 783 | return vec4f(-1., 1., 0., 1.);} 784 | case 2u: { 785 | return vec4f(-1., -1., 0., 1.);} 786 | case 3u: { 787 | return vec4f(1., -1., 0., 1.);} 788 | case 4u: { 789 | return vec4f(1., 1., 0., 1.);} 790 | case 5u: { 791 | return vec4f(-1., -1., 0., 1.);} 792 | default: { 793 | return vec4f(0., 0., 0., 0.);} 794 | } 795 | }` 796 | 797 | let FS = /* wgsl */ ` 798 | @group(0) @binding(0) var image : texture_2d; 799 | 800 | fn lum(z : vec3f) -> f32 { 801 | return dot(z, vec3f(.2126, .7152, .0722)); 802 | } 803 | 804 | @fragment 805 | fn fs(@builtin(position) fragCoord : vec4f) -> @location(0) vec4f { 806 | var raw : vec4f = textureLoad(image, vec2i(fragCoord.xy), 0); 807 | var col : vec3f = raw.xyz / raw.a; 808 | 809 | // apply reinhard tonemap 810 | col = col / (1.f + lum(col)); 811 | 812 | return vec4f( 813 | pow(col, vec3f(1. / 2.2)), 814 | 1. 
/**
 * Configures a canvas for WebGPU presentation and sizes its drawing buffer.
 *
 * The drawing buffer is set to the canvas' CSS size multiplied by `scale`,
 * rounded up, and never smaller than 1x1: the path tracer creates textures of
 * this size and bakes `w / h` into its shader source, so a zero dimension
 * (e.g. a hidden canvas) would produce invalid textures and a NaN aspect ratio.
 *
 * @param {GPUDevice} device - device the context is configured with.
 * @param {HTMLCanvasElement} canvas - canvas to render into.
 * @param {number} [scale=1.5] - drawing-buffer pixels per CSS pixel.
 * @returns {{ctx: GPUCanvasContext, presentationFormat: string, w: number, h: number}}
 * @throws {Error} if the canvas cannot provide a "webgpu" context.
 */
function initCanvas(device, canvas, scale = 1.5) {
    const ctx = canvas.getContext("webgpu")
    if (!ctx) {
        // getContext returns null when the context type is unsupported or the canvas already has another context
        throw new Error("initCanvas: could not acquire a WebGPU canvas context")
    }

    const presentationFormat = navigator.gpu.getPreferredCanvasFormat()
    ctx.configure({device, format: presentationFormat})

    const w = Math.max(1, Math.ceil(canvas.clientWidth  * scale))
    const h = Math.max(1, Math.ceil(canvas.clientHeight * scale))

    canvas.width  = w
    canvas.height = h

    return {
        ctx, presentationFormat, w, h
    }
}