├── README.md
├── index.html
├── media
│   └── demo.gif
└── scripts
    ├── bvh
    │   ├── build-bvh.js
    │   └── kernels
    │       ├── aabb-z-idx.js
    │       ├── bvh-up-pass.js
    │       ├── radix-sort.js
    │       ├── radix-tree.js
    │       └── rearrange.js
    ├── main.js
    ├── obj
    │   ├── obj-file-parser.js
    │   └── parse-obj.js
    └── pathtracer
        └── pathtracer.js
/README.md:
--------------------------------------------------------------------------------
1 | # WEBGPU LBVH demo
2 |
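3 | This demo builds a linear bounding volume hierarchy (LBVH) on the GPU with WebGPU compute shaders, following the algorithm described in [this paper](https://research.nvidia.com/sites/default/files/pubs/2012-06_Maximizing-Parallelism-in/karras2012hpg_paper.pdf) (Karras, "Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d Trees", HPG 2012).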
4 |
5 | ![demo](media/demo.gif)
6 |
7 | You can try the demo [here](https://addisonprairie.github.io/WebGPU-LVBH-demo/) - just drag and drop an .obj model. It uses [this repository](https://github.com/WesUnwin/obj-file-parser) for parsing .obj files.
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
WEBGPU BVH DEMO
40 |
Uses WebGPU compute shaders to build a BVH and renders the result with path tracing. Drag and drop an .obj file to begin.
41 | Triangles : {
2,000,000 }
42 | Parse File: {
0.000001s }
43 | Build BVH : {
0.000001s }
44 |
[ Rotate View ]
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/media/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AddisonPrairie/WebGPU-LVBH-demo/afd0b8f946f56827b41a86c34f6222e137777027/media/demo.gif
--------------------------------------------------------------------------------
/scripts/bvh/build-bvh.js:
--------------------------------------------------------------------------------
1 | function initBVHBuild(device) {
2 |
3 | // initialize all other shaders
4 | const radixSortKernel = initRadixSortKernel(device)
5 | const radixTreeKernel = initRadixTreeKernel(device)
6 | const aabb_ZidxKernel = initAABB_ZidxKernel(device)
7 | const bvhUpPassKernel = initBVHUpPassKernel(device)
8 | const rearrangeKernel = initRearrangeKernel(device)
9 |
10 | return { build }
11 |
12 | async function build(TRI_ARRAY, NUM_TRIS, MODEL_BOUNDS) {
13 | // create GPU triangle buffer and copy values to it
14 | const I_TRIANGE_BUFFER = device.createBuffer({
15 | size: NUM_TRIS * 48,
16 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
17 | mappedAtCreation: true
18 | })
19 |
20 | new Float32Array(I_TRIANGE_BUFFER.getMappedRange()).set(TRI_ARRAY)
21 | I_TRIANGE_BUFFER.unmap()
22 |
23 | // compute AABB and morton code for each triangle
24 | const { AABB_BUFFER, Z_IDX_BUFFER } = await aabb_ZidxKernel.execute(
25 | I_TRIANGE_BUFFER,
26 | NUM_TRIS,
27 | MODEL_BOUNDS
28 | )
29 |
30 | // sort the morton code buffer and store how indices change
31 | const { IDX_BUFFER } = await radixSortKernel.execute(
32 | Z_IDX_BUFFER,
33 | NUM_TRIS
34 | )
35 |
36 | // compute the radix tree over the morton codes
37 | const { PARENT_BUFFER } = await radixTreeKernel.execute(
38 | Z_IDX_BUFFER,
39 | NUM_TRIS
40 | )
41 |
42 | // combine all information from previous passes into BVH
43 | const { BVH_BUFFER } = await bvhUpPassKernel.execute(
44 | IDX_BUFFER,
45 | AABB_BUFFER,
46 | PARENT_BUFFER,
47 | NUM_TRIS
48 | )
49 |
50 |
51 | // rearrange the triangles
52 | const { O_TRIANGLE_BUFFER } = await rearrangeKernel.execute(
53 | I_TRIANGE_BUFFER,
54 | IDX_BUFFER,
55 | NUM_TRIS
56 | )
57 |
58 | // free all buffers that are not input/output
59 | AABB_BUFFER.destroy()
60 | Z_IDX_BUFFER.destroy()
61 | PARENT_BUFFER.destroy()
62 | IDX_BUFFER.destroy()
63 | I_TRIANGE_BUFFER.destroy()
64 |
65 | return { BVH_BUFFER, O_TRIANGLE_BUFFER }
66 | }
67 | }
--------------------------------------------------------------------------------
/scripts/bvh/kernels/aabb-z-idx.js:
--------------------------------------------------------------------------------
1 |
2 | function initAABB_ZidxKernel(device) {
3 | // shader parameters
4 | const WG_SIZE = 64
5 |
6 | // create bind group layout, shader module and pipeline
7 | const BG_LAYOUT = device.createBindGroupLayout({
8 | entries: [
9 | {
10 | binding: 0,
11 | visibility: GPUShaderStage.COMPUTE,
12 | buffer: {
13 | type: "storage"
14 | }
15 | },
16 | {
17 | binding: 1,
18 | visibility: GPUShaderStage.COMPUTE,
19 | buffer: {
20 | type: "storage"
21 | }
22 | },
23 | {
24 | binding: 2,
25 | visibility: GPUShaderStage.COMPUTE,
26 | buffer: {
27 | type: "storage"
28 | }
29 | },
30 | {
31 | binding: 3,
32 | visibility: GPUShaderStage.COMPUTE,
33 | buffer: {
34 | type: "uniform"
35 | }
36 | }
37 | ]
38 | })
39 |
40 | const SM = device.createShaderModule({
41 | code: SRC(),
42 | label: "AABB/Z-index shader module"
43 | })
44 |
45 | const PIPELINE = device.createComputePipeline({
46 | layout: device.createPipelineLayout({
47 | bindGroupLayouts: [BG_LAYOUT]
48 | }),
49 | compute: {
50 | module: SM,
51 | entryPoint: "compute_aabb_z_idx"
52 | }
53 | })
54 |
55 | return { execute }
56 |
57 | async function execute(TRIANGLE_BUFFER, size, bounds) {
58 | if (TRIANGLE_BUFFER.size != 48 * size) {
59 | console.warn(`in AABB/Z-index: buffer size [ ${TRIANGLE_BUFFER.size} ] does not match requested size [ ${size} ]`)
60 | return
61 | }
62 |
63 | // create all the necessary buffers
64 | const AABB_BUFFER = device.createBuffer({
65 | size: size * 32,
66 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
67 | })
68 | const Z_IDX_BUFFER = device.createBuffer({
69 | size: size * 4,
70 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
71 | })
72 | const UNIFORM_BUFFER = device.createBuffer({
73 | size: 32,
74 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
75 | })
76 |
77 | // create the bind group
78 | const BG = device.createBindGroup({
79 | layout: BG_LAYOUT,
80 | entries: [
81 | {
82 | binding: 0,
83 | visibility: GPUShaderStage.COMPUTE,
84 | resource: {
85 | buffer: TRIANGLE_BUFFER
86 | }
87 | },
88 | {
89 | binding: 1,
90 | visibility: GPUShaderStage.COMPUTE,
91 | resource: {
92 | buffer: AABB_BUFFER
93 | }
94 | },
95 | {
96 | binding: 2,
97 | visibility: GPUShaderStage.COMPUTE,
98 | resource: {
99 | buffer: Z_IDX_BUFFER
100 | }
101 | },
102 | {
103 | binding: 3,
104 | visibility: GPUShaderStage.COMPUTE,
105 | resource: {
106 | buffer: UNIFORM_BUFFER
107 | }
108 | }
109 | ]
110 | })
111 |
112 | {// send work to GPU
113 | const BUFF = new ArrayBuffer(32)
114 | const DV = new DataView(BUFF)
115 |
116 | DV.setFloat32( 0, bounds.min[0], true)
117 | DV.setFloat32( 4, bounds.min[1], true)
118 | DV.setFloat32( 8, bounds.min[2], true)
119 |
120 | DV.setFloat32(16, bounds.max[0], true)
121 | DV.setFloat32(20, bounds.max[1], true)
122 | DV.setFloat32(24, bounds.max[2], true)
123 |
124 | DV.setInt32(12, size, true)
125 |
126 | device.queue.writeBuffer(
127 | UNIFORM_BUFFER,
128 | 0,
129 | BUFF,
130 | 0,
131 | 32
132 | )
133 |
134 | const CE = device.createCommandEncoder()
135 | const P = CE.beginComputePass()
136 |
137 | P.setPipeline(PIPELINE)
138 | P.setBindGroup(0, BG)
139 | P.dispatchWorkgroups(Math.ceil(size / WG_SIZE))
140 | P.end()
141 |
142 | device.queue.submit([CE.finish()])
143 | }
144 |
145 | await device.queue.onSubmittedWorkDone()
146 |
147 | return { AABB_BUFFER, Z_IDX_BUFFER }
148 | }
149 |
150 | function SRC() {
151 | return /* wgsl */ `
152 |
153 | struct Triangle {
154 | v0 : vec3f,
155 | v1 : vec3f,
156 | v2 : vec3f
157 | };
158 |
159 | struct AABB {
160 | min : vec3f,
161 | max : vec3f
162 | };
163 |
164 | struct Uniforms {
165 | min : vec3f,
166 | num : i32,
167 | max : vec3f,
168 | f_1 : i32
169 | };
170 |
171 | @group(0) @binding(0) var triangles : array;
172 | @group(0) @binding(1) var aabbs : array;
173 | @group(0) @binding(2) var z_indexes : array;
174 | @group(0) @binding(3) var uniforms : Uniforms;
175 |
176 | @compute @workgroup_size(${WG_SIZE})
177 | fn compute_aabb_z_idx(@builtin(global_invocation_id) global_id : vec3u) {
178 | var idx : i32 = i32(global_id.x);
179 | if (idx >= uniforms.num) {
180 | return;
181 | }
182 |
183 | var tri : Triangle = triangles[idx];
184 |
185 | var box : AABB;
186 | box.min = min(tri.v0, min(tri.v1, tri.v2));
187 | box.max = max(tri.v0, max(tri.v1, tri.v2));
188 |
189 | aabbs[idx] = box;
190 |
191 | var cen : vec3f = (box.max + box.min) * .5f;
192 | var rel : vec3f = (cen - uniforms.min) / (uniforms.max - uniforms.min);
193 |
194 | z_indexes[idx] = morton_code(vec3u(rel * 1023.99f));
195 | }
196 |
197 | fn morton_code(upos : vec3u) -> u32 {
198 | return split_3(upos.x) | (split_3(upos.y) << 1) | (split_3(upos.z) << 2);
199 | }
200 |
201 | // from: https://stackoverflow.com/questions/1024754/how-to-compute-a-3d-morton-number-interleave-the-bits-of-3-ints
202 | fn split_3(u : u32) -> u32 {
203 | var x : u32 = u;
204 | x = (x | (x << 16)) & 0x030000FFu;
205 | x = (x | (x << 8)) & 0x0300F00Fu;
206 | x = (x | (x << 4)) & 0x030C30C3u;
207 | x = (x | (x << 2)) & 0x09249249u;
208 | return x;
209 | }`
210 | }
211 | }
--------------------------------------------------------------------------------
/scripts/bvh/kernels/bvh-up-pass.js:
--------------------------------------------------------------------------------
1 |
2 | function initBVHUpPassKernel(device) {
3 | // shader parameters
4 | const WG_SIZE = 64
5 |
6 | // create bind group layouts, shader module and pipeline
7 | const BG_LAYOUTS = [
8 | device.createBindGroupLayout({
9 | entries: [
10 | {
11 | binding: 0,
12 | visibility: GPUShaderStage.COMPUTE,
13 | buffer: {
14 | type: "storage"
15 | }
16 | },
17 | {
18 | binding: 1,
19 | visibility: GPUShaderStage.COMPUTE,
20 | buffer: {
21 | type: "storage"
22 | }
23 | },
24 | {
25 | binding: 2,
26 | visibility: GPUShaderStage.COMPUTE,
27 | buffer: {
28 | type: "storage"
29 | }
30 | }
31 | ]
32 | }),
33 | device.createBindGroupLayout({
34 | entries: [
35 | {
36 | binding: 0,
37 | visibility: GPUShaderStage.COMPUTE,
38 | buffer: {
39 | type: "storage"
40 | }
41 | },
42 | {
43 | binding: 1,
44 | visibility: GPUShaderStage.COMPUTE,
45 | buffer: {
46 | type: "uniform"
47 | }
48 | },
49 | ]
50 | })
51 | ]
52 |
53 | const SM = device.createShaderModule({
54 | code: SRC(),
55 | label: "radix tree shader module"
56 | })
57 |
58 | const PIPELINE = device.createComputePipeline({
59 | layout: device.createPipelineLayout({
60 | bindGroupLayouts: BG_LAYOUTS
61 | }),
62 | compute: {
63 | module: SM,
64 | entryPoint: "bvh_upward_pass"
65 | }
66 | })
67 |
68 | return { execute }
69 |
70 | async function execute(IDX_BUFFER, AABB_BUFFER, PARENT_BUFFER, size) {
71 | // create all the necessary buffers
72 | const BVH_BUFFER = device.createBuffer({
73 | size: size * 64,
74 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
75 | })
76 | const UNIFORM_BUFFER = device.createBuffer({
77 | size: 16,
78 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
79 | })
80 |
81 | const BGS = [
82 | device.createBindGroup({
83 | layout: BG_LAYOUTS[0],
84 | entries: [
85 | {
86 | binding: 0,
87 | visibility: GPUShaderStage.COMPUTE,
88 | resource: {
89 | buffer: IDX_BUFFER
90 | }
91 | },
92 | {
93 | binding: 1,
94 | visibility: GPUShaderStage.COMPUTE,
95 | resource: {
96 | buffer: AABB_BUFFER
97 | }
98 | },
99 | {
100 | binding: 2,
101 | visibility: GPUShaderStage.COMPUTE,
102 | resource: {
103 | buffer: PARENT_BUFFER
104 | }
105 | }
106 | ]
107 | }),
108 | device.createBindGroup({
109 | layout: BG_LAYOUTS[1],
110 | entries: [
111 | {
112 | binding: 0,
113 | visibility: GPUShaderStage.COMPUTE,
114 | resource: {
115 | buffer: BVH_BUFFER
116 | }
117 | },
118 | {
119 | binding: 1,
120 | visibility: GPUShaderStage.COMPUTE,
121 | resource: {
122 | buffer: UNIFORM_BUFFER
123 | }
124 | }
125 | ]
126 | })
127 | ]
128 |
129 | {// send work to GPU
130 | device.queue.writeBuffer(
131 | UNIFORM_BUFFER,
132 | 0,
133 | new Int32Array([
134 | size,
135 | 0,
136 | 0,
137 | 0
138 | ])
139 | )
140 |
141 | const CE = device.createCommandEncoder()
142 | const P = CE.beginComputePass()
143 |
144 | P.setPipeline(PIPELINE)
145 | P.setBindGroup(0, BGS[0])
146 | P.setBindGroup(1, BGS[1])
147 | P.dispatchWorkgroups(Math.ceil(size / WG_SIZE))
148 | P.end()
149 |
150 | device.queue.submit([CE.finish()])
151 | }
152 |
153 | await device.queue.onSubmittedWorkDone()
154 |
155 | return { BVH_BUFFER }
156 | }
157 |
158 | function SRC() {
159 | return /* wgsl */ `
160 |
161 | struct BVHNode {
162 | aabb_l_min_x : atomic,
163 | aabb_l_min_y : atomic,
164 | aabb_l_min_z : atomic,
165 | l_child : atomic,
166 | aabb_l_max_x : atomic,
167 | aabb_l_max_y : atomic,
168 | aabb_l_max_z : atomic,
169 | f_1 : atomic, // Used for synchronization
170 | aabb_r_min_x : atomic,
171 | aabb_r_min_y : atomic,
172 | aabb_r_min_z : atomic,
173 | r_child : atomic,
174 | aabb_r_max_x : atomic,
175 | aabb_r_max_y : atomic,
176 | aabb_r_max_z : atomic,
177 | f_2 : atomic
178 | };
179 |
180 | struct AABB {
181 | min : vec3f,
182 | max : vec3f
183 | };
184 |
185 | struct Uniforms {
186 | num : i32,
187 | f_1 : i32,
188 | f_2 : i32,
189 | f_3 : i32
190 | };
191 |
192 | @group(0) @binding(0) var idx_arr : array;
193 | @group(0) @binding(1) var aabb_arr : array;
194 | @group(0) @binding(2) var par_arr : array;
195 |
196 | @group(1) @binding(0) var bvh : array;
197 | @group(1) @binding(1) var uniforms : Uniforms;
198 |
199 | @compute @workgroup_size(${WG_SIZE})
200 | fn bvh_upward_pass(@builtin(global_invocation_id) global_id : vec3u) {
201 | var idx : i32 = i32(global_id.x);
202 | if (idx >= uniforms.num) {
203 | return;
204 | }
205 |
206 | var bbox : AABB = aabb_arr[idx_arr[idx]];
207 |
208 | // slightly perturb the bounding box position for check on line ~266
209 | bbox.min -= vec3f(bbox.min == vec3f(0.)) * vec3f(1e-8f);
210 | bbox.max += vec3f(bbox.max == vec3f(0.)) * vec3f(1e-8f);
211 |
212 | var c_idx : i32 = idx;
213 | var w_idx : i32 = -(idx + 1);
214 | var level : i32 = 0;
215 |
216 | var bSkipped : bool = false;
217 |
218 | while ((w_idx != 0 || level == 0) && !bSkipped) {
219 | var p_idx : i32;
220 | if (level == 0) {
221 | p_idx = par_arr[c_idx + uniforms.num];
222 | } else {
223 | p_idx = par_arr[c_idx];
224 | }
225 |
226 | if (!bSkipped) {
227 | var sibling : i32;
228 |
229 | if (!bSkipped) {
230 | sibling = atomicAdd(&bvh[p_idx].f_1, 1);
231 | }
232 |
233 | if (sibling == 0 && !bSkipped) {
234 | atomicStore(&bvh[p_idx].aabb_l_min_x, bitcast(bbox.min.x));
235 | atomicStore(&bvh[p_idx].aabb_l_min_y, bitcast(bbox.min.y));
236 | atomicStore(&bvh[p_idx].aabb_l_min_z, bitcast(bbox.min.z));
237 | atomicStore(&bvh[p_idx].aabb_l_max_x, bitcast(bbox.max.x));
238 | atomicStore(&bvh[p_idx].aabb_l_max_y, bitcast(bbox.max.y));
239 | atomicStore(&bvh[p_idx].aabb_l_max_z, bitcast(bbox.max.z));
240 | atomicStore(&bvh[p_idx].l_child, w_idx);
241 |
242 | bSkipped = true;
243 | }
244 |
245 | if (sibling != 0 && !bSkipped) {
246 | atomicStore(&bvh[p_idx].aabb_r_min_x, bitcast(bbox.min.x));
247 | atomicStore(&bvh[p_idx].aabb_r_min_y, bitcast(bbox.min.y));
248 | atomicStore(&bvh[p_idx].aabb_r_min_z, bitcast(bbox.min.z));
249 | atomicStore(&bvh[p_idx].aabb_r_max_x, bitcast(bbox.max.x));
250 | atomicStore(&bvh[p_idx].aabb_r_max_y, bitcast(bbox.max.y));
251 | atomicStore(&bvh[p_idx].aabb_r_max_z, bitcast(bbox.max.z));
252 | atomicStore(&bvh[p_idx].r_child, w_idx);
253 |
254 | var l_min : vec3f = vec3f(
255 | bitcast(atomicLoad(&bvh[p_idx].aabb_l_min_x)),
256 | bitcast(atomicLoad(&bvh[p_idx].aabb_l_min_y)),
257 | bitcast(atomicLoad(&bvh[p_idx].aabb_l_min_z))
258 | );
259 | var l_max : vec3f = vec3f(
260 | bitcast(atomicLoad(&bvh[p_idx].aabb_l_max_x)),
261 | bitcast(atomicLoad(&bvh[p_idx].aabb_l_max_y)),
262 | bitcast(atomicLoad(&bvh[p_idx].aabb_l_max_z))
263 | );
264 |
265 | // don't do anything if the other is not loaded yet
266 | if (any(l_min == vec3f(0.)) || any(l_max == vec3f(0.))) {
267 | continue;
268 | }
269 |
270 | bbox.min = min(bbox.min, l_min);
271 | bbox.max = max(bbox.max, l_max);
272 |
273 | // Move to parent
274 | c_idx = p_idx;
275 | w_idx = p_idx;
276 | level += 1;
277 | }
278 | }
279 | }
280 | }`
281 | }
282 | }
--------------------------------------------------------------------------------
/scripts/bvh/kernels/radix-sort.js:
--------------------------------------------------------------------------------
1 |
2 | function initRadixSortKernel(device) {
3 | // create bind group layouts
4 | const SCAN_UP_BG_LAYOUTS = [
5 | device.createBindGroupLayout({
6 | entries: [
7 | {
8 | binding: 0,
9 | visibility: GPUShaderStage.COMPUTE,
10 | buffer: {
11 | type: "storage"
12 | }
13 | },
14 | {
15 | binding: 1,
16 | visibility: GPUShaderStage.COMPUTE,
17 | buffer: {
18 | type: "storage"
19 | }
20 | }
21 | ]
22 | }),
23 | device.createBindGroupLayout({
24 | entries: [
25 | {
26 | binding: 0,
27 | visibility: GPUShaderStage.COMPUTE,
28 | buffer: {
29 | type: "uniform"
30 | }
31 | }
32 | ]
33 | })
34 | ]
35 |
36 | const INPUT_L_BG_LAYOUTS = [
37 | device.createBindGroupLayout({
38 | entries: [
39 | {
40 | binding: 0,
41 | visibility: GPUShaderStage.COMPUTE,
42 | buffer: {
43 | type: "storage"
44 | }
45 | },
46 | {
47 | binding: 1,
48 | visibility: GPUShaderStage.COMPUTE,
49 | buffer: {
50 | type: "storage"
51 | }
52 | },
53 | {
54 | binding: 2,
55 | visibility: GPUShaderStage.COMPUTE,
56 | buffer: {
57 | type: "storage"
58 | }
59 | },
60 | {
61 | binding: 3,
62 | visibility: GPUShaderStage.COMPUTE,
63 | buffer: {
64 | type: "storage"
65 | }
66 | }
67 | ]
68 | }),
69 | device.createBindGroupLayout({
70 | entries: [
71 | {
72 | binding: 0,
73 | visibility: GPUShaderStage.COMPUTE,
74 | buffer: {
75 | type: "storage"
76 | }
77 | },
78 | {
79 | binding: 1,
80 | visibility: GPUShaderStage.COMPUTE,
81 | buffer: {
82 | type: "storage"
83 | }
84 | },
85 | {
86 | binding: 2,
87 | visibility: GPUShaderStage.COMPUTE,
88 | buffer: {
89 | type: "storage"
90 | }
91 | }
92 | ]
93 | }),
94 | device.createBindGroupLayout({
95 | entries: [
96 | {
97 | binding: 0,
98 | visibility: GPUShaderStage.COMPUTE,
99 | buffer: {
100 | type: "uniform"
101 | }
102 | }
103 | ]
104 | })
105 | ]
106 |
107 | // compile shaders
108 | const SCAN_UP_SM = device.createShaderModule({
109 | code: SCAN_UP_SRC(),
110 | label: "scan up shader module"
111 | })
112 |
113 | const INPUT_L_SM = device.createShaderModule({
114 | code: INPUT_L_SRC(),
115 | label: "input level shader module"
116 | })
117 |
118 | // create pipelines
119 | const INIT_IDX_PIPELINE = device.createComputePipeline({
120 | layout: device.createPipelineLayout({
121 | bindGroupLayouts: INPUT_L_BG_LAYOUTS
122 | }),
123 | compute: {
124 | module: INPUT_L_SM,
125 | entryPoint: "init_idx"
126 | }
127 | })
128 |
129 | const INIT_OFF_PIPELINE = device.createComputePipeline({
130 | layout: device.createPipelineLayout({
131 | bindGroupLayouts: INPUT_L_BG_LAYOUTS
132 | }),
133 | compute: {
134 | module: INPUT_L_SM,
135 | entryPoint: "init_off"
136 | }
137 | })
138 |
139 | const L_SCAN_PIPELINE = device.createComputePipeline({
140 | layout: device.createPipelineLayout({
141 | bindGroupLayouts: INPUT_L_BG_LAYOUTS
142 | }),
143 | compute: {
144 | module: INPUT_L_SM,
145 | entryPoint: "scan_and_sort"
146 | }
147 | })
148 |
149 | const SCAN_UP_PIPELINE = device.createComputePipeline({
150 | layout: device.createPipelineLayout({
151 | bindGroupLayouts: SCAN_UP_BG_LAYOUTS
152 | }),
153 | compute: {
154 | module: SCAN_UP_SM,
155 | entryPoint: "scan_up"
156 | }
157 | })
158 |
159 | return { execute }
160 |
161 | // takes as input a buffer of u32's returns a buffer with keys rearranged - is destructive to the buffer!
162 | async function execute(valBuffer, size) {
163 | if (valBuffer.size != size * 4) {
164 | console.warning(`in radix sort: buffer size [ ${valBuffer.size} ] does not match requested size [ ${size} ]`)
165 | return
166 | }
167 |
168 | // create all necessary buffers
169 |
170 | const valBuffers = [
171 | valBuffer,
172 | device.createBuffer({
173 | size: valBuffer.size,
174 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
175 | })
176 | ]
177 |
178 | const idxBuffers = [
179 | device.createBuffer({
180 | size: valBuffer.size,
181 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
182 | }),
183 | device.createBuffer({
184 | size: valBuffer.size,
185 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
186 | })
187 | ]
188 |
189 | const l1OffsetsBuffer = device.createBuffer({
190 | size: 256 * 256 * 16,
191 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
192 | })
193 |
194 | const l2OffsetsBuffer = device.createBuffer({
195 | size: 256 * 16,
196 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
197 | })
198 |
199 | const l3OffsetsBuffer = device.createBuffer({
200 | size: 16,
201 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
202 | })
203 |
204 | const uniformBuffer = device.createBuffer({
205 | size: 16,
206 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
207 | })
208 |
209 | // create necessary bind groups
210 |
211 | const SCAN_UP_BGS = [
212 | device.createBindGroup({
213 | layout: SCAN_UP_BG_LAYOUTS[0],
214 | entries: [
215 | {
216 | binding: 0,
217 | visibility: GPUShaderStage.COMPUTE,
218 | resource: {
219 | buffer: l1OffsetsBuffer
220 | }
221 | },
222 | {
223 | binding: 1,
224 | visibility: GPUShaderStage.COMPUTE,
225 | resource: {
226 | buffer: l2OffsetsBuffer
227 | }
228 | }
229 | ]
230 | }),
231 | device.createBindGroup({
232 | layout: SCAN_UP_BG_LAYOUTS[0],
233 | entries: [
234 | {
235 | binding: 0,
236 | visibility: GPUShaderStage.COMPUTE,
237 | resource: {
238 | buffer: l2OffsetsBuffer
239 | }
240 | },
241 | {
242 | binding: 1,
243 | visibility: GPUShaderStage.COMPUTE,
244 | resource: {
245 | buffer: l3OffsetsBuffer
246 | }
247 | }
248 | ]
249 | }),
250 | device.createBindGroup({
251 | layout: SCAN_UP_BG_LAYOUTS[1],
252 | entries: [
253 | {
254 | binding: 0,
255 | visibility: GPUShaderStage.COMPUTE,
256 | resource: {
257 | buffer: uniformBuffer
258 | }
259 | }
260 | ]
261 | })
262 | ]
263 |
264 | const INPUT_L_BGS = [
265 | device.createBindGroup({
266 | layout: INPUT_L_BG_LAYOUTS[0],
267 | entries: [
268 | {
269 | binding: 0,
270 | visibility: GPUShaderStage.COMPUTE,
271 | resource: {
272 | buffer: idxBuffers[0]
273 | }
274 | },
275 | {
276 | binding: 1,
277 | visibility: GPUShaderStage.COMPUTE,
278 | resource: {
279 | buffer: valBuffers[0]
280 | }
281 | },
282 | {
283 | binding: 2,
284 | visibility: GPUShaderStage.COMPUTE,
285 | resource: {
286 | buffer: idxBuffers[1]
287 | }
288 | },
289 | {
290 | binding: 3,
291 | visibility: GPUShaderStage.COMPUTE,
292 | resource: {
293 | buffer: valBuffers[1]
294 | }
295 | },
296 | ]
297 | }),
298 | device.createBindGroup({
299 | layout: INPUT_L_BG_LAYOUTS[0],
300 | entries: [
301 | {
302 | binding: 0,
303 | visibility: GPUShaderStage.COMPUTE,
304 | resource: {
305 | buffer: idxBuffers[1]
306 | }
307 | },
308 | {
309 | binding: 1,
310 | visibility: GPUShaderStage.COMPUTE,
311 | resource: {
312 | buffer: valBuffers[1]
313 | }
314 | },
315 | {
316 | binding: 2,
317 | visibility: GPUShaderStage.COMPUTE,
318 | resource: {
319 | buffer: idxBuffers[0]
320 | }
321 | },
322 | {
323 | binding: 3,
324 | visibility: GPUShaderStage.COMPUTE,
325 | resource: {
326 | buffer: valBuffers[0]
327 | }
328 | },
329 | ]
330 | }),
331 | device.createBindGroup({
332 | layout: INPUT_L_BG_LAYOUTS[1],
333 | entries: [
334 | {
335 | binding: 0,
336 | visibility: GPUShaderStage.COMPUTE,
337 | resource: {
338 | buffer: l1OffsetsBuffer
339 | }
340 | },
341 | {
342 | binding: 1,
343 | visibility: GPUShaderStage.COMPUTE,
344 | resource: {
345 | buffer: l2OffsetsBuffer
346 | }
347 | },
348 | {
349 | binding: 2,
350 | visibility: GPUShaderStage.COMPUTE,
351 | resource: {
352 | buffer: l3OffsetsBuffer
353 | }
354 | }
355 | ]
356 | }),
357 | ]
358 |
359 | // initialize the index array
360 | {
361 | device.queue.writeBuffer(
362 | uniformBuffer,
363 | 0,
364 | new Uint32Array([
365 | size,
366 | 0,
367 | 0,
368 | 0
369 | ])
370 | )
371 |
372 | const CE = device.createCommandEncoder()
373 | const P = CE.beginComputePass()
374 |
375 | P.setPipeline(INIT_IDX_PIPELINE)
376 | P.setBindGroup(0, INPUT_L_BGS[0])
377 | P.setBindGroup(1, INPUT_L_BGS[2])
378 | P.setBindGroup(2, SCAN_UP_BGS[2])
379 | P.dispatchWorkgroups(Math.ceil(size / 256))
380 | P.end()
381 |
382 | device.queue.submit([CE.finish()])
383 | }
384 |
385 | // sort the given array based on the 2k, 2k + 1-th bits
386 | async function sortKthBits(k) {
387 | {// first pass - update the offsets from the first layer
388 | const CE = device.createCommandEncoder()
389 |
390 | device.queue.writeBuffer(
391 | uniformBuffer,
392 | 0,
393 | new Uint32Array([
394 | size,
395 | k,
396 | 0,
397 | 0
398 | ])
399 | )
400 |
401 | const P = CE.beginComputePass()
402 | P.setPipeline(INIT_OFF_PIPELINE)
403 | P.setBindGroup(0, INPUT_L_BGS[k % 2])
404 | P.setBindGroup(1, INPUT_L_BGS[2])
405 | P.setBindGroup(2, SCAN_UP_BGS[2])
406 | P.dispatchWorkgroups(Math.ceil(size / 256))
407 | P.end()
408 |
409 | device.queue.submit([CE.finish()])
410 | }
411 | {// second pass - scan the level 1 offsets
412 | const CE = device.createCommandEncoder()
413 |
414 | device.queue.writeBuffer(
415 | uniformBuffer,
416 | 0,
417 | new Uint32Array([
418 | Math.ceil(size / 256),
419 | k,
420 | 1,
421 | 0
422 | ])
423 | )
424 |
425 | const P = CE.beginComputePass()
426 | P.setPipeline(SCAN_UP_PIPELINE)
427 | P.setBindGroup(0, SCAN_UP_BGS[0])
428 | P.setBindGroup(1, SCAN_UP_BGS[2])
429 | P.dispatchWorkgroups(Math.ceil(size / (256 * 256)))
430 | P.end()
431 |
432 | device.queue.submit([CE.finish()])
433 | }
434 | {// third pass - scan the level 2 offsets
435 | const CE = device.createCommandEncoder()
436 |
437 | device.queue.writeBuffer(
438 | uniformBuffer,
439 | 0,
440 | new Uint32Array([
441 | Math.ceil(size / (256 * 256)),
442 | k,
443 | 2,
444 | 0
445 | ])
446 | )
447 |
448 | const P = CE.beginComputePass()
449 | P.setPipeline(SCAN_UP_PIPELINE)
450 | P.setBindGroup(0, SCAN_UP_BGS[1])
451 | P.setBindGroup(1, SCAN_UP_BGS[2])
452 | P.dispatchWorkgroups(1)
453 | P.end()
454 |
455 | device.queue.submit([CE.finish()])
456 | }
457 | {// final pass - scan and write at the first level
458 | const CE = device.createCommandEncoder()
459 |
460 | device.queue.writeBuffer(
461 | uniformBuffer,
462 | 0,
463 | new Uint32Array([
464 | size,
465 | k,
466 | 0,
467 | 0
468 | ])
469 | )
470 |
471 | const P = CE.beginComputePass()
472 | P.setPipeline(L_SCAN_PIPELINE)
473 | P.setBindGroup(0, INPUT_L_BGS[k % 2])
474 | P.setBindGroup(1, INPUT_L_BGS[2])
475 | P.setBindGroup(2, SCAN_UP_BGS[2])
476 | P.dispatchWorkgroups(Math.ceil(size / 256))
477 | P.end()
478 |
479 | device.queue.submit([CE.finish()])
480 | }
481 |
482 | await device.queue.onSubmittedWorkDone()
483 | }
484 |
485 | // run the 2-bit radix sort 16 times
486 | for (var k = 0; k < 16; k++) {
487 | await sortKthBits(k);
488 | }
489 |
490 | // destroy remaining, unused buffers
491 | uniformBuffer.destroy()
492 | valBuffers[1].destroy()
493 | idxBuffers[1].destroy()
494 | l1OffsetsBuffer.destroy()
495 | l2OffsetsBuffer.destroy()
496 | l3OffsetsBuffer.destroy()
497 |
498 | // return the two key buffers
499 | return { IDX_BUFFER : idxBuffers[0] }
500 | }
501 |
502 | function INPUT_L_SRC() {
503 | return /* wgsl */ `
504 | // bindgroup specific to interactions with the actual input
505 | @group(0) @binding(0) var idxs : array;
506 | @group(0) @binding(1) var vals : array;
507 | @group(0) @binding(2) var n_idxs : array;
508 | @group(0) @binding(3) var n_vals : array;
509 |
510 | // bindgroup with counts from intermediate steps
511 | @group(1) @binding(0) var l1_offsets : array;
512 | @group(1) @binding(1) var l2_offsets : array;
513 | @group(1) @binding(2) var l3_offsets : array;
514 |
515 | struct Uniforms {
516 | num : u32,
517 | win : u32,
518 | lvl : u32,
519 | xtr : u32
520 | };
521 |
522 | // bindgroup which stores the uniforms
523 | @group(2) @binding(0) var uniforms : Uniforms;
524 |
525 | // set idx in the buffer to just count 0, 1, 2, ...
526 | @compute @workgroup_size(64)
527 | fn init_idx(@builtin(global_invocation_id) global_id : vec3u) {
528 | for (var i : u32 = 0u; i < 4; i++) {
529 | var idx : u32 = 4u * global_id.x + i;
530 | if (idx < uniforms.num) {
531 | idxs[idx] = i32(idx);
532 | }
533 | }
534 | }
535 |
536 | var wg_count : array, 4>;
537 |
538 | // get the number of each element within each group
539 | @compute @workgroup_size(64)
540 | fn init_off(
541 | @builtin(global_invocation_id) global_id : vec3u,
542 | @builtin(local_invocation_id) local_id : vec3u
543 | ) {
544 | // loop over all of this thread's entries and tally how many are of each type
545 | var l_count : array;
546 | for (var i : u32 = 0u; i < 4; i++) {
547 | var idx : u32 = 4u * global_id.x + i;
548 | if (idx < uniforms.num) {
549 | var value : u32 = vals[idx];
550 | l_count[(value >> (2u * uniforms.win)) & 3u]++;
551 | }
552 | }
553 |
554 | // send this to workgroup memory
555 | atomicAdd(&wg_count[0], l_count[0]);
556 | atomicAdd(&wg_count[1], l_count[1]);
557 | atomicAdd(&wg_count[2], l_count[2]);
558 | atomicAdd(&wg_count[3], l_count[3]);
559 |
560 | // the last thread writes the resulting vector to global memory
561 | workgroupBarrier();
562 | if (local_id.x == 63u) {
563 | l1_offsets[global_id.x / 64u] = vec4u(
564 | atomicLoad(&wg_count[0]),
565 | atomicLoad(&wg_count[1]),
566 | atomicLoad(&wg_count[2]),
567 | atomicLoad(&wg_count[3])
568 | );
569 | }
570 | }
571 |
572 | var scan_arr : array;
573 |
574 | // scan across the workgroup locally, then reorder everything globally
575 | @compute @workgroup_size(64)
576 | fn scan_and_sort(
577 | @builtin(global_invocation_id) global_id : vec3u,
578 | @builtin(local_invocation_id) local_id : vec3u
579 | ) {
580 | var l_idx : u32 = local_id.x;
581 | var g_idx : u32 = global_id.x;
582 |
583 | // each thread reads four values from memory and performs a local scan
584 | var thread_vals : array;
585 |
586 |
587 |
588 | for (var i : u32 = 0u; i < 4; i++) {
589 | var c_idx : u32 = 4u * g_idx + i;
590 | if (c_idx < uniforms.num) {
591 | thread_vals[i] = vals[c_idx];
592 | }
593 | }
594 |
595 | // compute the offsets across the workgroup
596 | scan_arr[l_idx] = get_val_vec(thread_vals[0])
597 | + get_val_vec(thread_vals[1])
598 | + get_val_vec(thread_vals[2])
599 | + get_val_vec(thread_vals[3]);
600 | workgroupBarrier();
601 |
602 | workgroup_scan(l_idx);
603 |
604 | // compute the offsets for each element & write to memory
605 | var thread_offs : array;
606 | thread_offs[0] = scan_arr[l_idx];
607 | thread_offs[1] = thread_offs[0] + get_val_vec(thread_vals[0]);
608 | thread_offs[2] = thread_offs[1] + get_val_vec(thread_vals[1]);
609 | thread_offs[3] = thread_offs[2] + get_val_vec(thread_vals[2]);
610 |
611 | var global_offsets : vec4u;
612 | global_offsets[0u] = dot(vec4u(0u, 0u, 0u, 0u), l3_offsets[0u]);
613 | global_offsets[1u] = dot(vec4u(1u, 0u, 0u, 0u), l3_offsets[0u]);
614 | global_offsets[2u] = dot(vec4u(1u, 1u, 0u, 0u), l3_offsets[0u]);
615 | global_offsets[3u] = dot(vec4u(1u, 1u, 1u, 0u), l3_offsets[0u]);
616 |
617 | global_offsets += l1_offsets[g_idx / 64u];
618 | global_offsets += l2_offsets[g_idx / (64u * 256u)];
619 |
620 | for (var i : u32 = 0u; i < 4; i++) {
621 | var c_idx : u32 = 4u * g_idx + i;
622 | if (c_idx < uniforms.num) {
623 | var n_idx : u32 = (global_offsets + thread_offs[i])[get_val_u32(thread_vals[i])];
624 |
625 | n_idxs[n_idx] = idxs[c_idx];
626 | n_vals[n_idx] = thread_vals[i];
627 | }
628 | }
629 | }
630 |
631 | // returns which radix index this input is
632 | fn get_val_u32(input : u32) -> u32 {
633 | return (input >> (2u * uniforms.win)) & 3u;
634 | }
635 | // likewise, but for vector
636 | fn get_val_vec(input : u32) -> vec4u {
637 | var shifted = get_val_u32(input);
638 |
639 | if (shifted == 0u) {
640 | return vec4u(1u, 0u, 0u, 0u);
641 | }
642 | if (shifted == 1u) {
643 | return vec4u(0u, 1u, 0u, 0u);
644 | }
645 | if (shifted == 2u) {
646 | return vec4u(0u, 0u, 1u, 0u);
647 | }
648 |
649 | return vec4u(0u, 0u, 0u, 1u);
650 | }
651 |
652 | // performs a 256-wide scan on vec4u in scan_arr
653 | fn workgroup_scan(idx : u32) {
654 | // upsweep pass
655 | if ((1u & idx) == 1u) {
656 | scan_arr[idx] += scan_arr[idx - 1u];
657 | }
658 | workgroupBarrier();
659 |
660 | if ((3u & idx) == 3u) {
661 | scan_arr[idx] += scan_arr[idx - 2u];
662 | }
663 | workgroupBarrier();
664 |
665 | if ((7u & idx) == 7u) {
666 | scan_arr[idx] += scan_arr[idx - 4u];
667 | }
668 | workgroupBarrier();
669 |
670 | if ((15u & idx) == 15u) {
671 | scan_arr[idx] += scan_arr[idx - 8u];
672 | }
673 | workgroupBarrier();
674 |
675 | if ((31u & idx) == 31u) {
676 | scan_arr[idx] += scan_arr[idx - 16u];
677 | }
678 | workgroupBarrier();
679 |
680 | // two special cases in transition from upsweep to downsweep
681 | if (idx == 63u) {
682 | scan_arr[idx] = scan_arr[31u];
683 | }
684 | workgroupBarrier();
685 |
686 | if (idx == 31u) {
687 | scan_arr[idx] = vec4u(0u);
688 | }
689 | workgroupBarrier();
690 |
691 | // downsweep pass
692 | if ((15u & idx) == 15u && (idx & 16u) != 0u) {
693 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 16u];
694 | }
695 | workgroupBarrier();
696 |
697 | if ((15u & idx) == 15u && (idx & 16u) == 0u) {
698 | scan_arr[idx] = scan_arr[idx + 16u] - scan_arr[idx];
699 | }
700 | workgroupBarrier();
701 |
702 | if ((7u & idx) == 7u && (idx & 8u) != 0u) {
703 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 8u];
704 | }
705 | workgroupBarrier();
706 |
707 | if ((7u & idx) == 7u && (idx & 8u) == 0u) {
708 | scan_arr[idx] = scan_arr[idx + 8u] - scan_arr[idx];
709 | }
710 | workgroupBarrier();
711 |
712 | if ((3u & idx) == 3u && (idx & 4u) != 0u) {
713 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 4u];
714 | }
715 | workgroupBarrier();
716 |
717 | if ((3u & idx) == 3u && (idx & 4u) == 0u) {
718 | scan_arr[idx] = scan_arr[idx + 4u] - scan_arr[idx];
719 | }
720 | workgroupBarrier();
721 |
722 | if ((1u & idx) == 1u && (idx & 2u) != 0u) {
723 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 2u];
724 | }
725 | workgroupBarrier();
726 |
727 | if ((1u & idx) == 1u && (idx & 2u) == 0u) {
728 | scan_arr[idx] = scan_arr[idx + 2u] - scan_arr[idx];
729 | }
730 | workgroupBarrier();
731 |
732 | if ((idx & 1u) != 0u) {
733 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 1u];
734 | }
735 | workgroupBarrier();
736 |
737 | if ((idx & 1u) == 0u) {
738 | scan_arr[idx] = scan_arr[idx + 1u] - scan_arr[idx];
739 | }
740 | workgroupBarrier();
741 | }`
742 | }
743 |
744 | function SCAN_UP_SRC() {
745 | return /* wgsl */ `
746 | // bindgroup specific to the intermediate scans
747 | @group(0) @binding(0) var low_count : array;
748 | @group(0) @binding(1) var nex_count : array;
749 |
750 | struct Uniforms {
751 | num : u32,
752 | win : u32,
753 | lvl : u32,
754 | xtr : u32
755 | };
756 |
757 | // bindgroup which stores the uniforms
758 | @group(1) @binding(0) var uniforms : Uniforms;
759 |
760 | // the LDS copy used in the workgroup-wide prefix scan
761 | var scan_arr : array;
762 |
763 | @compute @workgroup_size(64)
764 | fn scan_up(
765 | @builtin(global_invocation_id) global_id : vec3u,
766 | @builtin(local_invocation_id) local_id : vec3u
767 | ) {
768 | var l_idx : u32 = local_id.x;
769 | var g_idx : u32 = global_id.x;
770 |
771 | // each thread reads four values from memory and performs a local scan
772 | var thread_vals : array;
773 | var thread_offs : array;
774 |
775 | for (var i : u32 = 0u; i < 4; i++) {
776 | var c_idx : u32 = 4u * g_idx + i;
777 |
778 | if (c_idx < uniforms.num) {
779 | thread_vals[i] = low_count[4u * g_idx + i];
780 | }
781 | }
782 |
783 | thread_offs[0] = vec4u(0u, 0u, 0u, 0u);
784 | thread_offs[1] = thread_vals[0];
785 | thread_offs[2] = thread_offs[1] + thread_vals[1];
786 | thread_offs[3] = thread_offs[2] + thread_vals[2];
787 |
788 | // perform the workgroup-wide prefix scan
789 | scan_arr[l_idx] = thread_vals[0] + thread_vals[1] + thread_vals[2] + thread_vals[3];
790 | workgroupBarrier();
791 |
792 | workgroup_scan(l_idx);
793 |
794 | // complete the local scan and send it back to storage
795 | for (var i : u32 = 0u; i < 4; i++) {
796 | low_count[4u * g_idx + i] = scan_arr[l_idx] + thread_offs[i];
797 | }
798 |
799 | // if we are the last thread in the group, send the total # to the next layer
800 | if (l_idx == 63u) {
801 | nex_count[g_idx / 64u] = scan_arr[63u] + thread_offs[3] + thread_vals[3];
802 | }
803 | }
804 |
805 | // performs a 256-wide scan on vec4u in scan_arr
806 | fn workgroup_scan(idx : u32) {
807 | // upsweep pass
808 | if ((1u & idx) == 1u) {
809 | scan_arr[idx] += scan_arr[idx - 1u];
810 | }
811 | workgroupBarrier();
812 |
813 | if ((3u & idx) == 3u) {
814 | scan_arr[idx] += scan_arr[idx - 2u];
815 | }
816 | workgroupBarrier();
817 |
818 | if ((7u & idx) == 7u) {
819 | scan_arr[idx] += scan_arr[idx - 4u];
820 | }
821 | workgroupBarrier();
822 |
823 | if ((15u & idx) == 15u) {
824 | scan_arr[idx] += scan_arr[idx - 8u];
825 | }
826 | workgroupBarrier();
827 |
828 | if ((31u & idx) == 31u) {
829 | scan_arr[idx] += scan_arr[idx - 16u];
830 | }
831 | workgroupBarrier();
832 |
833 | // two special cases in transition from upsweep to downsweep
834 | if (idx == 63u) {
835 | scan_arr[idx] = scan_arr[31u];
836 | }
837 | workgroupBarrier();
838 |
839 | if (idx == 31u) {
840 | scan_arr[idx] = vec4u(0u);
841 | }
842 | workgroupBarrier();
843 |
844 | // downsweep pass
845 | if ((15u & idx) == 15u && (idx & 16u) != 0u) {
846 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 16u];
847 | }
848 | workgroupBarrier();
849 |
850 | if ((15u & idx) == 15u && (idx & 16u) == 0u) {
851 | scan_arr[idx] = scan_arr[idx + 16u] - scan_arr[idx];
852 | }
853 | workgroupBarrier();
854 |
855 | if ((7u & idx) == 7u && (idx & 8u) != 0u) {
856 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 8u];
857 | }
858 | workgroupBarrier();
859 |
860 | if ((7u & idx) == 7u && (idx & 8u) == 0u) {
861 | scan_arr[idx] = scan_arr[idx + 8u] - scan_arr[idx];
862 | }
863 | workgroupBarrier();
864 |
865 | if ((3u & idx) == 3u && (idx & 4u) != 0u) {
866 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 4u];
867 | }
868 | workgroupBarrier();
869 |
870 | if ((3u & idx) == 3u && (idx & 4u) == 0u) {
871 | scan_arr[idx] = scan_arr[idx + 4u] - scan_arr[idx];
872 | }
873 | workgroupBarrier();
874 |
875 | if ((1u & idx) == 1u && (idx & 2u) != 0u) {
876 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 2u];
877 | }
878 | workgroupBarrier();
879 |
880 | if ((1u & idx) == 1u && (idx & 2u) == 0u) {
881 | scan_arr[idx] = scan_arr[idx + 2u] - scan_arr[idx];
882 | }
883 | workgroupBarrier();
884 |
885 | if ((idx & 1u) != 0u) {
886 | scan_arr[idx] = scan_arr[idx] + scan_arr[idx - 1u];
887 | }
888 | workgroupBarrier();
889 |
890 | if ((idx & 1u) == 0u) {
891 | scan_arr[idx] = scan_arr[idx + 1u] - scan_arr[idx];
892 | }
893 | workgroupBarrier();
894 | }`
895 | }
896 | }
--------------------------------------------------------------------------------
/scripts/bvh/kernels/radix-tree.js:
--------------------------------------------------------------------------------
1 |
2 | function initRadixTreeKernel(device) {
3 | // shader parameters
4 | const WG_SIZE = 64
5 |
6 | // create bind group layout, shader module and pipeline
7 | const BG_LAYOUT = device.createBindGroupLayout({
8 | entries: [
9 | {
10 | binding: 0,
11 | visibility: GPUShaderStage.COMPUTE,
12 | buffer: {
13 | type: "storage"
14 | }
15 | },
16 | {
17 | binding: 1,
18 | visibility: GPUShaderStage.COMPUTE,
19 | buffer: {
20 | type: "uniform"
21 | }
22 | },
23 | {
24 | binding: 2,
25 | visibility: GPUShaderStage.COMPUTE,
26 | buffer: {
27 | type: "storage"
28 | }
29 | }
30 | ]
31 | })
32 |
33 | const SM = device.createShaderModule({
34 | code: SRC(),
35 | label: "radix tree shader module"
36 | })
37 |
38 | const PIPELINE = device.createComputePipeline({
39 | layout: device.createPipelineLayout({
40 | bindGroupLayouts: [BG_LAYOUT]
41 | }),
42 | compute: {
43 | module: SM,
44 | entryPoint: "compute_radix_tree_pointers"
45 | }
46 | })
47 |
48 | return { execute }
49 |
50 | async function execute(KEY_BUFFER, SIZE) {
51 | if (KEY_BUFFER.size != 4 * SIZE) {
52 | console.warn(`in radix tree: buffer size [ ${KEY_BUFFER.size} ] does not match requested size [ ${SIZE} ]`)
53 | return
54 | }
55 |
56 | // create all the necessary buffers
57 | const PARENT_BUFFER = device.createBuffer({
58 | size: SIZE * 8,
59 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
60 | })
61 | const UNIFORM_BUFFER = device.createBuffer({
62 | size: 16,
63 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
64 | })
65 |
66 | // create the necessary bind groups
67 | const BG = device.createBindGroup({
68 | layout: BG_LAYOUT,
69 | entries: [
70 | {
71 | binding: 0,
72 | visibility: GPUShaderStage.COMPUTE,
73 | resource: {
74 | buffer: KEY_BUFFER
75 | }
76 | },
77 | {
78 | binding: 1,
79 | visibility: GPUShaderStage.COMPUTE,
80 | resource: {
81 | buffer: UNIFORM_BUFFER
82 | }
83 | },
84 | {
85 | binding: 2,
86 | visibility: GPUShaderStage.COMPUTE,
87 | resource: {
88 | buffer: PARENT_BUFFER
89 | }
90 | }
91 | ]
92 | })
93 |
94 | {// send work to GPU
95 | device.queue.writeBuffer(
96 | UNIFORM_BUFFER,
97 | 0,
98 | new Int32Array([
99 | SIZE,
100 | 0,
101 | 0,
102 | 0
103 | ])
104 | )
105 |
106 | const CE = device.createCommandEncoder()
107 | const P = CE.beginComputePass()
108 |
109 | P.setPipeline(PIPELINE)
110 | P.setBindGroup(0, BG)
111 | P.dispatchWorkgroups(Math.ceil(SIZE / WG_SIZE))
112 | P.end()
113 |
114 | device.queue.submit([CE.finish()])
115 | }
116 |
117 | await device.queue.onSubmittedWorkDone()
118 |
119 | return { PARENT_BUFFER }
120 | }
121 |
122 | function SRC() {
123 | return /* wgsl */ `
124 |
125 | struct Uniforms {
126 | num : i32,
127 | f_1 : i32,
128 | f_2 : i32,
129 | f_3 : i32
130 | };
131 |
132 | @group(0) @binding(0) var keys : array;
133 | @group(0) @binding(1) var uniforms : Uniforms;
134 | @group(0) @binding(2) var parents : array;
135 |
136 | @compute @workgroup_size(${WG_SIZE})
137 | fn compute_radix_tree_pointers(@builtin(global_invocation_id) global_id : vec3u) {
138 | var idx : i32 = i32(global_id.x);
139 | if (idx >= uniforms.num - 1) {
140 | return;
141 | }
142 |
143 | var pointers : vec2i = compute_child_index(idx);
144 |
145 | // write parent pointer to child nodes, accounting for leaf nodes as well
146 | if (pointers.x >= 0) {
147 | parents[pointers.x] = idx;
148 | } else {
149 | parents[uniforms.num + - (pointers.x + 1)] = idx;
150 | }
151 |
152 | if (pointers.y >= 0) {
153 | parents[pointers.y] = idx;
154 | } else {
155 | parents[uniforms.num + - (pointers.y + 1)] = idx;
156 | }
157 | }
158 |
159 | // computes the first bit (from the most significant) that the two keys differ on
160 | fn dif(key_1 : u32, key_2 : u32) -> i32 {
161 | for (var i = 0u; i < 32u; i++) {
162 | var mask : u32 = 1u << (31u - i);
163 |
164 | if ((key_1 & mask) != (key_2 & mask)) {
165 | return i32(i);
166 | }
167 | }
168 | return -1;
169 | }
170 |
171 | // computes the length of the common prefix between the keys at idx_1 and idx_2
172 | fn del(idx_1 : i32, idx_2 : i32) -> i32 {
173 | // if either index is out of bounds, del() = -1
174 | if (idx_1 >= uniforms.num || idx_2 >= uniforms.num || idx_1 < 0 || idx_2 < 0) {
175 | return -1;
176 | }
177 |
178 | var key_dif : i32 = dif(keys[idx_1], keys[idx_2]);
179 |
180 | if (key_dif == -1) {
181 | key_dif = 32 + dif(u32(idx_1), u32(idx_2));
182 | }
183 |
184 | return key_dif;
185 | }
186 |
187 | // computes the index of the left and right child of a given node
188 | fn compute_child_index(i : i32) -> vec2i {
189 | // determine the direction of the child range
190 | var d : i32 = sign(del(i, i + 1) - del(i, i - 1));
191 |
192 | // compute a bound on the size of the range
193 | var del_min : i32 = del(i, i - d);
194 | var l_max : i32 = 2;
195 | while (del(i, i + l_max * d) > del_min) {
196 | l_max *= 2;
197 | }
198 |
199 | // given this bound, find the true size using binary search
200 | var l : i32 = 0;
201 | {
202 | var t : i32 = l_max / 2;
203 | while (t > 0) {
204 | if (del(i, i + (l + t) * d) > del_min) {
205 | l += t;
206 | }
207 | t /= 2;
208 | }
209 | }
210 | var j : i32 = i + l * d;
211 |
212 | // find the split position using binary search
213 | var del_node : i32 = del(i, j);
214 | var s : i32 = 0;
215 | {
216 | var v : i32 = 2;
217 | var t : i32 = (l - 1 + v) / v;
218 | while (t > 0) {
219 | if (del(i, i + (s + t) * d) > del_node) {
220 | s += t;
221 | }
222 | v *= 2;
223 | t = (l - 1 + v) / v;
224 | }
225 | }
226 | var gamma : i32 = i + s * d + min(d, 0);
227 |
228 | // output (signed) child pointers, where negative indicates leaf node
229 | var returned : vec2i = vec2i(gamma, gamma + 1);
230 | if (min(i, j) == gamma) {
231 | returned.x = -returned.x - 1;
232 | }
233 | if (max(i, j) == gamma + 1) {
234 | returned.y = -returned.y - 1;
235 | }
236 |
237 | return returned;
238 | }`
239 | }
240 | }
--------------------------------------------------------------------------------
/scripts/bvh/kernels/rearrange.js:
--------------------------------------------------------------------------------
1 |
2 | function initRearrangeKernel(device) {
3 | // shader parameters
4 | const WG_SIZE = 64
5 |
6 | // create bind group layout, shader module and pipeline
7 | const BG_LAYOUT = device.createBindGroupLayout({
8 | entries: [
9 | {
10 | binding: 0,
11 | visibility: GPUShaderStage.COMPUTE,
12 | buffer: {
13 | type: "storage"
14 | }
15 | },
16 | {
17 | binding: 1,
18 | visibility: GPUShaderStage.COMPUTE,
19 | buffer: {
20 | type: "storage"
21 | }
22 | },
23 | {
24 | binding: 2,
25 | visibility: GPUShaderStage.COMPUTE,
26 | buffer: {
27 | type: "storage"
28 | }
29 | },
30 | {
31 | binding: 3,
32 | visibility: GPUShaderStage.COMPUTE,
33 | buffer: {
34 | type: "uniform"
35 | }
36 | }
37 | ]
38 | })
39 |
40 | const SM = device.createShaderModule({
41 | code: SRC(),
42 | label: "triangle rearrange shader module"
43 | })
44 |
45 | const PIPELINE = device.createComputePipeline({
46 | layout: device.createPipelineLayout({
47 | bindGroupLayouts: [BG_LAYOUT]
48 | }),
49 | compute: {
50 | module: SM,
51 | entryPoint: "rearrange_triangles"
52 | }
53 | })
54 |
55 | return { execute }
56 |
57 | async function execute(I_TRIANGLE_BUFFER, INDEX_BUFFER, size) {
58 | if (I_TRIANGLE_BUFFER.size != 48 * size) {
59 | console.warn(`in rearrange: triangle buffer size [ ${I_TRIANGLE_BUFFER.size} ] does not match requested size [ ${size} ]`)
60 | return
61 | }
62 | if (INDEX_BUFFER.size != 4 * size) {
63 | console.warn(`in rearrange: index buffer size [ ${INDEX_BUFFER.size} ] does not match requested size [ ${size} ]`)
64 | return
65 | }
66 |
67 | // create all the necessary buffers
68 | const O_TRIANGLE_BUFFER = device.createBuffer({
69 | size: I_TRIANGLE_BUFFER.size,
70 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
71 | })
72 | const UNIFORM_BUFFER = device.createBuffer({
73 | size: 16,
74 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
75 | })
76 |
77 | // create the bind group
78 | const BG = device.createBindGroup({
79 | layout: BG_LAYOUT,
80 | entries: [
81 | {
82 | binding: 0,
83 | visibility: GPUShaderStage.COMPUTE,
84 | resource: {
85 | buffer: I_TRIANGLE_BUFFER
86 | }
87 | },
88 | {
89 | binding: 1,
90 | visibility: GPUShaderStage.COMPUTE,
91 | resource: {
92 | buffer: O_TRIANGLE_BUFFER
93 | }
94 | },
95 | {
96 | binding: 2,
97 | visibility: GPUShaderStage.COMPUTE,
98 | resource: {
99 | buffer: INDEX_BUFFER
100 | }
101 | },
102 | {
103 | binding: 3,
104 | visibility: GPUShaderStage.COMPUTE,
105 | resource: {
106 | buffer: UNIFORM_BUFFER
107 | }
108 | }
109 | ]
110 | })
111 |
112 | {// send work to GPU
113 | device.queue.writeBuffer(
114 | UNIFORM_BUFFER,
115 | 0,
116 | new Int32Array([
117 | size,
118 | 0,
119 | 0,
120 | 0
121 | ]),
122 | 0
123 | )
124 |
125 | const CE = device.createCommandEncoder()
126 | const P = CE.beginComputePass()
127 |
128 | P.setPipeline(PIPELINE)
129 | P.setBindGroup(0, BG)
130 | P.dispatchWorkgroups(Math.ceil(size / WG_SIZE))
131 | P.end()
132 |
133 | device.queue.submit([CE.finish()])
134 | }
135 |
136 | await device.queue.onSubmittedWorkDone()
137 |
138 | return { O_TRIANGLE_BUFFER }
139 | }
140 |
141 | function SRC() {
142 | return /* wgsl */ `
143 |
144 | struct Triangle {
145 | v0 : vec3f,
146 | v1 : vec3f,
147 | v2 : vec3f
148 | };
149 |
150 | struct Uniforms {
151 | num : i32,
152 | f_1 : i32,
153 | f_2 : i32,
154 | f_3 : i32
155 | };
156 |
157 | @group(0) @binding(0) var i_triangles : array;
158 | @group(0) @binding(1) var o_triangles : array;
159 | @group(0) @binding(2) var new_indices : array;
160 | @group(0) @binding(3) var uniforms : Uniforms;
161 |
162 | @compute @workgroup_size(${WG_SIZE})
163 | fn rearrange_triangles(@builtin(global_invocation_id) global_id : vec3u) {
164 | var idx : i32 = i32(global_id.x);
165 | if (idx >= uniforms.num) {
166 | return;
167 | }
168 | o_triangles[idx] = i_triangles[new_indices[idx]];
169 | }`
170 | }
171 | }
--------------------------------------------------------------------------------
/scripts/main.js:
--------------------------------------------------------------------------------
// Application entry point: acquires a WebGPU device, starts the render loop and
// wires up the UI (rotate button and .obj drag & drop).
window.onload = async () => {
    // initWebGPU() resolves to null when no device could be created, so the
    // result must be checked before it is destructured
    const gpu = await initWebGPU()
    if (!gpu || !gpu.adapter || !gpu.device) return
    const { device } = gpu

    // text shown while a statistic is not (yet) known
    const PLACEHOLDER = "----------"

    // formats a duration in seconds, truncated to at most 9 characters
    function formatSeconds(time) {
        return time == null ? PLACEHOLDER : time.toString().slice(0, 9) + "s"
    }

    // shows the triangle count with thousands separators (or the placeholder)
    function setTriangles(count) {
        const str = count == null
            ? PLACEHOLDER
            : count.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",")
        document.querySelector("#triangle-count").textContent = str
    }

    function setParseTime(time) {
        document.querySelector("#parse-time").textContent = formatSeconds(time)
    }

    function setBuildTime(time) {
        document.querySelector("#build-time").textContent = formatSeconds(time)
    }

    setBuildTime()
    setParseTime()
    setTriangles()

    const BVH = initBVHBuild(device)
    let PT = null

    let queuedRotate = 0

    // render loop: applies queued view rotations, takes three samples, draws
    async function frame() {
        if (PT) {
            while (queuedRotate > 0) {
                PT.rotateView()
                queuedRotate--
            }
            await PT.sample()
            await PT.sample()
            await PT.sample()
            await PT.draw()
        }

        window.requestAnimationFrame(frame)
    }

    frame()

    // bind all user inputs & UI
    document.querySelector("#rotate-view").addEventListener("mouseup", () => {
        if (PT) queuedRotate++
    })

    // parses the first dropped file, builds its BVH and starts path tracing it
    async function readFiles(contents) {
        setBuildTime()
        setParseTime()
        setTriangles()

        try {
            let s, e
            s = Date.now()
            const { NUM_TRIS, TRI_ARR, BOUNDS } = parseObj(contents[0])

            if (NUM_TRIS > 2_100_000) {
                alert("Warning: Model is too large. Try < 2,000,000 triangles.")
                return
            }

            e = Date.now()
            setParseTime((e - s) / 1000.)
            setTriangles(NUM_TRIS)

            // make thread sleep to update UI
            await new Promise(r => setTimeout(r, 10))

            s = Date.now()
            const { BVH_BUFFER, O_TRIANGLE_BUFFER } = await BVH.build(TRI_ARR, NUM_TRIS, BOUNDS)
            e = Date.now()
            setBuildTime((e - s) / 1000.)
            PT = initPathTracer(device, document.querySelector("#canvas"), {BVH_BUFFER, O_TRIANGLE_BUFFER, BOUNDS})
        } catch (err) {
            // the OBJ parser throws plain strings, GPU code throws Error objects
            console.error(err)
            alert("Could not load model: " + (err instanceof Error ? err.message : String(err)))
        }
    }

    document.body.addEventListener("drop", (e) => {
        e.preventDefault()
        e.stopPropagation()

        // collect every dropped .obj file
        const files = []
        if (e.dataTransfer.items) {
            [...e.dataTransfer.items].forEach((item) => {
                if (item.kind === "file") {
                    // getAsFile() may return null
                    const file = item.getAsFile()
                    if (file && file.name.endsWith(".obj")) {
                        files.push(file)
                    }
                }
            })
        } else {
            [...e.dataTransfer.files].forEach((file) => {
                if (file.name.endsWith('.obj')) {
                    files.push(file)
                }
            })
        }

        if (files.length === 0) {
            alert("File(s) is not valid.")
            return
        }

        // Read all .obj files as text (results keep the order of `files`)
        Promise.all(files.map((file) => file.text())).then(
            (contents) => readFiles(contents),
            (err) => {
                console.error(err)
                alert("Could not read file(s).")
            }
        )
    })

    document.body.addEventListener('dragover', (e) => {
        e.preventDefault()
        e.stopPropagation()
    })

    document.body.addEventListener('dragenter', (e) => {
        e.preventDefault()
        e.stopPropagation()
    })
}
149 |
150 | async function initWebGPU() {
151 | const adapter = await navigator.gpu?.requestAdapter()
152 | const device = await adapter?.requestDevice()
153 |
154 | if (!device) {
155 | alert("browser does not support webGPU!")
156 | return null
157 | }
158 |
159 | return { adapter, device }
160 | }
--------------------------------------------------------------------------------
/scripts/obj/obj-file-parser.js:
--------------------------------------------------------------------------------
1 | // from : https://github.com/WesUnwin/obj-file-parser
2 | class OBJFile {
3 | constructor(fileContents, defaultModelName) {
4 | this._reset();
5 | this.fileContents = fileContents;
6 | this.defaultModelName = (defaultModelName || 'untitled');
7 | }
8 |
9 | _reset() {
10 | this.result = {
11 | models: [],
12 | materialLibraries: []
13 | };
14 | this.currentMaterial = '';
15 | this.currentGroup = '';
16 | this.smoothingGroup = 0;
17 | }
18 |
19 | parse() {
20 | this._reset();
21 |
22 | const _stripComments = (lineString) => {
23 | const commentIndex = lineString.indexOf('#');
24 | if (commentIndex > -1) { return lineString.substring(0, commentIndex); }
25 | return lineString;
26 | };
27 |
28 | const lines = this.fileContents.split('\n');
29 | for (let i = 0; i < lines.length; i += 1) {
30 | const line = _stripComments(lines[i]);
31 |
32 | const lineItems = line.replace(/\s+/g, ' ').trim().split(' ');
33 |
34 | switch (lineItems[0].toLowerCase()) {
35 | case 'o': // Start A New Model
36 | this._parseObject(lineItems);
37 | break;
38 | case 'g': // Start a new polygon group
39 | this._parseGroup(lineItems);
40 | break;
41 | case 'v': // Define a vertex for the current model
42 | this._parseVertexCoords(lineItems);
43 | break;
44 | case 'vt': // Texture Coords
45 | this._parseTextureCoords(lineItems);
46 | break;
47 | case 'vn': // Define a vertex normal for the current model
48 | this._parseVertexNormal(lineItems);
49 | break;
50 | case 'l': // Define a line for the current model
51 | this._parseLine(lineItems);
52 | break;
53 | case 's': // Smooth shading statement
54 | this._parseSmoothShadingStatement(lineItems);
55 | break;
56 | case 'f': // Define a Face/Polygon
57 | this._parsePolygon(lineItems);
58 | break;
59 | case 'mtllib': // Reference to a material library file (.mtl)
60 | this._parseMtlLib(lineItems);
61 | break;
62 | case 'usemtl': // Sets the current material to be applied to polygons defined from this point forward
63 | this._parseUseMtl(lineItems);
64 | break;
65 | }
66 | }
67 |
68 | return this.result;
69 | }
70 |
71 | _createNewModel(name = this.defaultModelName) {
72 | return {
73 | name,
74 | vertices: [],
75 | textureCoords: [],
76 | vertexNormals: [],
77 | faces: [],
78 | lines: []
79 | };
80 | }
81 |
82 | _currentModel() {
83 | if (this.result.models.length == 0) {
84 | const defaultModel = this._createNewModel();
85 | this.result.models.push(defaultModel);
86 | this.currentGroup = '';
87 | this.smoothingGroup = 0;
88 | }
89 |
90 | return this.result.models[this.result.models.length - 1];
91 | }
92 |
93 | _parseObject(lineItems) {
94 | const modelName = lineItems.length >= 2 ? lineItems[1] : this.defaultModelName;
95 | const model = this._createNewModel(modelName);
96 | this.result.models.push(model);
97 | this.currentGroup = '';
98 | this.smoothingGroup = 0;
99 | }
100 |
101 | _parseGroup(lineItems) {
102 | if (lineItems.length != 2) { throw 'Group statements must have exactly 1 argument (eg. g group_1)'; }
103 |
104 | this.currentGroup = lineItems[1];
105 | }
106 |
107 | _parseVertexCoords(lineItems) {
108 | const x = lineItems.length >= 2 ? parseFloat(lineItems[1]) : 0.0;
109 | const y = lineItems.length >= 3 ? parseFloat(lineItems[2]) : 0.0;
110 | const z = lineItems.length >= 4 ? parseFloat(lineItems[3]) : 0.0;
111 |
112 | this._currentModel().vertices.push({ x, y, z });
113 | }
114 |
115 | _parseTextureCoords(lineItems) {
116 | const u = lineItems.length >= 2 ? parseFloat(lineItems[1]) : 0.0;
117 | const v = lineItems.length >= 3 ? parseFloat(lineItems[2]) : 0.0;
118 | const w = lineItems.length >= 4 ? parseFloat(lineItems[3]) : 0.0;
119 |
120 | this._currentModel().textureCoords.push({ u, v, w });
121 | }
122 |
123 | _parseVertexNormal(lineItems) {
124 | const x = lineItems.length >= 2 ? parseFloat(lineItems[1]) : 0.0;
125 | const y = lineItems.length >= 3 ? parseFloat(lineItems[2]) : 0.0;
126 | const z = lineItems.length >= 4 ? parseFloat(lineItems[3]) : 0.0;
127 |
128 | this._currentModel().vertexNormals.push({ x, y, z });
129 | }
130 |
131 | _parseLine(lineItems) {
132 | const totalVertices = (lineItems.length - 1);
133 | if (totalVertices < 2) { throw (`Line statement has less than 2 vertices${this.filePath}${this.lineNumber}`); }
134 |
135 | const line = [];
136 |
137 | for (let i = 0; i < totalVertices; i += 1) {
138 | const vertexString = lineItems[i + 1];
139 | const vertexValues = vertexString.split('/');
140 |
141 | if (vertexValues.length < 1 || vertexValues.length > 2) { throw (`Too many values (separated by /) for a single vertex${this.filePath}${this.lineNumber}`); }
142 |
143 | let vertexIndex = 0;
144 | let textureCoordsIndex = 0;
145 | vertexIndex = parseInt(vertexValues[0]);
146 | if (vertexValues.length > 1 && (vertexValues[1] != '')) { textureCoordsIndex = parseInt(vertexValues[1]); }
147 |
148 | line.push({
149 | vertexIndex,
150 | textureCoordsIndex
151 | });
152 | }
153 | this._currentModel().lines.push(line);
154 | }
155 |
156 | _parsePolygon(lineItems) {
157 | const totalVertices = (lineItems.length - 1);
158 | if (totalVertices < 3) { throw (`Face statement has less than 3 vertices${this.filePath}${this.lineNumber}`); }
159 |
160 | const face = {
161 | material: this.currentMaterial,
162 | group: this.currentGroup,
163 | smoothingGroup: this.smoothingGroup,
164 | vertices: []
165 | };
166 |
167 | for (let i = 0; i < totalVertices; i += 1) {
168 | const vertexString = lineItems[i + 1];
169 | const vertexValues = vertexString.split('/');
170 |
171 | if (vertexValues.length < 1 || vertexValues.length > 3) { throw (`Too many values (separated by /) for a single vertex${this.filePath}${this.lineNumber}`); }
172 |
173 | let vertexIndex = 0;
174 | let textureCoordsIndex = 0;
175 | let vertexNormalIndex = 0;
176 | vertexIndex = parseInt(vertexValues[0]);
177 | if (vertexValues.length > 1 && (vertexValues[1] != '')) { textureCoordsIndex = parseInt(vertexValues[1]); }
178 | if (vertexValues.length > 2) { vertexNormalIndex = parseInt(vertexValues[2]); }
179 |
180 | if (vertexIndex == 0) { throw 'Faces uses invalid vertex index of 0'; }
181 |
182 | // Negative vertex indices refer to the nth last defined vertex
183 | // convert these to postive indices for simplicity
184 | if (vertexIndex < 0) { vertexIndex = this._currentModel().vertices.length + 1 + vertexIndex; }
185 |
186 | face.vertices.push({
187 | vertexIndex,
188 | textureCoordsIndex,
189 | vertexNormalIndex
190 | });
191 | }
192 | this._currentModel().faces.push(face);
193 | }
194 |
195 | _parseMtlLib(lineItems) {
196 | if (lineItems.length >= 2) { this.result.materialLibraries.push(lineItems[1]); }
197 | }
198 |
199 | _parseUseMtl(lineItems) {
200 | if (lineItems.length >= 2) { this.currentMaterial = lineItems[1]; }
201 | }
202 |
203 | _parseSmoothShadingStatement(lineItems) {
204 | if (lineItems.length != 2) { throw 'Smoothing group statements must have exactly 1 argument (eg. s )'; }
205 |
206 | const groupNumber = (lineItems[1].toLowerCase() == 'off') ? 0 : parseInt(lineItems[1]);
207 | this.smoothingGroup = groupNumber;
208 | }
209 | }
--------------------------------------------------------------------------------
/scripts/obj/parse-obj.js:
--------------------------------------------------------------------------------
1 | //import { OBJFile } from "./obj-file-parser.js"
2 |
3 | function parseObj(file) {
4 | const objFile = new OBJFile(file)
5 | const output = objFile.parse()
6 |
7 | let numTris = 0
8 | let trisArr = []
9 |
10 | let x_min = 1e30
11 | let y_min = 1e30
12 | let z_min = 1e30
13 |
14 | let x_max = -1e30
15 | let y_max = -1e30
16 | let z_max = -1e30
17 |
18 | for (var x = 0; x < output.models[0].faces.length; x++) {
19 | let face = output.models[0].faces[x]
20 |
21 | let vr = face.vertices[0]
22 |
23 | let vr_x = output.models[0].vertices[vr.vertexIndex - 1].x
24 | let vr_z = output.models[0].vertices[vr.vertexIndex - 1].y
25 | let vr_y = output.models[0].vertices[vr.vertexIndex - 1].z
26 |
27 | x_min = Math.min(x_min, vr_x)
28 | y_min = Math.min(y_min, vr_y)
29 | z_min = Math.min(z_min, vr_z)
30 |
31 | x_max = Math.max(x_max, vr_x)
32 | y_max = Math.max(y_max, vr_y)
33 | z_max = Math.max(z_max, vr_z)
34 |
35 | for (var y = 1; y < face.vertices.length - 1; y++) {
36 | let v1 = face.vertices[y + 0]
37 | let v2 = face.vertices[y + 1]
38 |
39 | let v1_x = output.models[0].vertices[v1.vertexIndex - 1].x
40 | let v1_z = output.models[0].vertices[v1.vertexIndex - 1].y
41 | let v1_y = output.models[0].vertices[v1.vertexIndex - 1].z
42 |
43 | let v2_x = output.models[0].vertices[v2.vertexIndex - 1].x
44 | let v2_z = output.models[0].vertices[v2.vertexIndex - 1].y
45 | let v2_y = output.models[0].vertices[v2.vertexIndex - 1].z
46 |
47 | x_min = Math.min(v1_x, Math.min(x_min, v2_x))
48 | y_min = Math.min(v1_y, Math.min(y_min, v2_y))
49 | z_min = Math.min(v1_z, Math.min(z_min, v2_z))
50 |
51 | x_max = Math.max(v1_x, Math.max(x_max, v2_x))
52 | y_max = Math.max(v1_y, Math.max(y_max, v2_y))
53 | z_max = Math.max(v1_z, Math.max(z_max, v2_z))
54 |
55 | trisArr.push(
56 | vr_x, vr_y, vr_z, 3.1415,
57 | v1_x, v1_y, v1_z, 3.1415,
58 | v2_x, v2_y, v2_z, 3.1415,
59 | )
60 |
61 | numTris++
62 | }
63 | }
64 |
65 | // add a floor to the model
66 | numTris += 2
67 |
68 | let floorHeight = z_min + .01
69 | let floorSize = 10000.
70 |
71 | trisArr.push(
72 | -floorSize, -floorSize, floorHeight, 3.1415,
73 | floorSize, -floorSize, floorHeight, 3.1415,
74 | -floorSize, floorSize, floorHeight, 3.1415,
75 | floorSize, -floorSize, floorHeight, 3.1415,
76 | floorSize, floorSize, floorHeight, 3.1415,
77 | -floorSize, floorSize, floorHeight, 3.1415,
78 | )
79 |
80 | return {
81 | NUM_TRIS: numTris,
82 | TRI_ARR: trisArr,
83 | BOUNDS: {
84 | min: [x_min, y_min, z_min],
85 | max: [x_max, y_max, z_max]
86 | }
87 | }
88 | }
--------------------------------------------------------------------------------
/scripts/pathtracer/pathtracer.js:
--------------------------------------------------------------------------------
1 | // path tracer similar to https://github.com/AddisonPrairie/Personal-Site/blob/main/demos/sdf002/relic/script.js
2 | function initPathTracer(device, canvas, bvh) {
3 | const CANVAS = initCanvas(device, canvas)
4 |
5 | let rot = 0.
6 | let dist = 1.5 * Math.max(
7 | Math.max(
8 | bvh.BOUNDS.max[0] - bvh.BOUNDS.min[0],
9 | bvh.BOUNDS.max[1] - bvh.BOUNDS.min[1]
10 | ),
11 | bvh.BOUNDS.max[2] - bvh.BOUNDS.min[2]
12 | )
13 |
14 | let lookAt = [
15 | (bvh.BOUNDS.min[0] + bvh.BOUNDS.max[0]) * .5,
16 | (bvh.BOUNDS.min[1] + bvh.BOUNDS.max[1]) * .5,
17 | (bvh.BOUNDS.min[2] + bvh.BOUNDS.max[2]) * .5,
18 | ]
19 | let position = [
20 | lookAt[0] + Math.cos(rot) * dist,
21 | lookAt[1] + Math.sin(rot) * dist,
22 | lookAt[2]
23 | ]
24 | let bReset = true
25 |
26 |
27 | const { VS, FS, CS } = SRC()
28 |
29 | // create textures for passing data between passes
30 | const oTextures = [
31 | device.createTexture({size: [CANVAS.w, CANVAS.h], format: "rgba32float", dimension: "2d", usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING}),
32 | device.createTexture({size: [CANVAS.w, CANVAS.h], format: "rgba32float", dimension: "2d", usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING})
33 | ]
34 | const dTextures = [
35 | device.createTexture({size: [CANVAS.w, CANVAS.h], format: "rgba32float", dimension: "2d", usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING}),
36 | device.createTexture({size: [CANVAS.w, CANVAS.h], format: "rgba32float", dimension: "2d", usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING})
37 | ]
38 | const tTextures = [
39 | device.createTexture({size: [CANVAS.w, CANVAS.h], format: "rgba32float", dimension: "2d", usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING}),
40 | device.createTexture({size: [CANVAS.w, CANVAS.h], format: "rgba32float", dimension: "2d", usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING})
41 | ]
42 | const bTextures = [
43 | device.createTexture({size: [CANVAS.w, CANVAS.h], format: "rgba32float", dimension: "2d", usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING}),
44 | device.createTexture({size: [CANVAS.w, CANVAS.h], format: "rgba32float", dimension: "2d", usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING})
45 | ]
46 |
47 | const DRAW_SM = device.createShaderModule({
48 | code: VS + FS
49 | })
50 |
51 | const DRAW_BG_LAYOUT = device.createBindGroupLayout({
52 | entries: [
53 | {
54 | binding: 0,
55 | visibility: GPUShaderStage.FRAGMENT,
56 | texture: {
57 | sampleType: "unfilterable-float",
58 | viewDimension: "2d",
59 | multisampled: false
60 | }
61 | }
62 | ]
63 | })
64 |
65 | const DRAW_BGS = [
66 | device.createBindGroup({
67 | layout: DRAW_BG_LAYOUT,
68 | entries: [
69 | {
70 | binding: 0,
71 | resource: tTextures[1].createView()
72 | }
73 | ]
74 | }),
75 | device.createBindGroup({
76 | layout: DRAW_BG_LAYOUT, entries: [
77 | {
78 | binding: 0,
79 | resource: tTextures[0].createView()
80 | }
81 | ]
82 | })
83 | ]
84 |
85 | const DRAW_PIPELINE = device.createRenderPipeline({
86 | layout: device.createPipelineLayout({bindGroupLayouts: [DRAW_BG_LAYOUT]}),
87 | vertex: {
88 | module: DRAW_SM,
89 | entryPoint: "vs"
90 | },
91 | fragment: {
92 | module: DRAW_SM,
93 | entryPoint: "fs",
94 | targets: [
95 | {
96 | format: CANVAS.presentationFormat
97 | }
98 | ]
99 | }
100 | })
101 |
102 | const PT_I_BG_LAYOUT = device.createBindGroupLayout({
103 | entries: [
104 | {
105 | binding: 0,
106 | visibility: GPUShaderStage.COMPUTE,
107 | texture: {
108 | sampleType: "unfilterable-float",
109 | viewDimension: "2d",
110 | multisampled: false
111 | }
112 | },
113 | {
114 | binding: 1,
115 | visibility: GPUShaderStage.COMPUTE,
116 | texture: {
117 | sampleType: "unfilterable-float",
118 | viewDimension: "2d",
119 | multisampled: false
120 | }
121 | },
122 | {
123 | binding: 2,
124 | visibility: GPUShaderStage.COMPUTE,
125 | texture: {
126 | sampleType: "unfilterable-float",
127 | viewDimension: "2d",
128 | multisampled: false
129 | }
130 | },
131 | {
132 | binding: 3,
133 | visibility: GPUShaderStage.COMPUTE,
134 | texture: {
135 | sampleType: "unfilterable-float",
136 | viewDimension: "2d",
137 | multisampled: false
138 | }
139 | }
140 | ],
141 | label: "PT_I_BG_LAYOUT"
142 | })
143 |
144 | const PT_I_BGS = [
145 | device.createBindGroup({
146 | layout: PT_I_BG_LAYOUT,
147 | entries: [
148 | {
149 | binding: 0,
150 | resource: oTextures[0].createView()
151 | },
152 | {
153 | binding: 1,
154 | resource: dTextures[0].createView()
155 | },
156 | {
157 | binding: 2,
158 | resource: tTextures[0].createView()
159 | },
160 | {
161 | binding: 3,
162 | resource: bTextures[0].createView()
163 | }
164 | ]
165 | }),
166 | device.createBindGroup({
167 | layout: PT_I_BG_LAYOUT,
168 | entries: [
169 | {
170 | binding: 0,
171 | resource: oTextures[1].createView()
172 | },
173 | {
174 | binding: 1,
175 | resource: dTextures[1].createView()
176 | },
177 | {
178 | binding: 2,
179 | resource: tTextures[1].createView()
180 | },
181 | {
182 | binding: 3,
183 | resource: bTextures[1].createView()
184 | }
185 | ]
186 | }),
187 | ]
188 |
189 | const PT_O_BG_LAYOUT = device.createBindGroupLayout({
190 | entries: [
191 | {
192 | binding: 0,
193 | visibility: GPUShaderStage.COMPUTE,
194 | storageTexture: {
195 | format: "rgba32float",
196 | viewDimension: "2d"
197 | }
198 | },
199 | {
200 | binding: 1,
201 | visibility: GPUShaderStage.COMPUTE,
202 | storageTexture: {
203 | format: "rgba32float",
204 | viewDimension: "2d"
205 | }
206 | },
207 | {
208 | binding: 2,
209 | visibility: GPUShaderStage.COMPUTE,
210 | storageTexture: {
211 | format: "rgba32float",
212 | viewDimension: "2d"
213 | }
214 | },
215 | {
216 | binding: 3,
217 | visibility: GPUShaderStage.COMPUTE,
218 | storageTexture: {
219 | format: "rgba32float",
220 | viewDimension: "2d"
221 | }
222 | }
223 | ],
224 | label: "PT_O_BG_LAYOUTs"
225 | })
226 |
227 | const PT_O_BGS = [
228 | device.createBindGroup({
229 | layout: PT_O_BG_LAYOUT,
230 | entries: [
231 | {
232 | binding: 0,
233 | resource: oTextures[1].createView()
234 | },
235 | {
236 | binding: 1,
237 | resource: dTextures[1].createView()
238 | },
239 | {
240 | binding: 2,
241 | resource: tTextures[1].createView()
242 | },
243 | {
244 | binding: 3,
245 | resource: bTextures[1].createView()
246 | }
247 | ]
248 | }),
249 | device.createBindGroup({
250 | layout: PT_O_BG_LAYOUT,
251 | entries: [
252 | {
253 | binding: 0,
254 | resource: oTextures[0].createView()
255 | },
256 | {
257 | binding: 1,
258 | resource: dTextures[0].createView()
259 | },
260 | {
261 | binding: 2,
262 | resource: tTextures[0].createView()
263 | },
264 | {
265 | binding: 3,
266 | resource: bTextures[0].createView()
267 | }
268 | ]
269 | })
270 | ]
271 |
272 | const PT_BVH_BG_LAYOUT = device.createBindGroupLayout({
273 | entries: [
274 | {
275 | binding: 0,
276 | visibility: GPUShaderStage.COMPUTE,
277 | buffer: {
278 | type: "storage"
279 | }
280 | },
281 | {
282 | binding: 1,
283 | visibility: GPUShaderStage.COMPUTE,
284 | buffer: {
285 | type: "storage"
286 | }
287 | }
288 | ]
289 | })
290 |
291 | const PT_BVH_BG = device.createBindGroup({
292 | layout: PT_BVH_BG_LAYOUT,
293 | entries: [
294 | {
295 | binding: 0,
296 | resource: {
297 | buffer: bvh.BVH_BUFFER
298 | }
299 | },
300 | {
301 | binding: 1,
302 | resource: {
303 | buffer: bvh.O_TRIANGLE_BUFFER
304 | }
305 | }
306 | ]
307 | })
308 |
309 | const PT_UNI_BG_LAYOUT = device.createBindGroupLayout({
310 | entries: [
311 | {
312 | binding: 0,
313 | visibility: GPUShaderStage.COMPUTE,
314 | buffer: {
315 | type: "uniform"
316 | }
317 | }
318 | ]
319 | })
320 |
321 | const UNIFORM_BUFFER = device.createBuffer({
322 | size: 32,
323 | usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM
324 | })
325 |
326 | const PT_UNI_BG = device.createBindGroup({
327 | layout: PT_UNI_BG_LAYOUT,
328 | entries: [
329 | {
330 | binding: 0,
331 | resource: {
332 | buffer: UNIFORM_BUFFER
333 | }
334 | }
335 | ]
336 | })
337 |
338 | const PT_SM = device.createShaderModule({
339 | code: CS
340 | })
341 |
342 | const PT_PIPELINE = device.createComputePipeline({
343 | layout: device.createPipelineLayout({
344 | bindGroupLayouts: [PT_I_BG_LAYOUT, PT_O_BG_LAYOUT, PT_BVH_BG_LAYOUT, PT_UNI_BG_LAYOUT]
345 | }),
346 | compute: {
347 | module: PT_SM,
348 | entryPoint: "main"
349 | }
350 | })
351 |
352 | // some variables needed by the methods below
353 | let ctr = 0
354 |
355 | return { draw, sample, rotateView }
356 |
357 | async function sample() {
358 | if (bReset) ctr = 0
359 | if (ctr > 1024) return
360 |
361 | const PP_IDX = ctr++ % 2
362 |
363 | device.queue.writeBuffer(
364 | UNIFORM_BUFFER,
365 | 0,
366 | new Float32Array([
367 | position[0], position[1], position[2], bReset ? 1 : 0,
368 | lookAt[0], lookAt[1], lookAt[2], 0.
369 | ]),
370 | 0
371 | )
372 |
373 | // set reset flag to false so that we don't perpetually re-render
374 | bReset = false
375 |
376 | const CE = device.createCommandEncoder()
377 | const P = CE.beginComputePass()
378 | P.setPipeline(PT_PIPELINE)
379 | P.setBindGroup(0, PT_I_BGS[PP_IDX])
380 | P.setBindGroup(1, PT_O_BGS[PP_IDX])
381 | P.setBindGroup(2, PT_BVH_BG)
382 | P.setBindGroup(3, PT_UNI_BG)
383 | P.dispatchWorkgroups(Math.ceil(CANVAS.w / 8), Math.ceil(CANVAS.h / 8))
384 | P.end()
385 |
386 | device.queue.submit([CE.finish()])
387 |
388 | await device.queue.onSubmittedWorkDone()
389 |
390 | return
391 | }
392 |
393 | async function draw() {
394 | const PP_IDX = ctr % 2
395 |
396 | const CE = device.createCommandEncoder()
397 | const P = CE.beginRenderPass({
398 | colorAttachments: [
399 | {
400 | view: CANVAS.ctx.getCurrentTexture().createView(),
401 | clearValue: {r: 1., g: 0., b: 0., a: 1.},
402 | loadOp: "clear",
403 | storeOp: "store"
404 | }
405 | ]
406 | })
407 | P.setPipeline(DRAW_PIPELINE)
408 | P.setBindGroup(0, DRAW_BGS[PP_IDX])
409 | P.draw(6)
410 | P.end()
411 |
412 | device.queue.submit([CE.finish()])
413 |
414 | await device.queue.onSubmittedWorkDone()
415 |
416 | return
417 | }
418 |
419 | function rotateView() {
420 | bReset = true
421 | rot += Math.PI / 4
422 | position = [
423 | lookAt[0] + Math.cos(rot) * dist,
424 | lookAt[1] + Math.sin(rot) * dist,
425 | lookAt[2]
426 | ]
427 | }
428 |
429 | function SRC() {
430 | let CS = /* wgsl */ `
431 | @group(0) @binding(0) var otex : texture_2d;
432 | @group(0) @binding(1) var dtex : texture_2d;
433 | @group(0) @binding(2) var ttex : texture_2d;
434 | @group(0) @binding(3) var btex : texture_2d;
435 |
436 | @group(1) @binding(0) var oout : texture_storage_2d;
437 | @group(1) @binding(1) var dout : texture_storage_2d;
438 | @group(1) @binding(2) var tout : texture_storage_2d;
439 | @group(1) @binding(3) var bout : texture_storage_2d;
440 |
441 | struct BVHNode {
442 | aabb_l_min : vec3f,
443 | l_child : i32,
444 | aabb_l_max : vec3f,
445 | f_1 : i32,
446 | aabb_r_min : vec3f,
447 | r_child : i32,
448 | aabb_r_max : vec3f,
449 | f_2 : i32
450 | };
451 |
452 | struct Triangle {
453 | v0 : vec3f,
454 | v1 : vec3f,
455 | v2 : vec3f
456 | };
457 |
458 | @group(2) @binding(0) var bvh : array;
459 | @group(2) @binding(1) var tri : array;
460 |
461 | struct Uniforms {
462 | pos : vec3f,
463 | rst : f32,
464 | lat : vec3f,
465 | };
466 |
467 | @group(3) @binding(0) var uniforms : Uniforms;
468 |
469 | const Pi = 3.14159265358979323846;
470 | const InvPi = 0.31830988618379067154;
471 | const Inv2Pi = 0.15915494309189533577;
472 | const Inv4Pi = 0.07957747154594766788;
473 | const PiOver2 = 1.57079632679489661923;
474 | const PiOver4 = 0.78539816339744830961;
475 | const Sqrt2 = 1.41421356237309504880;
476 |
477 | const sw_f : vec2f = vec2f(${CANVAS.w}., ${CANVAS.h}.);
478 | const sw_u : vec2u = vec2u(${CANVAS.w}u, ${CANVAS.h}u);
479 |
480 | const fov : f32 = 60.f;
481 | const sinfov : f32 = sin(.5 * fov * Pi / 180.f);
482 | const aspect : f32 = ${CANVAS.w / CANVAS.h}f;
483 |
484 | const eps : f32 = .0001;
485 |
486 | const mbounce : f32 = 5.;
487 |
488 | struct RayHit {
489 | norm : vec3f,
490 | dist : f32
491 | };
492 |
493 | var stack : array;
494 |
495 | fn intersect_bvh(o_in : vec3f, d_in : vec3f) -> RayHit {
496 |
497 | var o : vec3f = o_in;
498 | var d : vec3f = d_in;
499 |
500 | // (lazy) fix for divide by zero errors - change later
501 | d += vec3f(abs(d) < vec3f(.00001)) * vec3f(.00001);
502 |
503 | var dist : f32 = 1e30f;
504 | var norm : vec3f = vec3f(0.f);
505 |
506 | var stack_ptr : i32 = 0;
507 | var node_idx : i32 = 0;
508 |
509 | while (stack_ptr >= 0) {
510 | // we are testing against a leaf node
511 | if (node_idx < 0) {
512 | var tr : Triangle = tri[-(node_idx + 1)];
513 |
514 | var n_dis : vec4f = tri_intersect(o, d, tr);
515 |
516 | if (n_dis.w > 0.f && n_dis.w < dist) {
517 | norm = n_dis.xyz;
518 | dist = min(n_dis.w, dist);
519 | }
520 |
521 | stack_ptr -= 1;
522 | node_idx = stack[stack_ptr];
523 | } else {
524 | var node : BVHNode = bvh[node_idx];
525 |
526 | var l_dist : f32 = aabb_intersect(
527 | node.aabb_l_min,
528 | node.aabb_l_max,
529 | o, d
530 | );
531 |
532 | var r_dist : f32 = aabb_intersect(
533 | node.aabb_r_min,
534 | node.aabb_r_max,
535 | o, d
536 | );
537 |
538 | var l_valid : bool = l_dist != -1e30f && l_dist < dist;
539 | var r_valid : bool = r_dist != -1e30f && r_dist < dist;
540 |
541 | if (l_valid && r_valid) {
542 | var f_idx : i32;
543 | var c_idx : i32;
544 |
545 | if (l_dist < r_dist) {
546 | c_idx = node.l_child;
547 | f_idx = node.r_child;
548 | } else {
549 | c_idx = node.r_child;
550 | f_idx = node.l_child;
551 | }
552 |
553 | stack[stack_ptr] = f_idx;
554 | stack_ptr += 1;
555 | node_idx = c_idx;
556 | } else
557 | if (l_valid) {
558 | node_idx = node.l_child;
559 | } else
560 | if (r_valid) {
561 | node_idx = node.r_child;
562 | } else {
563 | stack_ptr -= 1;
564 | node_idx = stack[stack_ptr];
565 | }
566 | }
567 | }
568 |
569 | var returned : RayHit;
570 |
571 | returned.dist = dist;
572 |
573 | if (dot(d, -norm) > 0.) {
574 | returned.norm = norm;
575 | } else {
576 | returned.norm = -norm;
577 | }
578 |
579 | if (returned.dist == 1e30f) {
580 | returned.dist = -1.f;
581 | }
582 |
583 | return returned;
584 | }
585 |
586 | // from: https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/moller-trumbore-ray-triangle-intersection.html
587 | fn tri_intersect(o : vec3f, d : vec3f, tri : Triangle) -> vec4f {
588 | var v0v1 : vec3f = tri.v1 - tri.v0;
589 | var v0v2 : vec3f = tri.v2 - tri.v0;
590 | var pvec : vec3f = cross(d, v0v2);
591 |
592 | var det : f32 = dot(v0v1, pvec);
593 |
594 | if (abs(det) < 1e-10) {
595 | return vec4f(-1.f);
596 | }
597 |
598 | var i_det : f32 = 1.f / det;
599 |
600 | var tvec : vec3f = o - tri.v0;
601 |
602 | var u : f32 = dot(tvec, pvec) * i_det;
603 |
604 | if (u < 0.f || u > 1.f) {
605 | return vec4f(-1.f);
606 | }
607 |
608 | var qvec : vec3f = cross(tvec, v0v1);
609 |
610 | var v : f32 = dot(d, qvec) * i_det;
611 | if (v < 0.f || u + v > 1.f) {
612 | return vec4f(-1.f);
613 | }
614 |
615 | return vec4f(
616 | normalize(cross(v0v1, v0v2)),
617 | dot(v0v2, qvec) * i_det
618 | );
619 | }
620 |
621 | fn aabb_intersect(low : vec3f, high : vec3f, o : vec3f, d : vec3f) -> f32 {
622 | var iDir = 1. / d;
623 | var f = (high - o) * iDir; var n = (low - o) * iDir;
624 | var tmax = max(f, n); var tmin = min(f, n);
625 | var t0 = max(tmin.x, max(tmin.y, tmin.z));
626 | var t1 = min(tmax.x, min(tmax.y, tmax.z));
627 | return select(-1e30, select(t0, -1e30, t1 < 0.), t1 >= t0);
628 | }
629 |
630 | @compute @workgroup_size(8, 8, 1)
631 | fn main(@builtin(global_invocation_id) global_id : vec3u) {
632 | if (any(global_id.xy >= sw_u)) {return;}
633 | var coord : vec2i = vec2i(global_id.xy);
634 |
635 | var o : vec4f;
636 | var d : vec4f;
637 | var t : vec4f;
638 | var b : vec4f;
639 |
640 | if (uniforms.rst == 0.) {
641 | o = textureLoad(otex, coord, 0);
642 | d = textureLoad(dtex, coord, 0);
643 | t = textureLoad(ttex, coord, 0);
644 | b = textureLoad(btex, coord, 0);
645 | }
646 |
647 | ptStep(coord, &o, &d, &b, &t);
648 |
649 | textureStore(oout, coord, o);
650 | textureStore(dout, coord, d);
651 | textureStore(tout, coord, t);
652 | textureStore(bout, coord, b);
653 | }
654 |
655 | fn ptStep(coord : vec2i, oin : ptr, din : ptr, bin : ptr, tin : ptr) {
656 | var o : vec3f = (*oin).xyz;
657 | var d : vec3f = (*din).xyz;
658 | var b : vec3f = (*bin).xyz;
659 |
660 | var seed : f32 = (*oin).a;
661 | var bounces : f32 = (*din).a;
662 |
663 | var bNewPath : bool = all(b == vec3f(0.));
664 | var frame0 : bool = bNewPath && ((*tin).a == 0.);
665 | if (frame0) {
666 | seed = f32(baseHash(vec2u(coord))) / f32(0xffffffffu) + .008;
667 | }
668 |
669 | if (bNewPath) {
670 | getCameraRay(vec2f(coord) + rand2(seed), &o, &d); seed += 2.;
671 | b = vec3f(1.);
672 | }
673 |
674 | var res : RayHit = intersect_bvh(o, d);
675 | if (res.dist >= 0.) {
676 | var o1 : vec3f = normalize(ortho(res.norm));
677 | var o2 : vec3f = normalize(cross(o1, res.norm));
678 |
679 | var wo : vec3f = toLocal(o1, o2, res.norm, -d);
680 | var wi : vec3f;
681 | var c : vec3f;
682 |
683 | o = o + d * res.dist;
684 |
685 | c = lambertDiffuse(&seed, &wi, wo, vec3f(.3f));
686 | //c = ggxSmith(&seed, &wi, wo, vec3f(.33f), .1);
687 | //c = perfectMirror(&wi, wo, vec3f(.2));
688 |
689 | b *= c;
690 | o += res.norm * 1.01 * eps;
691 | d = toWorld(o1, o2, res.norm, wi);
692 |
693 | if (bounces > 3) {
694 | var q : f32 = max(.05f, 1. - b.y);
695 | if (rand2(seed).x < q) {
696 | b = vec3f(0.);
697 | } else {
698 | b /= 1. - q;
699 | } seed += 2.;
700 | }
701 |
702 | if (all(b == vec3f(0.))) {
703 | *tin += vec4f(0., 0., 0., 1.);
704 | bounces = -1.;
705 | }
706 | } else {
707 | *tin += vec4f(b * 8., 1.);
708 | bounces = -1.;
709 | b = vec3f(0.);
710 | }
711 |
712 | *oin = vec4f(o, seed);
713 | *din = vec4f(d, bounces + 1.);
714 | *bin = vec4f(b, 1.);
715 | }
716 |
717 | fn lambertDiffuse(seed : ptr, wi : ptr, wo : vec3f, c : vec3f) -> vec3f {
718 | *wi = cosineSampleHemisphere(rand2(*seed)); *seed += 2.;
719 | return pow(c, vec3f(2.2));
720 | }
721 |
722 | fn getCameraRay(coord : vec2f, o : ptr, d : ptr) {
723 | var sspace : vec2f = coord / sw_f; sspace = sspace * 2. - vec2f(1.); sspace.y *= -1.;
724 | var local : vec3f = vec3f(
725 | aspect * sspace.x * sinfov,
726 | 1.,
727 | sspace.y * sinfov
728 | );
729 | var forward : vec3f = normalize(vec3f(uniforms.lat - uniforms.pos));
730 | var right : vec3f = normalize(vec3f(forward.y, -forward.x, 0.));
731 | var up : vec3f = cross(right, forward);
732 |
733 | *o = uniforms.pos;
734 | *d = toWorld(right, forward, up, normalize(local));
735 | }
736 |
737 | fn ortho(v : vec3) -> vec3 {
738 | if (abs(v.x) > abs(v.y)) {
739 | return vec3(-v.y, v.x, 0.);
740 | }
741 | return vec3(0., -v.z, v.y);
742 | }
743 |
744 | fn toLocal(v_x : vec3f, v_y : vec3f, v_z : vec3f, w : vec3f) -> vec3f {
745 | return vec3f(dot(v_x, w), dot(v_y, w), dot(v_z, w));
746 | }
747 |
748 | fn toWorld(v_x : vec3f, v_y : vec3f, v_z : vec3f, w : vec3f) -> vec3f {
749 | return v_x * w.x + v_y * w.y + v_z * w.z;
750 | }
751 |
752 | //GPU hashes from: https://www.shadertoy.com/view/XlycWh
753 | fn baseHash(p : vec2u) -> u32 {
754 | var p2 : vec2u = 1103515245u*((p >> vec2u(1u))^(p.yx));
755 | var h32 : u32 = 1103515245u*((p2.x)^(p2.y>>3u));
756 | return h32^(h32 >> 16u);
757 | }
758 | fn rand2(seed : f32) -> vec2f {
759 | var n : u32 = baseHash(bitcast(vec2f(seed + 1., seed + 2.)));
760 | var rz : vec2u = vec2u(n, n * 48271u);
761 | return vec2f(rz.xy & vec2u(0x7fffffffu))/f32(0x7fffffff);
762 | }
763 |
764 | //from: pbrt
765 | fn cosineSampleHemisphere(r2 : vec2f) -> vec3f {
766 | var d : vec2f = uniformSampleDisk(r2);
767 | var z : f32 = sqrt(max(0., 1. - d.x * d.x - d.y * d.y));
768 | return vec3f(d.xy, z);
769 | }
770 | fn uniformSampleDisk(r2 : vec2f) -> vec2f {
771 | var r : f32 = sqrt(max(r2.x, 0.));
772 | var theta : f32 = 2. * Pi * r2.y;
773 | return vec2f(r * cos(theta), r * sin(theta));
774 | }`
775 |
776 | let VS = /* wgsl */ `
777 | @vertex
778 | fn vs(@builtin(vertex_index) vertexIndex : u32) -> @builtin(position) vec4f {
779 | switch(vertexIndex) {
780 | case 0u: {
781 | return vec4f(1., 1., 0., 1.);}
782 | case 1u: {
783 | return vec4f(-1., 1., 0., 1.);}
784 | case 2u: {
785 | return vec4f(-1., -1., 0., 1.);}
786 | case 3u: {
787 | return vec4f(1., -1., 0., 1.);}
788 | case 4u: {
789 | return vec4f(1., 1., 0., 1.);}
790 | case 5u: {
791 | return vec4f(-1., -1., 0., 1.);}
792 | default: {
793 | return vec4f(0., 0., 0., 0.);}
794 | }
795 | }`
796 |
797 | let FS = /* wgsl */ `
798 | @group(0) @binding(0) var image : texture_2d;
799 |
800 | fn lum(z : vec3f) -> f32 {
801 | return dot(z, vec3f(.2126, .7152, .0722));
802 | }
803 |
804 | @fragment
805 | fn fs(@builtin(position) fragCoord : vec4f) -> @location(0) vec4f {
806 | var raw : vec4f = textureLoad(image, vec2i(fragCoord.xy), 0);
807 | var col : vec3f = raw.xyz / raw.a;
808 |
809 | // apply reinhard tonemap
810 | col = col / (1.f + lum(col));
811 |
812 | return vec4f(
813 | pow(col, vec3f(1. / 2.2)),
814 | 1.
815 | );
816 | }`
817 |
818 | return { CS, VS, FS }
819 | }
820 | }
821 |
822 |
823 | function initCanvas(device, canvas) {
824 | let ctx = canvas.getContext("webgpu")
825 |
826 | let presentationFormat = navigator.gpu.getPreferredCanvasFormat()
827 | ctx.configure({device, format: presentationFormat})
828 |
829 | const w = Math.ceil(canvas.clientWidth * 1.5)
830 | const h = Math.ceil(canvas.clientHeight * 1.5)
831 |
832 | canvas.width = w
833 | canvas.height = h
834 |
835 | return {
836 | ctx, presentationFormat, w, h
837 | }
838 | }
--------------------------------------------------------------------------------