├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── benchmark.rs ├── build.rs ├── kernels ├── Cargo.lock ├── Cargo.toml └── src │ ├── bsdf.rs │ ├── intersection.rs │ ├── lib.rs │ ├── light_pick.rs │ ├── rng.rs │ ├── skybox.rs │ ├── util.rs │ └── vec.rs ├── rust-toolchain.toml ├── scene.glb ├── scenes ├── BreakTime.glb ├── DarkCornell.glb ├── FurnaceTest.glb ├── GlassTest.glb ├── PBRTest.glb └── VeachMIS.glb ├── shared_structs ├── Cargo.lock ├── Cargo.toml └── src │ ├── image_polyfill.rs │ └── lib.rs ├── src ├── app.rs ├── asset.rs ├── atlas.rs ├── bvh.rs ├── lib.rs ├── light_pick.rs ├── main.rs ├── resources │ ├── bluenoise.png │ └── render.wgsl └── trace.rs └── tests └── correctness_tests.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /kernels/target 3 | .benchmarks/ -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "lldb", 9 | "request": "launch", 10 | "name": "Debug executable 'rustic'", 11 | "cargo": { 12 | "args": [ 13 | "build", 14 | "--bin=rustic", 15 | "--package=rustic" 16 | ], 17 | "filter": { 18 | "name": "rustic", 19 | "kind": "bin" 20 | } 21 | }, 22 | "args": [], 23 | "cwd": "${workspaceFolder}" 24 | }, 25 | { 26 | "type": "lldb", 27 | "request": "launch", 28 | "name": "Debug unit tests in executable 'rustic'", 29 | "cargo": { 30 | "args": [ 31 | "test", 32 | "--no-run", 33 | "--bin=rustic", 34 | "--package=rustic" 35 | ], 36 | "filter": { 37 | "name": "rustic", 38 | "kind": "bin" 39 | } 40 | }, 41 | "args": [], 42 | "cwd": "${workspaceFolder}" 43 | } 44 | ] 45 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "rust-analyzer.linkedProjects": [ 3 | "./kernels/Cargo.toml", 4 | "./Cargo.toml" 5 | ] 6 | } -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustic" 3 | version = "0.1.0" 4 | authors = ["Pema Malling "] 5 | edition = "2021" 6 | 7 | [dependencies] 8 | shared_structs = { path = "shared_structs" } 9 | kernels = { path = "kernels" } 10 | bytemuck = { version = "1.13.1", features = ["derive"] } 11 | glam = { version = "0.22.0", features = ["bytemuck"] } 12 | gpgpu = { git = "https://github.com/pema99/gpgpu-rs.git", branch = "dev", features = ["image", "integrate-image"] } 13 | rand = "0.8.5" 14 | oidn = { version = "1.4.3", optional = true } 15 | lazy_static = "1.4.0" 16 | russimp = { version = "2.0.5", features = ["prebuilt"] } 17 | image = { version = "0.24.6", default-features = false, features = ["png", "jpeg", 
"hdr", "tga", "exr", "openexr"] } 18 | parking_lot = "0.12.1" 19 | winit = "0.27.5" 20 | wgpu = { version = "0.14.2", features = ["spirv"] } 21 | egui-wgpu = "0.20.0" 22 | egui_winit_platform = "0.17.0" 23 | egui = "0.20.0" 24 | pollster = "0.2.5" 25 | tinyfiledialogs = "3.9.1" 26 | fast_image_resize = "2.7.3" 27 | rayon = "1.7.0" 28 | 29 | [build-dependencies] 30 | spirv-builder = "0.7.0" 31 | 32 | [features] 33 | oidn = ["dep:oidn"] 34 | 35 | [profile.release.build-override] 36 | opt-level = 3 37 | codegen-units = 16 38 | 39 | [profile.dev.build-override] 40 | opt-level = 3 41 | 42 | [profile.dev] 43 | opt-level = 1 44 | incremental = true 45 | 46 | [dev-dependencies] 47 | criterion = { version = "0.3", features = ["html_reports"] } 48 | 49 | [[bench]] 50 | name = "benchmark" 51 | harness = false -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 Pema Malling 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Yet another path tracer 2 | 3 | Yet another GPU accelerated toy path tracer, but everything is written in Rust (:rocket:) - both the GPU and CPU code. Uses [rust-gpu](https://github.com/EmbarkStudios/rust-gpu) to transpile Rust (:rocket:) code to SPIR-V, and then uses [wgpu](https://github.com/gfx-rs/wgpu) to execute that SPIR-V. 4 | 5 | # Features 6 | - Simple GPU accelerated path tracing. 7 | - Supports PBR materials with roughness/metallic workflow. These can be set on a per-mesh basis. 8 | - Supports texture mapping. Can load albedo, normal, roughness and metallic maps from scene file. 9 | - Ray intersections are made fast using a [BVH](https://en.wikipedia.org/wiki/Bounding_volume_hierarchy) built in a binned manner using the [surface area heuristic](https://en.wikipedia.org/wiki/Bounding_interval_hierarchy#Construction). 10 | - Convergence rate is improved by the use of a [low-discrepancy sequence](http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/) in place of uniform random sampling. 11 | - Basic [next event estimation](https://www.youtube.com/watch?v=FU1dbi827LY) (direct light sampling). 12 | - Uses [assimp](https://github.com/assimp/assimp) for scene loading, so can load many scene and model file formats, such as glTF, FBX, obj, etc. 13 | - Uses a nice procedural atmospheric skybox (thanks @nyrox). Alternatively, can load HDR images to use as the skybox. 14 | - Cross platform. Tested on Windows 10 and Arch Linux. 
15 | - All the GPU code can be run on the CPU via a dropdown in the UI. Mostly useful for debugging. 16 | 17 | # How to build and run 18 | ```sh 19 | # builds without denoising support 20 | cargo run 21 | ``` 22 | 23 | The path tracer optionally supports denoising via OpenImageDenoise, enabled with the `oidn` feature flag. To use this feature, first [install OpenImageDenoise 1.4.3](https://github.com/OpenImageDenoise/oidn/releases/tag/v1.4.3) and ensure that the `OIDN_DIR` environment variable points to your install location. 24 | 25 | ```sh 26 | # with denoising (requires OIDN to be installed and available on PATH) 27 | export OIDN_DIR 28 | cargo run -F oidn 29 | ``` 30 | 31 | Once built and launched, to start rendering, simply drag any compatible scene file onto the window, or use the file picker. Holding right click and using WASD will let you move the camera. 32 | 33 | I've only tested using Vulkan. If `wgpu` for whatever reason defaults to a different backend on your system, you can fix this by setting the `WGPU_BACKEND` environment variable to `"vulkan"`. 34 | 35 | GPU kernel code is in `kernels/`, code shared between GPU and CPU is in `shared_structs/`, pure CPU code is in `src/`. 
36 | 37 | # Pretty pictures 38 | ![image](https://github.com/pema99/rust-path-tracer/assets/11212115/4f6e0936-77b7-40bf-917c-0424b37b8c74) 39 | ![image](https://user-images.githubusercontent.com/11212115/236666588-51cb006b-a1c6-4688-b49a-9dfc906cfa6c.png) 40 | ![image](https://github.com/pema99/rust-path-tracer/assets/11212115/7ba5c1bc-5d85-4e8e-b155-bd2233bad7e2) 41 | ![image](https://github.com/pema99/rust-path-tracer/assets/11212115/ba63c245-60a2-4eb5-990c-9d2a2e4d5449) 42 | ![image](https://user-images.githubusercontent.com/11212115/236580283-10b90b04-48fd-4863-95df-ca5f27afff26.png) 43 | ![image](https://user-images.githubusercontent.com/11212115/236580256-e1bda1b2-37fb-461d-919d-3a3c037eb955.png) 44 | -------------------------------------------------------------------------------- /benches/benchmark.rs: -------------------------------------------------------------------------------- 1 | // This file contains benchmarks for the purpose of guarding against 2 | // performance regressions. To run them, use `cargo bench`. 
3 | 4 | use rustic::trace::*; 5 | 6 | use criterion::{criterion_group, criterion_main, Criterion}; 7 | 8 | pub fn criterion_benchmark(c: &mut Criterion) { 9 | let mut group = c.benchmark_group("Performance regression tests"); 10 | group.sample_size(10); 11 | group.bench_function("Startup time (GPU)", |b| { // 3.021s 12 | b.iter(|| trace_gpu("scenes/BreakTime.glb", None, setup_trace(1280, 720, 0))) 13 | }); 14 | group.bench_function("Startup time (CPU)", |b| { // 2.855s 15 | b.iter(|| trace_cpu("scenes/BreakTime.glb", None, setup_trace(1280, 720, 0))) 16 | }); 17 | group.bench_function("160 samples (GPU)", |b| { // 2.408s 18 | b.iter(|| trace_gpu("scenes/DarkCornell.glb", None, setup_trace(1280, 720, 160))) 19 | }); 20 | group.bench_function("32 samples (CPU)", |b| { // 12.891s 21 | b.iter(|| trace_cpu("scenes/DarkCornell.glb", None, setup_trace(1280, 720, 32))) 22 | }); 23 | group.finish(); 24 | } 25 | 26 | criterion_group!(benches, criterion_benchmark); 27 | criterion_main!(benches); 28 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | use spirv_builder::SpirvBuilder; 2 | 3 | #[cfg(feature = "oidn")] 4 | use std::{ 5 | env, 6 | path::{Path, PathBuf}, 7 | }; 8 | 9 | #[cfg(feature = "oidn")] 10 | fn get_output_path() -> PathBuf { 11 | let manifest_dir_string = env::var("CARGO_MANIFEST_DIR").unwrap(); 12 | let build_type = env::var("PROFILE").unwrap(); 13 | let path = Path::new(&manifest_dir_string) 14 | .join("target") 15 | .join(build_type); 16 | return PathBuf::from(path); 17 | } 18 | 19 | fn main() { 20 | SpirvBuilder::new("kernels", "spirv-unknown-vulkan1.1") 21 | .extra_arg("--no-spirt") 22 | .build() 23 | .expect("Kernel failed to compile"); 24 | 25 | #[cfg(feature = "oidn")] 26 | { 27 | let oidn_dir = std::env::var("OIDN_DIR").expect("OIDN_DIR environment variable not set. 
Please set this to the OIDN install directory root."); 28 | let oidn_path = Path::new(&oidn_dir).join("bin"); 29 | for entry in std::fs::read_dir(oidn_path).expect("Error finding OIDN binaries") { 30 | let path = entry.expect("Invalid path in OIDN binaries folder").path(); 31 | let file_name = path.file_name().unwrap().to_str().unwrap(); 32 | let mut output_path = get_output_path(); 33 | output_path.push(file_name); 34 | std::fs::copy(path, output_path).expect("Failed to copy OIDN binary"); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /kernels/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "autocfg" 7 | version = "1.1.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 10 | 11 | [[package]] 12 | name = "bitflags" 13 | version = "1.3.2" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 16 | 17 | [[package]] 18 | name = "bytemuck" 19 | version = "1.13.1" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" 22 | dependencies = [ 23 | "bytemuck_derive", 24 | ] 25 | 26 | [[package]] 27 | name = "bytemuck_derive" 28 | version = "1.4.1" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192" 31 | dependencies = [ 32 | "proc-macro2", 33 | "quote", 34 | "syn 2.0.15", 35 | ] 36 | 37 | [[package]] 38 | name = "glam" 39 | version = "0.22.0" 40 | source = "registry+https://github.com/rust-lang/crates.io-index" 41 | checksum = 
"12f597d56c1bd55a811a1be189459e8fad2bbc272616375602443bdfb37fa774" 42 | dependencies = [ 43 | "bytemuck", 44 | "num-traits", 45 | ] 46 | 47 | [[package]] 48 | name = "kernels" 49 | version = "0.1.0" 50 | dependencies = [ 51 | "shared_structs", 52 | "spirv-std", 53 | ] 54 | 55 | [[package]] 56 | name = "libm" 57 | version = "0.2.6" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" 60 | 61 | [[package]] 62 | name = "num-traits" 63 | version = "0.2.15" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 66 | dependencies = [ 67 | "autocfg", 68 | "libm", 69 | ] 70 | 71 | [[package]] 72 | name = "proc-macro2" 73 | version = "1.0.56" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" 76 | dependencies = [ 77 | "unicode-ident", 78 | ] 79 | 80 | [[package]] 81 | name = "quote" 82 | version = "1.0.26" 83 | source = "registry+https://github.com/rust-lang/crates.io-index" 84 | checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" 85 | dependencies = [ 86 | "proc-macro2", 87 | ] 88 | 89 | [[package]] 90 | name = "shared_structs" 91 | version = "0.1.0" 92 | dependencies = [ 93 | "bytemuck", 94 | "glam", 95 | "spirv-std", 96 | ] 97 | 98 | [[package]] 99 | name = "spirv-std" 100 | version = "0.7.0" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "3197bd4c021c2dfc0f9dfb356312c8f7842d972d5545c308ad86422c2e2d3e66" 103 | dependencies = [ 104 | "bitflags", 105 | "glam", 106 | "num-traits", 107 | "spirv-std-macros", 108 | "spirv-std-types", 109 | ] 110 | 111 | [[package]] 112 | name = "spirv-std-macros" 113 | version = "0.7.0" 114 | source = "registry+https://github.com/rust-lang/crates.io-index" 115 | checksum = 
"bbaffad626ab9d3ac61c4b74b5d51cb52f1939a8041d7ac09ec828eb4ad44d72" 116 | dependencies = [ 117 | "proc-macro2", 118 | "quote", 119 | "spirv-std-types", 120 | "syn 1.0.109", 121 | ] 122 | 123 | [[package]] 124 | name = "spirv-std-types" 125 | version = "0.7.0" 126 | source = "registry+https://github.com/rust-lang/crates.io-index" 127 | checksum = "ab83875e851bc803c687024d2d950730f350c0073714b95b3a6b1d22e9eac42a" 128 | 129 | [[package]] 130 | name = "syn" 131 | version = "1.0.109" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 134 | dependencies = [ 135 | "proc-macro2", 136 | "quote", 137 | "unicode-ident", 138 | ] 139 | 140 | [[package]] 141 | name = "syn" 142 | version = "2.0.15" 143 | source = "registry+https://github.com/rust-lang/crates.io-index" 144 | checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" 145 | dependencies = [ 146 | "proc-macro2", 147 | "quote", 148 | "unicode-ident", 149 | ] 150 | 151 | [[package]] 152 | name = "unicode-ident" 153 | version = "1.0.8" 154 | source = "registry+https://github.com/rust-lang/crates.io-index" 155 | checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" 156 | -------------------------------------------------------------------------------- /kernels/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "kernels" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [lib] 7 | crate-type = ["dylib", "lib"] 8 | 9 | [dependencies] 10 | shared_structs = { path = "../shared_structs" } 11 | spirv-std = "0.7.0" -------------------------------------------------------------------------------- /kernels/src/bsdf.rs: -------------------------------------------------------------------------------- 1 | use shared_structs::{MaterialData, TracingConfig}; 2 | use spirv_std::{glam::{Vec3, Vec2, Vec4Swizzles}}; 3 | 
#[allow(unused_imports)] 4 | use spirv_std::num_traits::Float; 5 | 6 | use crate::{rng, util::{self}}; 7 | use shared_structs::{Image, Sampler}; 8 | 9 | type Spectrum = Vec3; 10 | 11 | #[derive(Default, Copy, Clone, PartialEq)] 12 | #[repr(u32)] 13 | pub enum LobeType { 14 | #[default] DiffuseReflection, 15 | SpecularReflection, 16 | #[allow(dead_code)] DiffuseTransmission, 17 | SpecularTransmission, 18 | } 19 | 20 | #[derive(Default, Copy, Clone)] 21 | pub struct BSDFSample { 22 | pub pdf: f32, 23 | pub sampled_lobe: LobeType, 24 | pub spectrum: Spectrum, 25 | pub sampled_direction: Vec3, 26 | } 27 | 28 | pub trait BSDF { 29 | fn evaluate( 30 | &self, 31 | view_direction: Vec3, 32 | normal: Vec3, 33 | sample_direction: Vec3, 34 | lobe_type: LobeType, 35 | ) -> Spectrum; 36 | fn sample(&self, view_direction: Vec3, normal: Vec3, rng: &mut rng::RngState) -> BSDFSample; 37 | fn pdf( 38 | &self, 39 | view_direction: Vec3, 40 | normal: Vec3, 41 | sample_direction: Vec3, 42 | lobe_type: LobeType, 43 | ) -> f32; 44 | } 45 | 46 | pub struct Lambertian { 47 | pub albedo: Spectrum, 48 | } 49 | 50 | impl Lambertian { 51 | fn pdf_fast(&self, cos_theta: f32) -> f32 { 52 | cos_theta / core::f32::consts::PI 53 | } 54 | 55 | fn evaluate_fast(&self, cos_theta: f32) -> Spectrum { 56 | self.albedo / core::f32::consts::PI * cos_theta 57 | } 58 | } 59 | 60 | impl BSDF for Lambertian { 61 | fn evaluate( 62 | &self, 63 | _view_direction: Vec3, 64 | normal: Vec3, 65 | sample_direction: Vec3, 66 | _lobe_type: LobeType, 67 | ) -> Spectrum { 68 | let cos_theta = normal.dot(sample_direction).max(0.0); 69 | self.evaluate_fast(cos_theta) 70 | } 71 | 72 | fn sample(&self, _view_direction: Vec3, normal: Vec3, rng: &mut rng::RngState) -> BSDFSample { 73 | let (up, nt, nb) = util::create_cartesian(normal); 74 | let rng_sample = rng.gen_r3(); 75 | let sample = util::cosine_sample_hemisphere(rng_sample.x, rng_sample.y); 76 | let sampled_direction = Vec3::new( 77 | sample.x * nb.x + sample.y * up.x + 
sample.z * nt.x, 78 | sample.x * nb.y + sample.y * up.y + sample.z * nt.y, 79 | sample.x * nb.z + sample.y * up.z + sample.z * nt.z, 80 | ) 81 | .normalize(); 82 | 83 | let sampled_lobe = LobeType::DiffuseReflection; 84 | let cos_theta = normal.dot(sampled_direction).max(0.0); 85 | let pdf = self.pdf_fast(cos_theta); 86 | let spectrum = self.evaluate_fast(cos_theta); 87 | BSDFSample { 88 | pdf, 89 | sampled_lobe, 90 | spectrum, 91 | sampled_direction, 92 | } 93 | } 94 | 95 | fn pdf( 96 | &self, 97 | _view_direction: Vec3, 98 | normal: Vec3, 99 | sample_direction: Vec3, 100 | _lobe_type: LobeType, 101 | ) -> f32 { 102 | let cos_theta = normal.dot(sample_direction).max(0.0); 103 | self.pdf_fast(cos_theta) 104 | } 105 | } 106 | 107 | pub struct Glass { 108 | pub albedo: Spectrum, 109 | pub ior: f32, 110 | pub roughness: f32, 111 | } 112 | 113 | impl BSDF for Glass { 114 | fn evaluate( 115 | &self, 116 | _view_direction: Vec3, 117 | _normal: Vec3, 118 | _sample_direction: Vec3, 119 | lobe_type: LobeType, 120 | ) -> Spectrum { 121 | if lobe_type == LobeType::SpecularReflection { 122 | Vec3::ONE // This is 1 because glass is fully non-metallic 123 | } else { 124 | self.albedo 125 | } 126 | } 127 | 128 | fn sample(&self, view_direction: Vec3, normal: Vec3, rng: &mut rng::RngState) -> BSDFSample { 129 | let rng_sample = rng.gen_r3(); 130 | 131 | let inside = normal.dot(view_direction) < 0.0; 132 | let normal = if inside { -normal } else { normal }; 133 | let in_ior = if inside { self.ior } else { 1.0 }; 134 | let out_ior = if inside { 1.0 } else { self.ior }; 135 | 136 | let microsurface_normal = util::sample_ggx_microsurface_normal(rng_sample.x, rng_sample.y, normal, self.roughness); 137 | let fresnel = util::fresnel_schlick_scalar(in_ior, out_ior, microsurface_normal.dot(view_direction).max(0.0)); 138 | if rng_sample.z <= fresnel { 139 | // Reflection 140 | let sampled_direction = (2.0 * view_direction.dot(microsurface_normal).abs() * microsurface_normal - 
view_direction).normalize(); 141 | let pdf = 1.0; 142 | let sampled_lobe = LobeType::SpecularReflection; 143 | let spectrum = Vec3::ONE; 144 | BSDFSample { 145 | pdf, 146 | sampled_lobe, 147 | spectrum, 148 | sampled_direction, 149 | } 150 | } else { 151 | // Refraction 152 | let eta = in_ior / out_ior; 153 | let c = view_direction.dot(microsurface_normal); 154 | let sampled_direction = ((eta * c - (view_direction.dot(normal)).signum() * (1.0 + eta * (c * c - 1.0)).max(0.0).sqrt()) * microsurface_normal - eta * view_direction).normalize(); 155 | let pdf = 1.0; 156 | let sampled_lobe = LobeType::SpecularTransmission; 157 | let spectrum = self.albedo; 158 | BSDFSample { 159 | pdf, 160 | sampled_lobe, 161 | spectrum, 162 | sampled_direction, 163 | } 164 | } 165 | } 166 | 167 | fn pdf( 168 | &self, 169 | _view_direction: Vec3, 170 | _normal: Vec3, 171 | _sample_direction: Vec3, 172 | _lobe_type: LobeType, 173 | ) -> f32 { 174 | 1.0 // Delta distribution 175 | } 176 | } 177 | 178 | // Assume IOR of 1.5 for dielectrics, which works well for most. 179 | const DIELECTRIC_IOR: f32 = 1.5; 180 | 181 | // Fresnel at normal incidence for dielectrics, with air as the other medium. 
182 | const DIELECTRIC_F0_SQRT: f32 = (DIELECTRIC_IOR - 1.0) / (DIELECTRIC_IOR + 1.0); 183 | const DIELECTRIC_F0: f32 = DIELECTRIC_F0_SQRT * DIELECTRIC_F0_SQRT; 184 | 185 | pub struct PBR { 186 | pub albedo: Spectrum, 187 | pub roughness: f32, 188 | pub metallic: f32, 189 | pub specular_weight_clamp: Vec2, 190 | } 191 | 192 | impl PBR { 193 | fn evaluate_diffuse_fast( 194 | &self, 195 | cos_theta: f32, 196 | specular_weight: f32, 197 | ks: Vec3, 198 | ) -> Spectrum { 199 | let kd = (Vec3::splat(1.0) - ks) * (1.0 - self.metallic); 200 | let diffuse = kd * self.albedo / core::f32::consts::PI; 201 | diffuse * cos_theta / (1.0 - specular_weight) 202 | } 203 | 204 | fn evaluate_specular_fast( 205 | &self, 206 | view_direction: Vec3, 207 | normal: Vec3, 208 | sample_direction: Vec3, 209 | cos_theta: f32, 210 | d_term: f32, 211 | specular_weight: f32, 212 | ks: Vec3, 213 | ) -> Spectrum { 214 | let g_term = util::geometry_smith_schlick_ggx(normal, view_direction, sample_direction, self.roughness); 215 | let specular_numerator = d_term * g_term * ks; 216 | let specular_denominator = 4.0 * normal.dot(view_direction).max(0.0) * cos_theta; 217 | let specular = specular_numerator / specular_denominator.max(util::EPS); 218 | specular * cos_theta / specular_weight 219 | } 220 | 221 | fn pdf_diffuse_fast(&self, cos_theta: f32) -> f32 { 222 | cos_theta / core::f32::consts::PI 223 | } 224 | 225 | fn pdf_specular_fast( 226 | &self, 227 | view_direction: Vec3, 228 | normal: Vec3, 229 | halfway: Vec3, 230 | d_term: f32, 231 | ) -> f32 { 232 | (d_term * normal.dot(halfway)) / (4.0 * view_direction.dot(halfway)) 233 | } 234 | } 235 | 236 | impl BSDF for PBR { 237 | fn evaluate( 238 | &self, 239 | view_direction: Vec3, 240 | normal: Vec3, 241 | sample_direction: Vec3, 242 | lobe_type: LobeType, 243 | ) -> Spectrum { 244 | let approx_fresnel = util::fresnel_schlick_scalar(1.0, DIELECTRIC_IOR, normal.dot(view_direction).max(0.0)); 245 | let mut specular_weight = util::lerp(approx_fresnel, 
1.0, self.metallic); 246 | if specular_weight != 0.0 && specular_weight != 1.0 { 247 | specular_weight = specular_weight.clamp(self.specular_weight_clamp.x, self.specular_weight_clamp.y); 248 | } 249 | 250 | let cos_theta = normal.dot(sample_direction).max(0.0); 251 | let halfway = (view_direction + sample_direction).normalize(); 252 | 253 | let f0 = Vec3::splat(DIELECTRIC_F0).lerp(self.albedo, self.metallic); 254 | let ks = util::fresnel_schlick(halfway.dot(view_direction).max(0.0), f0); 255 | 256 | if lobe_type == LobeType::DiffuseReflection { 257 | self.evaluate_diffuse_fast(cos_theta, specular_weight, ks) 258 | } else { 259 | let d_term = util::ggx_distribution(normal, halfway, self.roughness); 260 | self.evaluate_specular_fast( 261 | view_direction, 262 | normal, 263 | sample_direction, 264 | cos_theta, 265 | d_term, 266 | specular_weight, 267 | ks, 268 | ) 269 | } 270 | } 271 | 272 | fn sample(&self, view_direction: Vec3, normal: Vec3, rng: &mut rng::RngState) -> BSDFSample { 273 | let rng_sample = rng.gen_r3(); 274 | 275 | let approx_fresnel = util::fresnel_schlick_scalar(1.0, DIELECTRIC_IOR, normal.dot(view_direction).max(0.0)); 276 | let mut specular_weight = util::lerp(approx_fresnel, 1.0, self.metallic); 277 | // Clamp specular weight to prevent firelies. See Jakub Boksansky and Adam Marrs in RT gems 2 chapter 14. 
278 | if specular_weight != 0.0 && specular_weight != 1.0 { 279 | specular_weight = specular_weight.clamp(self.specular_weight_clamp.x, self.specular_weight_clamp.y); 280 | } 281 | 282 | let (sampled_direction, sampled_lobe) = if rng_sample.z >= specular_weight { 283 | let (up, nt, nb) = util::create_cartesian(normal); 284 | let sample = util::cosine_sample_hemisphere(rng_sample.x, rng_sample.y); 285 | let sampled_direction = Vec3::new( 286 | sample.x * nb.x + sample.y * up.x + sample.z * nt.x, 287 | sample.x * nb.y + sample.y * up.y + sample.z * nt.y, 288 | sample.x * nb.z + sample.y * up.z + sample.z * nt.z, 289 | ) 290 | .normalize(); 291 | (sampled_direction, LobeType::DiffuseReflection) 292 | } else { 293 | let reflection_direction = util::reflect(-view_direction, normal); 294 | let sampled_direction = util::sample_ggx( 295 | rng_sample.x, 296 | rng_sample.y, 297 | reflection_direction, 298 | self.roughness, 299 | ); 300 | (sampled_direction, LobeType::SpecularReflection) 301 | }; 302 | 303 | let cos_theta = normal.dot(sampled_direction).max(util::EPS); 304 | let halfway = (view_direction + sampled_direction).normalize(); 305 | 306 | let f0 = Vec3::splat(DIELECTRIC_F0).lerp(self.albedo, self.metallic); 307 | let ks = util::fresnel_schlick(halfway.dot(view_direction).max(0.0), f0); 308 | 309 | let (sampled_direction, sampled_lobe, pdf, spectrum) = if sampled_lobe == LobeType::DiffuseReflection { 310 | let pdf = self.pdf_diffuse_fast(cos_theta); 311 | let spectrum = self.evaluate_diffuse_fast(cos_theta, specular_weight, ks); 312 | (sampled_direction, LobeType::DiffuseReflection, pdf, spectrum) 313 | } else { 314 | let d_term = util::ggx_distribution(normal, halfway, self.roughness); 315 | let pdf = self.pdf_specular_fast(view_direction, normal, halfway, d_term); 316 | let spectrum = self.evaluate_specular_fast( 317 | view_direction, 318 | normal, 319 | sampled_direction, 320 | cos_theta, 321 | d_term, 322 | specular_weight, 323 | ks, 324 | ); 325 | 
(sampled_direction, LobeType::SpecularReflection, pdf, spectrum) 326 | }; 327 | 328 | BSDFSample { 329 | pdf, 330 | sampled_lobe, 331 | spectrum, 332 | sampled_direction, 333 | } 334 | } 335 | 336 | fn pdf( 337 | &self, 338 | view_direction: Vec3, 339 | normal: Vec3, 340 | sample_direction: Vec3, 341 | lobe_type: LobeType, 342 | ) -> f32 { 343 | if lobe_type == LobeType::DiffuseReflection { 344 | let cos_theta = normal.dot(sample_direction).max(0.0); 345 | self.pdf_diffuse_fast(cos_theta) 346 | } else { 347 | let halfway = (view_direction + sample_direction).normalize(); 348 | let d_term = util::ggx_distribution(normal, halfway, self.roughness); 349 | self.pdf_specular_fast(view_direction, normal, halfway, d_term) 350 | } 351 | } 352 | } 353 | 354 | pub fn get_pbr_bsdf(config: &TracingConfig, material: &MaterialData, uv: Vec2, atlas: &Image!(2D, type=f32, sampled), sampler: &Sampler) -> PBR { 355 | let albedo = if material.has_albedo_texture() { 356 | let scaled_uv = material.albedo.xy() + uv * material.albedo.zw(); 357 | let albedo = atlas.sample_by_lod(*sampler, scaled_uv, 0.0); 358 | albedo.xyz() 359 | } else { 360 | material.albedo.xyz() 361 | }; 362 | let roughness = if material.has_roughness_texture() { 363 | let scaled_uv = material.roughness.xy() + uv * material.roughness.zw(); 364 | let roughness = atlas.sample_by_lod(*sampler, scaled_uv, 0.0); 365 | roughness.x 366 | } else { 367 | material.roughness.x 368 | }; 369 | let metallic = if material.has_metallic_texture() { 370 | let scaled_uv = material.metallic.xy() + uv * material.metallic.zw(); 371 | let metallic = atlas.sample_by_lod(*sampler, scaled_uv, 0.0); 372 | metallic.x 373 | } else { 374 | material.metallic.x 375 | }; 376 | 377 | // Clamp values to avoid NaNs :P 378 | let roughness = roughness.max(util::EPS); 379 | let metallic = metallic.min(1.0 - util::EPS); 380 | 381 | PBR { 382 | albedo, 383 | roughness, 384 | metallic, 385 | specular_weight_clamp: config.specular_weight_clamp, 386 | } 387 | } 
-------------------------------------------------------------------------------- /kernels/src/intersection.rs: -------------------------------------------------------------------------------- 1 | use shared_structs::{BVHNode, PerVertexData}; 2 | #[allow(unused_imports)] 3 | use spirv_std::num_traits::Float; 4 | use spirv_std::{glam::{UVec4, Vec4, Vec3, Vec4Swizzles}, num_traits::Signed}; 5 | 6 | use crate::vec::FixedVec; 7 | 8 | // Adapted from raytri.c 9 | fn muller_trumbore(ro: Vec3, rd: Vec3, a: Vec3, b: Vec3, c: Vec3, out_t: &mut f32, out_backface: &mut bool) -> bool 10 | { 11 | *out_t = 0.0; 12 | 13 | let edge1 = b - a; 14 | let edge2 = c - a; 15 | 16 | // begin calculating determinant - also used to calculate U parameter 17 | let pv = rd.cross(edge2); 18 | 19 | // if determinant is near zero, ray lies in plane of triangle 20 | let det = edge1.dot(pv); 21 | *out_backface = det.is_negative(); 22 | 23 | if det.abs() < 1e-6 { 24 | return false; 25 | } 26 | 27 | let inv_det = 1.0 / det; 28 | 29 | // calculate distance from vert0 to ray origin 30 | let tv = ro - a; 31 | 32 | // calculate U parameter and test bounds 33 | let u = tv.dot(pv) * inv_det; 34 | if u < 0.0 || u > 1.0 { 35 | return false; 36 | } 37 | 38 | // prepare to test V parameter 39 | let qv = tv.cross(edge1); 40 | 41 | // calculate V parameter and test bounds 42 | let v = rd.dot(qv) * inv_det; 43 | if v < 0.0 || u + v > 1.0 { 44 | return false; 45 | } 46 | 47 | let t = edge2.dot(qv) * inv_det; 48 | if t < 0.0 { 49 | return false; 50 | } 51 | *out_t = t; 52 | 53 | return true; 54 | } 55 | 56 | pub struct TraceResult { 57 | pub triangle: UVec4, 58 | pub triangle_index: u32, 59 | pub t: f32, 60 | pub hit: bool, 61 | pub backface: bool, 62 | } 63 | 64 | impl Default for TraceResult { 65 | fn default() -> Self { 66 | Self { 67 | triangle: UVec4::splat(0), 68 | triangle_index: 0, 69 | t: 1000000.0, 70 | hit: false, 71 | backface: false, 72 | } 73 | } 74 | } 75 | 76 | #[allow(dead_code)] 77 | fn 
intersect_slow_as_shit( 78 | vertex_buffer: &[Vec4], 79 | index_buffer: &[UVec4], 80 | ro: Vec3, 81 | rd: Vec3 82 | ) -> TraceResult { 83 | let mut result = TraceResult::default(); 84 | for i in 0..index_buffer.len() { 85 | let triangle = index_buffer[i]; 86 | let a = vertex_buffer[triangle.x as usize].xyz(); 87 | let b = vertex_buffer[triangle.y as usize].xyz(); 88 | let c = vertex_buffer[triangle.z as usize].xyz(); 89 | 90 | let mut t = 0.0; 91 | let mut backface = false; 92 | if muller_trumbore(ro, rd, a, b, c, &mut t, &mut backface) && t > 0.001 && t < result.t { 93 | result.triangle = triangle; 94 | result.triangle_index = i as u32; 95 | result.t = result.t.min(t); 96 | result.hit = true; 97 | result.backface = backface; 98 | } 99 | } 100 | result 101 | } 102 | 103 | // TODO: Optimize this 104 | fn intersect_aabb(aabb_min: Vec3, aabb_max: Vec3, ro: Vec3, rd: Vec3, prev_min_t: f32) -> f32 { 105 | let tx1 = (aabb_min.x - ro.x) / rd.x; 106 | let tx2 = (aabb_max.x - ro.x) / rd.x; 107 | let mut tmin = tx1.min(tx2); 108 | let mut tmax = tx1.max(tx2); 109 | let ty1 = (aabb_min.y - ro.y) / rd.y; 110 | let ty2 = (aabb_max.y - ro.y) / rd.y; 111 | tmin = tmin.max(ty1.min(ty2)); 112 | tmax = tmax.min(ty1.max(ty2)); 113 | let tz1 = (aabb_min.z - ro.z) / rd.z; 114 | let tz2 = (aabb_max.z - ro.z) / rd.z; 115 | tmin = tmin.max(tz1.min(tz2)); 116 | tmax = tmax.min(tz1.max(tz2)); 117 | if tmax >= tmin && tmax > 0.0 && tmin < prev_min_t { 118 | tmin 119 | } else { 120 | f32::INFINITY 121 | } 122 | } 123 | 124 | pub struct BVHReference<'a> { 125 | pub nodes: &'a [BVHNode], 126 | } 127 | 128 | impl<'a> BVHReference<'a> { 129 | #[allow(dead_code)] 130 | pub fn intersect_fixed_order(&self, vertex_buffer: &[Vec4], index_buffer: &[UVec4], ro: Vec3, rd: Vec3) -> TraceResult { 131 | let mut stack = FixedVec::::new(); 132 | stack.push(0); 133 | 134 | let mut result = TraceResult::default(); 135 | while !stack.is_empty() { 136 | let node_index = stack.pop().unwrap(); 137 | let node = 
&self.nodes[node_index]; 138 | if intersect_aabb(node.aabb_min(), node.aabb_max(), ro, rd, result.t).is_infinite() { 139 | continue; 140 | } 141 | 142 | if node.is_leaf() { 143 | for i in 0..node.triangle_count() { 144 | let triangle_index = node.first_triangle_index() + i; 145 | let triangle = index_buffer[triangle_index as usize]; 146 | let a = vertex_buffer[triangle.x as usize].xyz(); 147 | let b = vertex_buffer[triangle.y as usize].xyz(); 148 | let c = vertex_buffer[triangle.z as usize].xyz(); 149 | 150 | let mut t = 0.0; 151 | let mut backface = false; 152 | if muller_trumbore(ro, rd, a, b, c, &mut t, &mut backface) && t > 0.001 && t < result.t { 153 | result.triangle = triangle; 154 | result.triangle_index = triangle_index; 155 | result.t = result.t.min(t); 156 | result.hit = true; 157 | result.backface = backface; 158 | } 159 | } 160 | } else { 161 | stack.push(node.right_node_index() as usize); 162 | stack.push(node.left_node_index() as usize); 163 | } 164 | } 165 | 166 | result 167 | } 168 | 169 | pub fn intersect_nearest(&self, per_vertex_buffer: &[PerVertexData], index_buffer: &[UVec4], ro: Vec3, rd: Vec3) -> TraceResult { 170 | self.intersect_front_to_back::(per_vertex_buffer, index_buffer, ro, rd, 0.0) 171 | } 172 | 173 | pub fn intersect_any(&self, per_vertex_buffer: &[PerVertexData], index_buffer: &[UVec4], ro: Vec3, rd: Vec3, max_t: f32) -> TraceResult { 174 | self.intersect_front_to_back::(per_vertex_buffer, index_buffer, ro, rd, max_t) 175 | } 176 | 177 | fn intersect_front_to_back(&self, per_vertex_buffer: &[PerVertexData], index_buffer: &[UVec4], ro: Vec3, rd: Vec3, max_t: f32) -> TraceResult { 178 | let mut stack = FixedVec::::new(); 179 | stack.push(0); 180 | 181 | let mut result = TraceResult::default(); 182 | while !stack.is_empty() { 183 | let node_index = stack.pop().unwrap(); 184 | let node = &self.nodes[node_index]; 185 | if node.is_leaf() { 186 | for i in 0..node.triangle_count() { 187 | let triangle_index = node.first_triangle_index() 
+ i; 188 | let triangle = index_buffer[triangle_index as usize]; 189 | let a = per_vertex_buffer[triangle.x as usize].vertex.xyz(); 190 | let b = per_vertex_buffer[triangle.y as usize].vertex.xyz(); 191 | let c = per_vertex_buffer[triangle.z as usize].vertex.xyz(); 192 | 193 | let mut t = 0.0; 194 | let mut backface = false; 195 | if muller_trumbore(ro, rd, a, b, c, &mut t, &mut backface) && t > 0.001 && t < result.t && (NEAREST_HIT || t <= max_t) { 196 | result.triangle = triangle; 197 | result.triangle_index = triangle_index; 198 | result.t = result.t.min(t); 199 | result.hit = true; 200 | result.backface = backface; 201 | if !NEAREST_HIT { 202 | return result; 203 | } 204 | } 205 | } 206 | } else { 207 | // find closest child 208 | let mut min_index = node.left_node_index() as usize; 209 | let mut max_index = node.right_node_index() as usize; 210 | let mut min_child = &self.nodes[min_index]; 211 | let mut max_child = &self.nodes[max_index]; 212 | let mut min_dist = intersect_aabb(min_child.aabb_min(), min_child.aabb_max(), ro, rd, result.t); 213 | let mut max_dist = intersect_aabb(max_child.aabb_min(), max_child.aabb_max(), ro, rd, result.t); 214 | if min_dist > max_dist { 215 | core::mem::swap(&mut min_index, &mut max_index); 216 | core::mem::swap(&mut min_dist, &mut max_dist); 217 | core::mem::swap(&mut min_child, &mut max_child); 218 | } 219 | 220 | // if min child isn't hit, both children aren't hit, so skip 221 | if min_dist.is_infinite() { 222 | continue; 223 | } 224 | 225 | // push valid children in the best order 226 | if max_dist.is_finite() { 227 | stack.push(max_index); 228 | } 229 | stack.push(min_index); // <-- this child will be popped first 230 | } 231 | } 232 | 233 | result 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /kernels/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(target_arch = "spirv", no_std)] 2 | 3 | use bsdf::BSDF; 
4 | use glam::*; 5 | use intersection::BVHReference; 6 | use shared_structs::{Image, Sampler}; 7 | use shared_structs::{TracingConfig, BVHNode, MaterialData, PerVertexData, LightPickEntry, NextEventEstimation}; 8 | #[allow(unused_imports)] 9 | use spirv_std::num_traits::Float; 10 | use spirv_std::{glam, spirv}; 11 | 12 | mod bsdf; 13 | mod rng; 14 | mod util; 15 | mod intersection; 16 | mod vec; 17 | mod skybox; 18 | mod light_pick; 19 | 20 | #[cfg_attr(target_arch = "spirv", inline(always))] 21 | pub fn trace_pixel( 22 | id: UVec3, 23 | config: &TracingConfig, 24 | rng: UVec2, 25 | per_vertex_buffer: &[PerVertexData], 26 | index_buffer: &[UVec4], 27 | nodes_buffer: &[BVHNode], 28 | material_data_buffer: &[MaterialData], 29 | light_pick_buffer: &[LightPickEntry], 30 | sampler: &Sampler, 31 | atlas: &Image!(2D, type=f32, sampled), 32 | skybox: &Image!(2D, type=f32, sampled), 33 | ) -> (Vec4, UVec2) { 34 | let nee_mode = NextEventEstimation::from_u32(config.nee); 35 | let nee = nee_mode.uses_nee(); 36 | let mut rng_state = rng::RngState::new(rng); 37 | 38 | // Get anti-aliased pixel coordinates. 39 | let suv = id.xy().as_vec2() + rng_state.gen_r2(); 40 | let mut uv = Vec2::new( 41 | suv.x as f32 / config.width as f32, 42 | 1.0 - suv.y as f32 / config.height as f32, 43 | ) * 2.0 44 | - 1.0; 45 | uv.y *= config.height as f32 / config.width as f32; 46 | 47 | // Setup camera. 
48 | let mut ray_origin = config.cam_position.xyz(); 49 | let mut ray_direction = Vec3::new(uv.x, uv.y, 1.0).normalize(); 50 | let euler_mat = Mat3::from_rotation_y(config.cam_rotation.y) * Mat3::from_rotation_x(config.cam_rotation.x); 51 | ray_direction = euler_mat * ray_direction; 52 | 53 | let bvh = BVHReference { 54 | nodes: nodes_buffer, 55 | }; 56 | 57 | let mut throughput = Vec3::ONE; 58 | let mut radiance = Vec3::ZERO; 59 | let mut last_bsdf_sample = bsdf::BSDFSample::default(); 60 | let mut last_light_sample = light_pick::DirectLightSample::default(); 61 | 62 | for bounce in 0..config.max_bounces { 63 | let trace_result = bvh.intersect_nearest(per_vertex_buffer, index_buffer, ray_origin, ray_direction); 64 | let hit = ray_origin + ray_direction * trace_result.t; 65 | 66 | if !trace_result.hit { 67 | if config.has_skybox == 0 { 68 | // Fallback to procedural skybox 69 | radiance += throughput * skybox::scatter(config.sun_direction, ray_origin, ray_direction); 70 | } else { 71 | // Read skybox from image 72 | let rotation = config.sun_direction.z.atan2(config.sun_direction.x); 73 | let rotated = Mat3::from_rotation_y(rotation) * ray_direction; 74 | let u = 0.5 + rotated.z.atan2(rotated.x) / (2.0 * core::f32::consts::PI); 75 | let v = 1.0 - (0.5 + rotated.y.asin() / core::f32::consts::PI); 76 | let intensity = config.sun_direction.w * (1.0 / 15.0); 77 | radiance += throughput * skybox.sample_by_lod(*sampler, Vec2::new(u, v), 0.0).xyz() * intensity; 78 | } 79 | break; 80 | } else { 81 | // Get material 82 | let material_index = trace_result.triangle.w; 83 | let material = material_data_buffer[material_index as usize]; 84 | 85 | // Add emission 86 | if material.emissive.xyz() != Vec3::ZERO { 87 | // Emissive triangles are single-sided 88 | if trace_result.backface { 89 | break; // Break since emissives don't bounce light 90 | } 91 | 92 | // We want to add emissive contribution if: 93 | // - We are not doing NEE at all. 
94 | // - This is the first bounce (so light sources don't look black). 95 | // - This is a non-diffuse bounce (so we don't double count emissive light). 96 | // AND we aren't hitting a backface (to match direct light sampling behavior). 97 | if !nee || bounce == 0 || last_bsdf_sample.sampled_lobe != bsdf::LobeType::DiffuseReflection { 98 | radiance += util::mask_nan(throughput * material.emissive.xyz()); 99 | break; 100 | } 101 | 102 | // If we have hit a light source, and we are using NEE with MIS, we use last bounces data 103 | // to add the BSDF contribution, weighted by MIS. 104 | if nee_mode.uses_mis() && last_bsdf_sample.sampled_lobe == bsdf::LobeType::DiffuseReflection { 105 | let direct_contribution = light_pick::calculate_bsdf_mis_contribution(&trace_result, &last_bsdf_sample, &last_light_sample); 106 | radiance += util::mask_nan(direct_contribution); 107 | break; 108 | } 109 | } 110 | 111 | // Interpolate vertex data 112 | let vertex_data_a = per_vertex_buffer[trace_result.triangle.x as usize]; 113 | let vertex_data_b = per_vertex_buffer[trace_result.triangle.y as usize]; 114 | let vertex_data_c = per_vertex_buffer[trace_result.triangle.z as usize]; 115 | let vert_a = vertex_data_a.vertex.xyz(); 116 | let vert_b = vertex_data_b.vertex.xyz(); 117 | let vert_c = vertex_data_c.vertex.xyz(); 118 | let norm_a = vertex_data_a.normal.xyz(); 119 | let norm_b = vertex_data_b.normal.xyz(); 120 | let norm_c = vertex_data_c.normal.xyz(); 121 | let uv_a = vertex_data_a.uv0; 122 | let uv_b = vertex_data_b.uv0; 123 | let uv_c = vertex_data_c.uv0; 124 | let bary = util::barycentric(hit, vert_a, vert_b, vert_c); 125 | let mut normal = bary.x * norm_a + bary.y * norm_b + bary.z * norm_c; 126 | let mut uv = bary.x * uv_a + bary.y * uv_b + bary.z * uv_c; 127 | if uv.clamp(Vec2::ZERO, Vec2::ONE) != uv { 128 | uv = uv.fract(); // wrap UVs 129 | } 130 | 131 | // Apply normal map 132 | if material.has_normal_texture() { 133 | let scaled_uv = material.normals.xy() + uv * 
material.normals.zw(); 134 | let normal_map = atlas.sample_by_lod(*sampler, scaled_uv, 0.0) * 2.0 - 1.0; 135 | let tangent_a = vertex_data_a.tangent.xyz(); 136 | let tangent_b = vertex_data_b.tangent.xyz(); 137 | let tangent_c = vertex_data_c.tangent.xyz(); 138 | let tangent = bary.x * tangent_a + bary.y * tangent_b + bary.z * tangent_c; 139 | let tbn = Mat3::from_cols(tangent, tangent.cross(normal), normal); 140 | normal = (tbn * normal_map.xyz()).normalize(); 141 | } 142 | 143 | // Sample BSDF 144 | let bsdf = bsdf::get_pbr_bsdf(config, &material, uv, atlas, sampler); 145 | let bsdf_sample = bsdf.sample(-ray_direction, normal, &mut rng_state); 146 | last_bsdf_sample = bsdf_sample; 147 | 148 | // Sample lights directly 149 | if nee && bsdf_sample.sampled_lobe == bsdf::LobeType::DiffuseReflection { 150 | last_light_sample = light_pick::sample_direct_lighting( 151 | nee_mode, 152 | index_buffer, 153 | per_vertex_buffer, 154 | material_data_buffer, 155 | light_pick_buffer, 156 | &bvh, 157 | throughput, 158 | &bsdf, 159 | hit, 160 | normal, 161 | ray_direction, 162 | &mut rng_state 163 | ); 164 | radiance += util::mask_nan(last_light_sample.direct_light_contribution); 165 | } 166 | 167 | // Attenuate by BSDF 168 | throughput *= bsdf_sample.spectrum / bsdf_sample.pdf; 169 | 170 | // Update ray 171 | ray_direction = bsdf_sample.sampled_direction; 172 | ray_origin = hit + ray_direction * util::EPS; 173 | 174 | // Russian roulette 175 | if bounce > config.min_bounces { 176 | let prob = throughput.max_element(); 177 | if rng_state.gen_r1() > prob { 178 | break; 179 | } 180 | throughput *= 1.0 / prob; 181 | } 182 | } 183 | } 184 | 185 | (radiance.extend(1.0), rng_state.next_state()) 186 | } 187 | 188 | 189 | #[spirv(compute(threads(8, 8, 1)))] 190 | pub fn trace_kernel( 191 | #[spirv(global_invocation_id)] id: UVec3, 192 | #[spirv(uniform, descriptor_set = 0, binding = 0)] config: &TracingConfig, 193 | #[spirv(storage_buffer, descriptor_set = 0, binding = 1)] rng: &mut 
[UVec2], 194 | #[spirv(storage_buffer, descriptor_set = 0, binding = 2)] output: &mut [Vec4], 195 | #[spirv(storage_buffer, descriptor_set = 0, binding = 3)] per_vertex_buffer: &[PerVertexData], 196 | #[spirv(storage_buffer, descriptor_set = 0, binding = 4)] index_buffer: &[UVec4], 197 | #[spirv(storage_buffer, descriptor_set = 0, binding = 5)] nodes_buffer: &[BVHNode], 198 | #[spirv(storage_buffer, descriptor_set = 0, binding = 6)] material_data_buffer: &[MaterialData], 199 | #[spirv(storage_buffer, descriptor_set = 0, binding = 7)] light_pick_buffer: &[LightPickEntry], 200 | #[spirv(descriptor_set = 0, binding = 8)] sampler: &Sampler, 201 | #[spirv(descriptor_set = 0, binding = 9)] atlas: &Image!(2D, type=f32, sampled), 202 | #[spirv(descriptor_set = 0, binding = 10)] skybox: &Image!(2D, type=f32, sampled), 203 | ) { 204 | // Handle non-divisible workgroup sizes. 205 | if id.x > config.width || id.y > config.height { 206 | return; 207 | } 208 | 209 | let index = (id.y * config.width + id.x) as usize; 210 | 211 | let (radiance, rng_state) = trace_pixel( 212 | id, 213 | config, 214 | rng[index], 215 | per_vertex_buffer, 216 | index_buffer, 217 | nodes_buffer, 218 | material_data_buffer, 219 | light_pick_buffer, 220 | sampler, 221 | atlas, 222 | skybox, 223 | ); 224 | 225 | output[index] += radiance; 226 | rng[index] = rng_state; 227 | } 228 | -------------------------------------------------------------------------------- /kernels/src/light_pick.rs: -------------------------------------------------------------------------------- 1 | use shared_structs::{LightPickEntry, PerVertexData, MaterialData, NextEventEstimation}; 2 | use spirv_std::glam::{Vec3, UVec4, Vec4Swizzles}; 3 | #[allow(unused_imports)] 4 | use spirv_std::num_traits::Float; 5 | 6 | use crate::{rng::RngState, util, bsdf::{self, BSDF}, intersection::{BVHReference, self}}; 7 | 8 | pub fn pick_light(table: &[LightPickEntry], rng_state: &mut RngState) -> (u32, f32, f32) { 9 | let rng = rng_state.gen_r2(); 
10 | let entry = table[(rng.x * table.len() as f32) as usize]; 11 | if rng.y < entry.ratio { 12 | (entry.triangle_index_a, entry.triangle_area_a, entry.triangle_pick_pdf_a) 13 | } else { 14 | (entry.triangle_index_b, entry.triangle_area_b, entry.triangle_pick_pdf_b) 15 | } 16 | } 17 | 18 | // https://www.cs.princeton.edu/~funk/tog02.pdf equation 1 19 | pub fn pick_triangle_point(a: Vec3, b: Vec3, c: Vec3, rng_state: &mut RngState) -> Vec3 { 20 | let rng = rng_state.gen_r2(); 21 | let r1_sqrt = rng.x.sqrt(); 22 | (1.0 - r1_sqrt) * a + (r1_sqrt * (1.0 - rng.y)) * b + (r1_sqrt * rng.y) * c 23 | } 24 | 25 | // PDF of picking a point on a light source w.r.t area 26 | // - light_area is the area of the light source 27 | // - light_distance is the distance from the chosen point to the point being shaded 28 | // - light_normal is the normal of the light source at the chosen point 29 | // - light_direction is the direction from the light source to the point being shaded 30 | pub fn calculate_light_pdf(light_area: f32, light_distance: f32, light_normal: Vec3, light_direction: Vec3) -> f32 { 31 | /* This warrants some explanation for my future dumb self: 32 | (In case anyone but me reads this, I use "mathover" VSCode extension to render the LaTeX inline) 33 | When we estimate the rendering equation by monte carlo integration, we typically integrate over the solid angle domain, 34 | but with direct light sampling, we use the (surface) area domain of the light sources, and need to convert between the 2. 35 | 36 | An integral for direct lighting can be written as so: 37 | # Math: \int_{\Omega_d} f_r(x, w_i, w_o) L_i(x, w_i) w_i \cdot w_n dw 38 | Where \Omega_d crucially denotes only the part of the hemisphere - the part which the visible lights project onto. 39 | This is similar to the rendering equation, except we know that the incoming ray is coming from a light source. 
40 | 41 | We can instead integrate over area domain like this: 42 | # Math: \int_\triangle f_r(x, w_i, w_o) L_i(x, w_i) w_i \cdot w_n \frac{-w_i \cdot n}{r^2} dA 43 | Where r is the distance to the light source, n is the surface normal of the light source, and \triangle denotes the 44 | domain of light source surface area. dA is thus projected differential area, centered around the point on the light source which we hit. 45 | This uses the fact that: 46 | # Math: dA = r^2 \frac{1}{-w \cdot n} dw = \frac{r^2}{-w \cdot n} dw 47 | Which can be rewritten to: 48 | # Math: dw = \frac{-w \cdot n}{r^2} dA 49 | The intuition behind this is fairly straight forward. As we move the light source further from the shading point, 50 | by a distance of r, the projected area grows by a factor of r^2. This is the inverse square law, and the reason for the r^2 term. 51 | As we tilt the light source away from the shading point, the projected area also grows. The factor with which it grows is determined 52 | by the cosine of the angle between the surface normal on the light source and the negated incoming light direction. As the angle grows, 53 | the cosine shrinks. Since the area should grow and not shrink when this happens, we have the \frac{1}{-w_i \cdot n} term. Since both 54 | vectors are normalized, that dot product is exactly the cosine of the angle. See https://arxiv.org/abs/1205.4447 for more details. 55 | 56 | We can write out an estimator for our integral over area domain, assuming uniform sampling over the surface of the light source: 57 | # Math: \frac{1}{n}\sum^n \frac{f_r(x, w_i, w_o) L_i(x, w_i) w_i \cdot w_n \frac{-w_i \cdot n}{r^2}}{\frac{1}{|\triangle|}} 58 | Where |\triangle| is the surface area of the light source. 
59 | We can further simplify to: 60 | # Math: \frac{1}{n}\sum^n f_r(x, w_i, w_o) L_i(x, w_i) w_i \cdot w_n \frac{(-w_i \cdot n) |\triangle|}{r^2} 61 | Note now that this last part: 62 | # Math: \frac{(-w_i \cdot n) |\triangle|}{r^2} 63 | Is precisely the reciprocal of the formula you see written out in code below. The reason I use the reciprocal 64 | is because I am dividing rather than multiplying this weighting term. It's an implementation detail. 65 | 66 | Note what this tell us about direct light sampling - to evaluate direct lighting, for a given bounce, we choose a direction towards 67 | a light source, and multiply the incoming radiance (light source emission) by the BRDF, by the regular cosine term (often folded into the BRDF), 68 | and by the weighting factor described just above - there are 2 cosine at terms at play! 69 | 70 | This explanation doesn't include visibility checks or weight for multiple different sources, though, so let me briefly describe. 71 | When we don't pass a visibility check (ie. the chosen light point is occluded), we simply don't add the contribution, since the 72 | probability of hitting that point is 0. When we have multiple light sources, we simply pick one at random and divide the contribution 73 | by the probability of picking the given light source. This is just splitting the estimator into multiple addends. 
*/ 74 | let cos_theta = light_normal.dot(-light_direction); 75 | if cos_theta <= 0.0 { 76 | return 0.0; 77 | } 78 | light_distance.powi(2) / (light_area * cos_theta) 79 | } 80 | 81 | pub fn get_weight(nee_mode: NextEventEstimation, p1: f32, p2: f32) -> f32 { 82 | match nee_mode { 83 | NextEventEstimation::None => 1.0, 84 | NextEventEstimation::MultipleImportanceSampling => util::power_heuristic(p1, p2), 85 | NextEventEstimation::DirectLightSampling => 1.0, 86 | } 87 | } 88 | 89 | #[derive(Default, Copy, Clone)] 90 | pub struct DirectLightSample { 91 | pub light_area: f32, 92 | pub light_normal: Vec3, 93 | pub light_pick_pdf: f32, 94 | pub light_emission: Vec3, 95 | pub light_triangle_index: u32, 96 | pub throughput: Vec3, 97 | pub direct_light_contribution: Vec3, 98 | } 99 | 100 | pub fn sample_direct_lighting( 101 | nee_mode: NextEventEstimation, 102 | index_buffer: &[UVec4], 103 | per_vertex_buffer: &[PerVertexData], 104 | material_data_buffer: &[MaterialData], 105 | light_pick_buffer: &[LightPickEntry], 106 | bvh: &BVHReference, 107 | throughput: Vec3, 108 | surface_bsdf: &impl BSDF, 109 | surface_point: Vec3, 110 | surface_normal: Vec3, 111 | ray_direction: Vec3, 112 | rng_state: &mut RngState, 113 | ) -> DirectLightSample { 114 | // If the first entry is a sentinel, there are no lights 115 | let mut info = DirectLightSample::default(); 116 | if light_pick_buffer[0].is_sentinel() { 117 | return info; 118 | } 119 | 120 | // Pick a light, get its surface properties 121 | let (light_index, light_area, light_pick_pdf) = pick_light(&light_pick_buffer, rng_state); 122 | let light_triangle = index_buffer[light_index as usize]; 123 | let light_vert_a = per_vertex_buffer[light_triangle.x as usize].vertex.xyz(); 124 | let light_vert_b = per_vertex_buffer[light_triangle.y as usize].vertex.xyz(); 125 | let light_vert_c = per_vertex_buffer[light_triangle.z as usize].vertex.xyz(); 126 | let light_norm_a = per_vertex_buffer[light_triangle.x as usize].normal.xyz(); 127 | let 
light_norm_b = per_vertex_buffer[light_triangle.y as usize].normal.xyz(); 128 | let light_norm_c = per_vertex_buffer[light_triangle.z as usize].normal.xyz(); 129 | let light_normal = (light_norm_a + light_norm_b + light_norm_c) / 3.0; // lights can use flat shading, no need to pay for interpolation 130 | let light_material = material_data_buffer[light_triangle.w as usize]; 131 | let light_emission = light_material.emissive.xyz(); 132 | 133 | // Pick a point on the light 134 | let light_point = pick_triangle_point(light_vert_a, light_vert_b, light_vert_c, rng_state); 135 | let light_direction_unorm = light_point - surface_point; 136 | let light_distance = light_direction_unorm.length(); 137 | let light_direction = light_direction_unorm / light_distance; 138 | 139 | // Sample the light directly using MIS 140 | let mut direct = Vec3::ZERO; 141 | let light_trace = bvh.intersect_any( 142 | per_vertex_buffer, 143 | index_buffer, 144 | surface_point + light_direction * util::EPS, 145 | light_direction, 146 | light_distance - util::EPS * 2.0, 147 | ); 148 | if !light_trace.hit { 149 | // Calculate light pdf for this sample 150 | let light_pdf = calculate_light_pdf(light_area, light_distance, light_normal, light_direction); 151 | if light_pdf > 0.0 { 152 | // Calculate BSDF attenuation for this sample 153 | let bsdf_attenuation = surface_bsdf.evaluate(-ray_direction, surface_normal, light_direction, bsdf::LobeType::DiffuseReflection); 154 | // Calculate BSDF pdf for this sample 155 | let bsdf_pdf = surface_bsdf.pdf(-ray_direction, surface_normal, light_direction, bsdf::LobeType::DiffuseReflection); 156 | if bsdf_pdf > 0.0 { 157 | // MIS - add the weighted sample 158 | let weight = get_weight(nee_mode, light_pdf, bsdf_pdf); 159 | direct = (bsdf_attenuation * light_emission * weight / light_pdf) / light_pick_pdf; 160 | } 161 | } 162 | } 163 | 164 | // Write out data for the next bounce to use 165 | info.light_area = light_area; 166 | info.light_normal = light_normal; 167 | 
info.light_pick_pdf = light_pick_pdf; 168 | info.light_emission = light_emission; 169 | info.light_triangle_index = light_index; 170 | info.throughput = throughput; 171 | info.direct_light_contribution = throughput * direct; 172 | info 173 | } 174 | 175 | // If this is being called, the assumption is that: 176 | // - We are using NEE with MIS 177 | // - We have hit a light source 178 | // - That last bounce was diffuse, so we did direct light sampling 179 | pub fn calculate_bsdf_mis_contribution( 180 | trace_result: &intersection::TraceResult, 181 | last_bsdf_sample: &bsdf::BSDFSample, 182 | last_light_sample: &DirectLightSample 183 | ) -> Vec3 { 184 | // If we haven't hit the same light as we sampled directly, no contribution 185 | if trace_result.triangle_index != last_light_sample.light_triangle_index { 186 | return Vec3::ZERO; 187 | } 188 | 189 | // Calculate the light pdf for this sample 190 | let light_pdf = calculate_light_pdf(last_light_sample.light_area, trace_result.t, last_light_sample.light_normal, last_bsdf_sample.sampled_direction); 191 | if light_pdf > 0.0 { 192 | // MIS - add the weighted sample 193 | let weight = get_weight(NextEventEstimation::MultipleImportanceSampling, last_bsdf_sample.pdf, light_pdf); 194 | let direct = (last_bsdf_sample.spectrum * last_light_sample.light_emission * weight / last_bsdf_sample.pdf) / last_light_sample.light_pick_pdf; 195 | last_light_sample.throughput * direct 196 | } else { 197 | Vec3::ZERO 198 | } 199 | } -------------------------------------------------------------------------------- /kernels/src/rng.rs: -------------------------------------------------------------------------------- 1 | use spirv_std::glam::{UVec2, Vec2, Vec3}; 2 | 3 | #[allow(dead_code)] 4 | #[cfg(target_arch = "spirv")] 5 | pub fn pcg_hash(input: u32) -> u32 { 6 | let state = input * 747796405u32 + 2891336453u32; 7 | let word = ((state >> ((state >> 28u32) + 4u32)) ^ state) * 277803737u32; 8 | (word >> 22u32) ^ word 9 | } 10 | 11 | 
#[allow(dead_code)] 12 | #[cfg(not(target_arch = "spirv"))] 13 | pub fn pcg_hash(input: u32) -> u32 { 14 | let state = input.overflowing_mul(747796405u32).0.overflowing_add(2891336453u32).0; 15 | let word = ((state >> ((state >> 28u32) + 4u32)) ^ state).overflowing_mul(277803737u32).0; 16 | (word >> 22u32) ^ word 17 | } 18 | 19 | // From loicvdbruh: https://www.shadertoy.com/view/NlGXzz. Square roots of primes. 20 | const LDS_MAX_DIMENSIONS: usize = 32; 21 | const LDS_PRIMES: [u32; LDS_MAX_DIMENSIONS] = [ 22 | 0x6a09e667u32, 0xbb67ae84u32, 0x3c6ef372u32, 0xa54ff539u32, 0x510e527fu32, 0x9b05688au32, 0x1f83d9abu32, 0x5be0cd18u32, 23 | 0xcbbb9d5cu32, 0x629a2929u32, 0x91590159u32, 0x452fecd8u32, 0x67332667u32, 0x8eb44a86u32, 0xdb0c2e0bu32, 0x47b5481du32, 24 | 0xae5f9155u32, 0xcf6c85d1u32, 0x2f73477du32, 0x6d1826cau32, 0x8b43d455u32, 0xe360b595u32, 0x1c456002u32, 0x6f196330u32, 25 | 0xd94ebeafu32, 0x9cc4a611u32, 0x261dc1f2u32, 0x5815a7bdu32, 0x70b7ed67u32, 0xa1513c68u32, 0x44f93634u32, 0x720dcdfcu32 26 | ]; 27 | 28 | // http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/ 29 | pub fn lds(n: u32, dimension: usize, offset: u32) -> f32 { 30 | const INV_U32_MAX_FLOAT: f32 = 1.0 / 4294967296.0; 31 | (LDS_PRIMES[dimension].wrapping_mul(n.wrapping_add(offset))) as f32 * INV_U32_MAX_FLOAT 32 | } 33 | 34 | pub struct RngState { 35 | state: UVec2, 36 | dimension: usize, 37 | } 38 | 39 | impl RngState { 40 | pub fn new(state: UVec2) -> Self { 41 | Self { 42 | state, 43 | dimension: 0, 44 | } 45 | } 46 | 47 | pub fn next_state(&self) -> UVec2 { 48 | UVec2::new(self.state.x + 1, self.state.y) 49 | } 50 | 51 | pub fn gen_r1(&mut self) -> f32 { 52 | self.dimension += 1; 53 | lds(self.state.x, self.dimension, self.state.y) 54 | } 55 | 56 | pub fn gen_r2(&mut self) -> Vec2 { 57 | Vec2::new(self.gen_r1(), self.gen_r1()) 58 | } 59 | 60 | pub fn gen_r3(&mut self) -> Vec3 { 61 | Vec3::new(self.gen_r1(), self.gen_r1(), self.gen_r1()) 62 | } 63 | } 
-------------------------------------------------------------------------------- /kernels/src/skybox.rs: -------------------------------------------------------------------------------- 1 | use spirv_std::glam::{Vec2, Vec3, Vec4, Vec4Swizzles}; 2 | #[allow(unused_imports)] 3 | use spirv_std::num_traits::Float; 4 | 5 | use crate::util; 6 | 7 | // Constants 8 | const RAY_SCATTER_COEFF: Vec3 = Vec3::new(58e-7, 135e-7, 331e-7); 9 | const RAY_EFFECTIVE_COEFF: Vec3 = RAY_SCATTER_COEFF; // Rayleight doesn't absorb light 10 | const MIE_SCATTER_COEFF: Vec3 = Vec3::new(2e-5, 2e-5, 2e-5); 11 | const MIE_EFFECTIVE_COEFF: Vec3 = Vec3::new(2e-5 * 1.1, 2e-5 * 1.1, 2e-5 * 1.1); // Approximate absorption as a factor of scattering 12 | const EARTH_RADIUS: f32 = 6360e3; 13 | const ATMOSPHERE_RADIUS: f32 = 6380e3; 14 | const H_RAY: f32 = 8e3; 15 | const H_MIE: f32 = 12e2; 16 | const CENTER: Vec3 = Vec3::new(0.0, -EARTH_RADIUS, 0.0); // earth center point 17 | 18 | fn escape(p: Vec3, d: Vec3, r: f32) -> f32 { 19 | let v = p - CENTER; 20 | let b = v.dot(d); 21 | let det = b * b - v.dot(v) + r * r; 22 | if det < 0.0 { 23 | return -1.0; 24 | } 25 | let det = det.sqrt(); 26 | let t1 = -b - det; 27 | let t2 = -b + det; 28 | if t1 >= 0.0 { 29 | return t1; 30 | } 31 | return t2; 32 | } 33 | 34 | fn densities_rm(p: Vec3) -> Vec2 { 35 | let h = ((p - CENTER).length() - EARTH_RADIUS).max(0.0); 36 | let exp_h_ray = (-h / H_RAY).exp(); 37 | let exp_h_mie = (-h / H_MIE).exp(); 38 | Vec2::new(exp_h_ray, exp_h_mie) 39 | } 40 | 41 | fn scatter_depth_int(o: Vec3, d: Vec3, l: f32) -> Vec2 { 42 | // Approximate by combining 2 samples 43 | densities_rm(o) * (l / 2.) + densities_rm(o + d * l) * (l / 2.) 
44 | } 45 | 46 | fn scatter_in(origin: Vec3, direction: Vec3, depth: f32, steps: u32, sundir: Vec3) -> (Vec3, Vec3) { 47 | let depth = depth / steps as f32; 48 | 49 | let mut i_r = Vec3::ZERO; 50 | let mut i_m = Vec3::ZERO; 51 | let mut total_depth_rm = Vec2::ZERO; 52 | 53 | let mut i = 0; 54 | while i < steps { 55 | let p = origin + direction * (depth * i as f32); 56 | let d_rm = densities_rm(p) * depth; 57 | total_depth_rm += d_rm; 58 | 59 | // Calculate optical depth 60 | let depth_rm_sum = 61 | total_depth_rm + scatter_depth_int(p, sundir, escape(p, sundir, ATMOSPHERE_RADIUS)); 62 | 63 | // Calculate exponent part of both integrals 64 | let a = 65 | (-RAY_EFFECTIVE_COEFF * depth_rm_sum.x - MIE_EFFECTIVE_COEFF * depth_rm_sum.y).exp(); 66 | 67 | i_r += a * d_rm.x; 68 | i_m += a * d_rm.y; 69 | i += 1; 70 | } 71 | 72 | (i_r, i_m) 73 | } 74 | 75 | pub fn scatter(sundir: Vec4, origin: Vec3, direction: Vec3) -> Vec3 { 76 | let (i_r, i_m) = scatter_in( 77 | origin, 78 | direction, 79 | escape(origin, direction, ATMOSPHERE_RADIUS), 80 | 12, 81 | sundir.xyz(), 82 | ); 83 | 84 | let mu = direction.dot(sundir.xyz()); 85 | let res = sundir.w 86 | * (1. 
+ mu * mu) 87 | * ( 88 | // 3/16pi = 0.597 89 | i_r * RAY_EFFECTIVE_COEFF * 0.0597 90 | + i_m * MIE_SCATTER_COEFF * 0.0196 / (1.58 - 1.52 * mu).powf(1.5) 91 | ); 92 | 93 | return util::mask_nan(Vec3::new(res.x.sqrt(), res.y.sqrt(), res.z.sqrt())).powf(2.2); // gamma -> linear since we render in linear 94 | } 95 | -------------------------------------------------------------------------------- /kernels/src/util.rs: -------------------------------------------------------------------------------- 1 | use spirv_std::glam::Vec3; 2 | #[allow(unused_imports)] 3 | use spirv_std::num_traits::Float; 4 | 5 | pub const EPS: f32 = 0.001; 6 | 7 | #[allow(dead_code)] 8 | pub fn uniform_sample_sphere(r1: f32, r2: f32) -> Vec3 { 9 | let cos_phi = 2.0 * r1 - 1.0; 10 | let sin_phi = (1.0 - cos_phi * cos_phi).sqrt(); 11 | let theta = 2.0 * core::f32::consts::PI * r2; 12 | Vec3::new(sin_phi * theta.cos(), cos_phi, sin_phi * theta.cos()) 13 | } 14 | 15 | #[allow(dead_code)] 16 | pub fn uniform_sample_hemisphere(r1: f32, r2: f32) -> Vec3 { 17 | let sin_theta = (1.0 - r1 * r1).sqrt(); 18 | let phi = 2.0 * core::f32::consts::PI * r2; 19 | let x = sin_theta * phi.cos(); 20 | let z = sin_theta * phi.sin(); 21 | Vec3::new(x, r1, z) 22 | } 23 | 24 | pub fn cosine_sample_hemisphere(r1: f32, r2: f32) -> Vec3 { 25 | let theta = r1.sqrt().acos(); 26 | let phi = 2.0 * core::f32::consts::PI * r2; 27 | Vec3::new( 28 | theta.sin() * phi.cos(), 29 | theta.cos(), 30 | theta.sin() * phi.sin(), 31 | ) 32 | } 33 | 34 | pub fn create_cartesian(up: Vec3) -> (Vec3, Vec3, Vec3) { 35 | let arbitrary = Vec3::new(0.1, 0.5, 0.9); 36 | let temp_vec = up.cross(arbitrary).normalize(); 37 | let right = temp_vec.cross(up).normalize(); 38 | let forward = up.cross(right).normalize(); 39 | (up, right, forward) 40 | } 41 | 42 | pub fn reflect(i: Vec3, normal: Vec3) -> Vec3 { 43 | i - normal * 2.0 * i.dot(normal) 44 | } 45 | 46 | #[allow(dead_code)] 47 | pub fn refract(i: Vec3, normal: Vec3, in_ior: f32, out_ior: f32) -> 
Vec3 { 48 | let eta = in_ior / out_ior; 49 | let n_dot_i = normal.dot(i); 50 | let k = 1.0 - eta * eta * (1.0 - n_dot_i.powi(2)); 51 | if k < 0.0 { 52 | Vec3::ZERO 53 | } else { 54 | eta * i - (eta * n_dot_i + k.sqrt()) * normal 55 | } 56 | } 57 | 58 | pub fn ggx_distribution(normal: Vec3, halfway: Vec3, roughness: f32) -> f32 { 59 | let numerator = roughness * roughness; 60 | let n_dot_h = normal.dot(halfway).max(0.0); 61 | let mut denominator = (n_dot_h * n_dot_h) * (numerator - 1.0) + 1.0; 62 | denominator = (core::f32::consts::PI * (denominator * denominator)).max(EPS); 63 | numerator / denominator 64 | } 65 | 66 | // https://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_notes_v2.pdf 67 | pub fn sample_ggx(r1: f32, r2: f32, reflection_direction: Vec3, roughness: f32) -> Vec3 { 68 | let a = roughness * roughness; 69 | 70 | let phi = 2.0 * core::f32::consts::PI * r1; 71 | let cos_theta = ((1.0 - r2) / (r2 * (a * a - 1.0) + 1.0)).sqrt(); 72 | let sin_theta = (1.0 - cos_theta * cos_theta).sqrt(); 73 | 74 | let halfway = Vec3::new(phi.cos() * sin_theta, phi.sin() * sin_theta, cos_theta); 75 | 76 | let up = if reflection_direction.z.abs() < 0.999 { 77 | Vec3::new(0.0, 0.0, 1.0) 78 | } else { 79 | Vec3::new(1.0, 0.0, 0.0) 80 | }; 81 | let tangent = up.cross(reflection_direction).normalize(); 82 | let bitangent = reflection_direction.cross(tangent); 83 | 84 | (tangent * halfway.x + bitangent * halfway.y + reflection_direction * halfway.z).normalize() 85 | } 86 | 87 | // GGX distribution with respect to microsurface normal (NOT halfway vector) 88 | // https://www.cs.cornell.edu/~srm/publications/EGSR07-btdf.pdf equation 33 89 | #[allow(dead_code)] 90 | pub fn ggx_distribution_microsurface_normal( 91 | m_dot_n: f32, // microsurface normal dot macrosurface normal 92 | roughness: f32 93 | ) -> f32 { 94 | let a_g = roughness * roughness; 95 | let a_g2 = a_g * a_g; 96 | let theta_m = m_dot_n.acos(); 97 | let numerator = a_g2 * 
positive_characteristic(m_dot_n); 98 | let denominator = core::f32::consts::PI * theta_m.cos().powi(4) * (a_g2 + theta_m.tan().powi(2)).powi(2); 99 | numerator / denominator 100 | } 101 | 102 | // GGX distribution multiplied with the dot of micro and macrosurface normals. 103 | // This what sample_ggx_microsurface_normal samples w.r.t 104 | // https://www.cs.cornell.edu/~srm/publications/EGSR07-btdf.pdf equation 24 105 | #[allow(dead_code)] 106 | pub fn ggx_pdf_microsurface_normal( 107 | microsurface_normal: Vec3, 108 | macrosurface_normal: Vec3, 109 | roughness: f32 110 | ) -> f32 { 111 | let m_dot_n = microsurface_normal.dot(macrosurface_normal); 112 | ggx_distribution_microsurface_normal(m_dot_n, roughness) * m_dot_n.abs() 113 | } 114 | 115 | // Function for sampling GGX(m)*|m.n| with respect to microsurface normal 116 | // https://www.cs.cornell.edu/~srm/publications/EGSR07-btdf.pdf equation 35-36 117 | pub fn sample_ggx_microsurface_normal( 118 | r1: f32, 119 | r2: f32, 120 | macrosurface_normal: Vec3, 121 | roughness: f32 122 | ) -> Vec3 { 123 | let a_g = roughness * roughness; 124 | 125 | let theta_m = ((a_g * r1.sqrt()) / (1.0 - r1).sqrt()).atan(); 126 | let phi_m = 2.0 * core::f32::consts::PI * r2; 127 | 128 | let m = Vec3::new( 129 | theta_m.sin() * phi_m.cos(), 130 | theta_m.cos(), 131 | theta_m.sin() * phi_m.sin()); 132 | 133 | let (up, nt, nb) = create_cartesian(macrosurface_normal); 134 | Vec3::new( 135 | m.x * nb.x + m.y * up.x + m.z * nt.x, 136 | m.x * nb.y + m.y * up.y + m.z * nt.y, 137 | m.x * nb.z + m.y * up.z + m.z * nt.z, 138 | ).normalize() 139 | } 140 | 141 | // PDF for sampling GGX(m)*|m.n| in case of reflection 142 | // https://www.cs.cornell.edu/~srm/publications/EGSR07-btdf.pdf equation 38, 13 and 14 143 | #[allow(dead_code)] 144 | pub fn ggx_pdf_reflection( 145 | incoming_direction: Vec3, // aka. light direction 146 | outgoing_direction: Vec3, // aka. 
scattered direction 147 | macrosurface_normal: Vec3, 148 | pdf_microsurface_normal: f32, // pdf of sampling the specific microsurface normal 149 | ) -> f32 { 150 | let h_r_hat = incoming_direction.dot(macrosurface_normal).signum() * (incoming_direction + outgoing_direction); 151 | let h_r = h_r_hat.normalize(); 152 | let jacobian = 1.0 / (4.0 * outgoing_direction.dot(h_r).abs()); 153 | pdf_microsurface_normal * jacobian 154 | } 155 | 156 | // PDF for sampling GGX(m)*|m.n| in case of refraction 157 | // https://www.cs.cornell.edu/~srm/publications/EGSR07-btdf.pdf equation 38, 13 and 14 158 | #[allow(dead_code)] 159 | pub fn ggx_pdf_refraction( 160 | incoming_direction: Vec3, // aka. light direction 161 | outgoing_direction: Vec3, // aka. scattered direction 162 | in_ior: f32, 163 | out_ior: f32, 164 | pdf_microsurface_normal: f32, // pdf of sampling the specific microsurface normal 165 | ) -> f32 { 166 | let h_t_hat = -(in_ior * incoming_direction + out_ior * outgoing_direction); 167 | let h_t = h_t_hat.normalize(); 168 | let o_dot_h_t = outgoing_direction.dot(h_t); 169 | let numerator = out_ior.powi(2) * o_dot_h_t.abs(); 170 | let denominator = (in_ior * incoming_direction.dot(h_t) + out_ior * (outgoing_direction.dot(h_t))).powi(2); 171 | let jacobian = numerator / denominator; 172 | pdf_microsurface_normal * jacobian 173 | } 174 | 175 | // GGX geometry function w.r.t microsurface normal. Not the Schlick-GGX approximation. 
176 | // https://www.cs.cornell.edu/~srm/publications/EGSR07-btdf.pdf equation 34 177 | pub fn geometry_ggx_microfacet_normal( 178 | macrosurface_normal: Vec3, 179 | microsurface_normal: Vec3, 180 | view_direction: Vec3, 181 | roughness: f32 182 | ) -> f32 { 183 | let a_g = roughness * roughness; 184 | let a_g2 = a_g * a_g; 185 | 186 | let v_dot_n = view_direction.dot(macrosurface_normal); 187 | let v_dot_m = view_direction.dot(microsurface_normal); 188 | let numerator = 2.0 * positive_characteristic(v_dot_m / v_dot_n); 189 | 190 | let theta_v = view_direction.dot(macrosurface_normal).acos(); 191 | let denominator = 1.0 + (1.0 + a_g2 * theta_v.tan().powi(2)).sqrt(); 192 | 193 | numerator / denominator 194 | } 195 | 196 | // Geometry-Smith term w.r.t microsurface normal. Not the Schlick-GGX approximation. 197 | // https://www.cs.cornell.edu/~srm/publications/EGSR07-btdf.pdf equation 23 198 | #[allow(dead_code)] 199 | pub fn geometry_smith_microfacet_normal( 200 | macrosurface_normal: Vec3, 201 | microsurface_normal: Vec3, 202 | view_direction: Vec3, 203 | light_direction: Vec3, 204 | roughness: f32, 205 | ) -> f32 { 206 | geometry_ggx_microfacet_normal(macrosurface_normal, microsurface_normal, view_direction, roughness) 207 | * geometry_ggx_microfacet_normal(macrosurface_normal, microsurface_normal, light_direction, roughness) 208 | } 209 | 210 | // Schlick-GGX geometry function from https://learnopengl.com/pbr/theory 211 | pub fn geometry_schlick_ggx(normal: Vec3, view_direction: Vec3, roughness: f32) -> f32 { 212 | let numerator = normal.dot(view_direction).max(0.0); 213 | let r = (roughness * roughness) / 8.0; 214 | let denominator = numerator * (1.0 - r) + r; 215 | numerator / denominator 216 | } 217 | 218 | // Geometry-Smith term based on Schlick-GGX from https://learnopengl.com/pbr/theory 219 | pub fn geometry_smith_schlick_ggx( 220 | normal: Vec3, 221 | view_direction: Vec3, 222 | light_direction: Vec3, 223 | roughness: f32, 224 | ) -> f32 { 225 | 
// Power heuristic (exponent 2) for combining two sampling strategies:
// p1^2 / (p1^2 + p2^2).
//
// Returns 0.0 when the denominator is not positive (both PDFs are zero), instead of
// the NaN that 0/0 would produce.
pub fn power_heuristic(p1: f32, p2: f32) -> f32 {
    let p1_2 = p1 * p1;
    let sum = p1_2 + p2 * p2;
    if sum > 0.0 {
        p1_2 / sum
    } else {
        0.0
    }
}
value: T) { 17 | self.data[self.len as usize] = value; 18 | self.len += 1; 19 | } 20 | 21 | pub fn pop(&mut self) -> Option { 22 | if self.len > 0 { 23 | self.len -= 1; 24 | Some(self.data[self.len as usize]) 25 | } else { 26 | None 27 | } 28 | } 29 | 30 | pub fn is_empty(&self) -> bool { 31 | self.len == 0 32 | } 33 | 34 | #[allow(dead_code)] 35 | pub fn clear(&mut self) { 36 | self.len = 0; 37 | } 38 | } 39 | 40 | impl Index for FixedVec { 41 | type Output = T; 42 | 43 | fn index(&self, index: u32) -> &Self::Output { 44 | &self.data[index as usize] 45 | } 46 | } -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "nightly-2023-03-04" 3 | components = ["rust-src", "rustc-dev", "llvm-tools-preview"] 4 | # commit_hash = 44cfafe2fafe816395d3acc434663a45d5178c41 5 | -------------------------------------------------------------------------------- /scene.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pema99/rust-path-tracer/2b31ad0384e62a84520a23f8596e4f0f9c6c173d/scene.glb -------------------------------------------------------------------------------- /scenes/BreakTime.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pema99/rust-path-tracer/2b31ad0384e62a84520a23f8596e4f0f9c6c173d/scenes/BreakTime.glb -------------------------------------------------------------------------------- /scenes/DarkCornell.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pema99/rust-path-tracer/2b31ad0384e62a84520a23f8596e4f0f9c6c173d/scenes/DarkCornell.glb -------------------------------------------------------------------------------- /scenes/FurnaceTest.glb: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pema99/rust-path-tracer/2b31ad0384e62a84520a23f8596e4f0f9c6c173d/scenes/FurnaceTest.glb -------------------------------------------------------------------------------- /scenes/GlassTest.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pema99/rust-path-tracer/2b31ad0384e62a84520a23f8596e4f0f9c6c173d/scenes/GlassTest.glb -------------------------------------------------------------------------------- /scenes/PBRTest.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pema99/rust-path-tracer/2b31ad0384e62a84520a23f8596e4f0f9c6c173d/scenes/PBRTest.glb -------------------------------------------------------------------------------- /scenes/VeachMIS.glb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pema99/rust-path-tracer/2b31ad0384e62a84520a23f8596e4f0f9c6c173d/scenes/VeachMIS.glb -------------------------------------------------------------------------------- /shared_structs/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "bytemuck" 7 | version = "1.13.1" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" 10 | dependencies = [ 11 | "bytemuck_derive", 12 | ] 13 | 14 | [[package]] 15 | name = "bytemuck_derive" 16 | version = "1.4.1" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192" 19 | dependencies = [ 20 | "proc-macro2", 21 | "quote", 22 | "syn", 23 | ] 24 | 25 | [[package]] 26 | name = "glam" 27 | version = "0.22.0" 28 | source = "registry+https://github.com/rust-lang/crates.io-index" 29 | checksum = "12f597d56c1bd55a811a1be189459e8fad2bbc272616375602443bdfb37fa774" 30 | dependencies = [ 31 | "bytemuck", 32 | ] 33 | 34 | [[package]] 35 | name = "proc-macro2" 36 | version = "1.0.56" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" 39 | dependencies = [ 40 | "unicode-ident", 41 | ] 42 | 43 | [[package]] 44 | name = "quote" 45 | version = "1.0.26" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" 48 | dependencies = [ 49 | "proc-macro2", 50 | ] 51 | 52 | [[package]] 53 | name = "shared_structs" 54 | version = "0.1.0" 55 | dependencies = [ 56 | "bytemuck", 57 | "glam", 58 | ] 59 | 60 | [[package]] 61 | name = "syn" 62 | version = "2.0.15" 63 | source = "registry+https://github.com/rust-lang/crates.io-index" 64 | checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" 65 | dependencies = [ 66 | "proc-macro2", 67 | "quote", 68 | "unicode-ident", 69 | ] 70 | 71 | [[package]] 72 | name = "unicode-ident" 73 | version = "1.0.8" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = 
"e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" 76 | -------------------------------------------------------------------------------- /shared_structs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "shared_structs" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | bytemuck = { version = "1.13.1", features = ["derive"] } 8 | glam = { version = "0.22", default-features = false, features = ["bytemuck"] } 9 | spirv-std = "0.7.0" -------------------------------------------------------------------------------- /shared_structs/src/image_polyfill.rs: -------------------------------------------------------------------------------- 1 | // This file contains a polyfill for the Image and Sampler types, which are not available on the CPU. 2 | 3 | #[cfg(target_arch = "spirv")] 4 | pub mod polyfill { 5 | pub use spirv_std::{Sampler, Image}; 6 | } 7 | 8 | #[cfg(not(target_arch = "spirv"))] 9 | pub mod polyfill { 10 | use glam::{Vec4, Vec2, IVec2}; 11 | 12 | #[derive(Clone, Copy)] 13 | pub struct Sampler; 14 | 15 | pub struct Image<'a, A,B,C,D,E,F> { 16 | _phantom: core::marker::PhantomData<(A,B,C,D,E,F)>, 17 | width: u32, 18 | height: u32, 19 | buffer: &'a [Vec4], 20 | } 21 | 22 | impl<'a, A> Image<'a, A,A,A,A,A,A> { 23 | pub const fn new (buffer: &'a [Vec4], width: u32, height: u32) -> Self { 24 | Image { 25 | _phantom: core::marker::PhantomData, 26 | width, 27 | height, 28 | buffer, 29 | } 30 | } 31 | 32 | fn sample_raw(&self, coord: IVec2) -> Vec4 { 33 | let x = coord.x as usize % self.width as usize; 34 | let y = coord.y as usize % self.height as usize; 35 | self.buffer[y * self.width as usize + x] 36 | } 37 | 38 | pub fn sample_by_lod(&self, _sampler: Sampler, coord: Vec2, _lod: f32) -> Vec4 { 39 | let scaled_uv = coord * Vec2::new(self.width as f32, self.height as f32); 40 | let frac_uv = scaled_uv.fract(); 41 | let ceil_uv = scaled_uv.ceil().as_ivec2(); 42 | let 
floor_uv = scaled_uv.floor().as_ivec2(); 43 | 44 | // Bilinear filtering 45 | let c00 = self.sample_raw(floor_uv); 46 | let c01 = self.sample_raw(IVec2::new(floor_uv.x, ceil_uv.y)); 47 | let c10 = self.sample_raw(IVec2::new(ceil_uv.x, floor_uv.y)); 48 | let c11 = self.sample_raw(ceil_uv); 49 | let tx = frac_uv.x; 50 | let ty = frac_uv.y; 51 | 52 | let a = c00.lerp(c10, tx); 53 | let b = c01.lerp(c11, tx); 54 | a.lerp(b, ty) 55 | } 56 | } 57 | 58 | #[macro_export] 59 | macro_rules! Image { 60 | ($a:expr, $b:ident=$d:ident, $c:expr) => { Image<(), (), (), (), (), ()> }; 61 | } 62 | 63 | pub type CpuImage<'fw> = Image<'fw, (),(),(),(),(),()>; 64 | } -------------------------------------------------------------------------------- /shared_structs/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | 3 | use bytemuck::{Pod, Zeroable}; 4 | use glam::{Vec3, Vec4, Vec4Swizzles, Vec2}; 5 | 6 | mod image_polyfill; 7 | pub use image_polyfill::polyfill::{Image, Sampler}; 8 | #[cfg(not(target_arch = "spirv"))] 9 | pub use image_polyfill::polyfill::CpuImage; 10 | 11 | 12 | #[repr(C)] 13 | #[derive(Copy, Clone, Pod, Zeroable)] 14 | pub struct TracingConfig { 15 | pub cam_position: Vec4, 16 | pub cam_rotation: Vec4, 17 | pub width: u32, 18 | pub height: u32, 19 | pub min_bounces: u32, 20 | pub max_bounces: u32, 21 | pub sun_direction: Vec4, 22 | pub nee: u32, 23 | pub has_skybox: u32, 24 | pub specular_weight_clamp: Vec2, 25 | } 26 | 27 | impl Default for TracingConfig { 28 | fn default() -> Self { 29 | Self { 30 | cam_position: Vec4::new(0.0, 1.0, -5.0, 0.0), 31 | cam_rotation: Vec4::ZERO, 32 | width: 1280, 33 | height: 720, 34 | min_bounces: 3, 35 | max_bounces: 4, 36 | sun_direction: Vec3::new(0.5, 1.3, 1.0).normalize().extend(15.0), 37 | nee: 0, 38 | has_skybox: 0, 39 | specular_weight_clamp: Vec2::new(0.1, 0.9), 40 | } 41 | } 42 | } 43 | 44 | #[repr(C)] 45 | #[derive(Copy, Clone, Pod, Zeroable, Default)] 46 | pub struct 
MaterialData { // each Vec4 is either a color or an atlas location 47 | pub emissive: Vec4, 48 | pub albedo: Vec4, 49 | pub roughness: Vec4, 50 | pub metallic: Vec4, 51 | pub normals: Vec4, 52 | has_albedo_texture: u32, 53 | has_metallic_texture: u32, 54 | has_roughness_texture: u32, 55 | has_normal_texture: u32, 56 | } 57 | 58 | impl MaterialData { 59 | pub fn has_albedo_texture(&self) -> bool { 60 | self.has_albedo_texture != 0 61 | } 62 | 63 | pub fn set_has_albedo_texture(&mut self, has_albedo_texture: bool) { 64 | self.has_albedo_texture = if has_albedo_texture { 1 } else { 0 }; 65 | } 66 | 67 | pub fn has_metallic_texture(&self) -> bool { 68 | self.has_metallic_texture != 0 69 | } 70 | 71 | pub fn set_has_metallic_texture(&mut self, has_metallic_texture: bool) { 72 | self.has_metallic_texture = if has_metallic_texture { 1 } else { 0 }; 73 | } 74 | 75 | pub fn has_roughness_texture(&self) -> bool { 76 | self.has_roughness_texture != 0 77 | } 78 | 79 | pub fn set_has_roughness_texture(&mut self, has_roughness_texture: bool) { 80 | self.has_roughness_texture = if has_roughness_texture { 1 } else { 0 }; 81 | } 82 | 83 | pub fn has_normal_texture(&self) -> bool { 84 | self.has_normal_texture != 0 85 | } 86 | 87 | pub fn set_has_normal_texture(&mut self, has_normal_texture: bool) { 88 | self.has_normal_texture = if has_normal_texture { 1 } else { 0 }; 89 | } 90 | } 91 | 92 | #[repr(C)] 93 | #[derive(Copy, Clone, Pod, Zeroable, Default)] 94 | pub struct PerVertexData { 95 | pub vertex: Vec4, 96 | pub normal: Vec4, 97 | pub tangent: Vec4, 98 | pub uv0: Vec2, 99 | pub uv1: Vec2, 100 | } 101 | 102 | #[repr(C)] 103 | #[derive(Copy, Clone, Pod, Zeroable, Default)] 104 | pub struct LightPickEntry { 105 | pub triangle_index_a: u32, 106 | pub triangle_area_a: f32, 107 | pub triangle_pick_pdf_a: f32, 108 | pub triangle_index_b: u32, 109 | pub triangle_area_b: f32, 110 | pub triangle_pick_pdf_b: f32, 111 | pub ratio: f32, 112 | } 113 | 114 | // wgpu doesn't allow 0-sized 
buffers, so we use negative ratios to indicate sentinel values 115 | impl LightPickEntry { 116 | pub fn is_sentinel(&self) -> bool { 117 | self.ratio < 0.0 118 | } 119 | } 120 | 121 | #[repr(C)] 122 | #[derive(Clone, Copy, Pod, Zeroable)] 123 | pub struct BVHNode { 124 | aabb_min: Vec4, // w = triangle count 125 | aabb_max: Vec4, // w = left_node if triangle_count is 0, first_triangle_index if triangle_count is 1 126 | } 127 | 128 | impl Default for BVHNode { 129 | fn default() -> Self { 130 | Self { 131 | aabb_min: Vec4::new(f32::INFINITY, f32::INFINITY, f32::INFINITY, 0.0), 132 | aabb_max: Vec4::new(f32::NEG_INFINITY, f32::NEG_INFINITY, f32::NEG_INFINITY, 0.0), 133 | } 134 | } 135 | } 136 | 137 | impl BVHNode { 138 | // Immutable access 139 | pub fn triangle_count(&self) -> u32 { 140 | unsafe { core::mem::transmute(self.aabb_min.w) } 141 | } 142 | 143 | pub fn left_node_index(&self) -> u32 { 144 | unsafe { core::mem::transmute(self.aabb_max.w) } 145 | } 146 | 147 | pub fn right_node_index(&self) -> u32 { 148 | self.left_node_index() + 1 149 | } 150 | 151 | pub fn first_triangle_index(&self) -> u32 { 152 | unsafe { core::mem::transmute(self.aabb_max.w) } 153 | } 154 | 155 | pub fn aabb_min(&self) -> Vec3 { 156 | self.aabb_min.xyz() 157 | } 158 | 159 | pub fn aabb_max(&self) -> Vec3 { 160 | self.aabb_max.xyz() 161 | } 162 | 163 | pub fn is_leaf(&self) -> bool { 164 | self.triangle_count() > 0 165 | } 166 | 167 | // Mutable access 168 | pub fn set_triangle_count(&mut self, triangle_count: u32) { 169 | self.aabb_min.w = unsafe { core::mem::transmute(triangle_count) }; 170 | } 171 | 172 | pub fn set_left_node_index(&mut self, left_node_index: u32) { 173 | self.aabb_max.w = unsafe { core::mem::transmute(left_node_index) }; 174 | } 175 | 176 | pub fn set_first_triangle_index(&mut self, first_triangle_index: u32) { 177 | self.aabb_max.w = unsafe { core::mem::transmute(first_triangle_index) }; 178 | } 179 | 180 | pub fn set_aabb_min(&mut self, aabb_min: &Vec3) { 181 | 
self.aabb_min.x = aabb_min.x; 182 | self.aabb_min.y = aabb_min.y; 183 | self.aabb_min.z = aabb_min.z; 184 | } 185 | 186 | pub fn set_aabb_max(&mut self, aabb_max: &Vec3) { 187 | self.aabb_max.x = aabb_max.x; 188 | self.aabb_max.y = aabb_max.y; 189 | self.aabb_max.z = aabb_max.z; 190 | } 191 | } 192 | 193 | #[repr(u32)] 194 | #[derive(Copy, Clone, PartialEq, Eq, Hash)] 195 | pub enum NextEventEstimation { 196 | None, 197 | MultipleImportanceSampling, 198 | DirectLightSampling, 199 | } 200 | 201 | impl core::fmt::Debug for NextEventEstimation { 202 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 203 | match self { 204 | NextEventEstimation::None => write!(f, "None"), 205 | NextEventEstimation::MultipleImportanceSampling => write!(f, "MIS"), 206 | NextEventEstimation::DirectLightSampling => write!(f, "Direct only"), 207 | } 208 | } 209 | } 210 | 211 | impl NextEventEstimation { 212 | pub fn to_u32(self) -> u32 { 213 | match self { 214 | NextEventEstimation::None => 0, 215 | NextEventEstimation::MultipleImportanceSampling => 1, 216 | NextEventEstimation::DirectLightSampling => 2, 217 | } 218 | } 219 | 220 | pub fn from_u32(value: u32) -> Self { 221 | match value { 222 | 0 => NextEventEstimation::None, 223 | 1 => NextEventEstimation::MultipleImportanceSampling, 224 | 2 => NextEventEstimation::DirectLightSampling, 225 | _ => NextEventEstimation::None, 226 | } 227 | } 228 | 229 | pub fn uses_mis(&self) -> bool { 230 | self == &NextEventEstimation::MultipleImportanceSampling 231 | } 232 | 233 | pub fn uses_nee(&self) -> bool { 234 | self != &NextEventEstimation::None 235 | } 236 | } -------------------------------------------------------------------------------- /src/app.rs: -------------------------------------------------------------------------------- 1 | 2 | use std::num::NonZeroU32; 3 | use std::sync::atomic::Ordering; 4 | use std::time::Instant; 5 | use std::{iter, sync::Arc}; 6 | use std::fmt::Debug; 7 | 8 | use 
// Returns true when `img` ends with one of the recognised image file extensions.
//
// The comparison ignores ASCII case, so "sky.HDR" and "Sky.Png" are recognised as
// images too. The file picker uses this check to decide whether a chosen file is a
// skybox image or a scene.
fn is_image(img: &str) -> bool {
    const IMAGE_EXTENSIONS: [&str; 7] = [".png", ".jpg", ".jpeg", ".bmp", ".tga", ".hdr", ".exr"];

    let lowered = img.to_ascii_lowercase();
    IMAGE_EXTENSIONS.iter().any(|ext| lowered.ends_with(ext))
}
wgpu::Instance::new(wgpu::Backends::PRIMARY); 78 | let surface = unsafe { instance.create_surface(&window) }; 79 | let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions { 80 | power_preference: wgpu::PowerPreference::HighPerformance, 81 | compatible_surface: Some(&surface), 82 | force_fallback_adapter: false, 83 | })) 84 | .expect("Failed to creator wgpu adapter."); 85 | 86 | let (device, queue) = pollster::block_on(adapter.request_device( 87 | &wgpu::DeviceDescriptor { 88 | features: wgpu::Features::default(), 89 | limits: wgpu::Limits::default(), 90 | label: None, 91 | }, 92 | None, 93 | )) 94 | .expect("Failed to creator wgpu device."); 95 | 96 | let size = window.inner_size(); 97 | let surface_format = surface.get_supported_formats(&adapter)[0]; 98 | let surface_config = wgpu::SurfaceConfiguration { 99 | usage: wgpu::TextureUsages::RENDER_ATTACHMENT, 100 | format: surface_format, 101 | width: size.width, 102 | height: size.height, 103 | present_mode: wgpu::PresentMode::Fifo, 104 | alpha_mode: wgpu::CompositeAlphaMode::Auto, 105 | }; 106 | surface.configure(&device, &surface_config); 107 | 108 | let egui_renderer = egui_wgpu::renderer::Renderer::new(&device, surface_format, None, 1); 109 | let tracing_state = Arc::new(TracingState::new(size.width, size.height)); 110 | Self { 111 | tracing_state, 112 | last_input: Instant::now(), 113 | mouse_delta: (0.0, 0.0), 114 | device, 115 | queue, 116 | window, 117 | surface_config, 118 | surface, 119 | surface_format, 120 | egui_renderer, 121 | compute_join_handle: None, 122 | selected_scene: "scene.glb".to_string(), 123 | selected_skybox: None, 124 | tonemapping: Tonemapping::None, 125 | use_cpu: false, 126 | show_environment_window: false, 127 | } 128 | } 129 | 130 | pub fn window(&self) -> &winit::window::Window { 131 | &self.window 132 | } 133 | 134 | fn start_render(&mut self, continue_previous: bool) { 135 | if self.compute_join_handle.is_some() { 136 | self.stop_render(); 137 | } 138 
| 139 | self.window.set_resizable(false); 140 | let size = self.window.inner_size(); 141 | 142 | if !continue_previous { 143 | let (config, framebuffer) = TracingState::make_view_dependent_state(size.width, size.height, Some(*self.tracing_state.config.read())); 144 | *self.tracing_state.config.write() = config; 145 | *self.tracing_state.framebuffer.write() = framebuffer; 146 | self.tracing_state.samples.store(0, Ordering::Relaxed); 147 | 148 | let render_resources = PaintCallbackResources::new(&self.device, self.surface_format, size.width, size.height); 149 | self.egui_renderer.paint_callback_resources.insert(render_resources); 150 | } 151 | self.tracing_state.running.store(true, Ordering::Relaxed); 152 | let tracing_state = self.tracing_state.clone(); 153 | 154 | let use_cpu = self.use_cpu; 155 | let path = self.selected_scene.clone(); 156 | let skybox_path = self.selected_skybox.clone(); 157 | self.compute_join_handle = Some(std::thread::spawn(move || { 158 | let skybox_path_ref = skybox_path.as_ref().map(|s| s.as_str()); 159 | if use_cpu { 160 | trace_cpu(&path, skybox_path_ref, tracing_state); 161 | } else { 162 | trace_gpu(&path, skybox_path_ref, tracing_state); 163 | } 164 | })); 165 | } 166 | 167 | fn stop_render(&mut self) { 168 | self.window.set_resizable(true); 169 | self.tracing_state.running.store(false, Ordering::Relaxed); 170 | 171 | if let Some(handle) = self.compute_join_handle.take() { 172 | handle.join().expect("Render thread died."); 173 | self.compute_join_handle = None; 174 | } 175 | } 176 | 177 | fn restart_current_render(&mut self, continue_previous: bool) { 178 | if self.compute_join_handle.is_some() { 179 | self.start_render(continue_previous); 180 | } 181 | } 182 | 183 | fn set_skybox(&mut self, skybox: &str) { 184 | self.selected_skybox = Some(skybox.to_string()); 185 | self.tracing_state.config.write().has_skybox = 1; 186 | self.restart_current_render(false); 187 | } 188 | 189 | fn clear_skybox(&mut self) { 190 | self.selected_skybox = 
None; 191 | self.tracing_state.config.write().has_skybox = 0; 192 | self.restart_current_render(false); 193 | } 194 | 195 | fn set_scene(&mut self, scene: &str) { 196 | self.selected_scene = scene.to_string(); 197 | self.start_render(false); 198 | } 199 | 200 | fn on_gui(&mut self, egui_ctx: &egui::Context) { 201 | self.on_settings_gui(egui_ctx); 202 | self.on_environment_gui(egui_ctx); 203 | } 204 | 205 | fn on_settings_gui(&mut self, egui_ctx: &egui::Context) { 206 | egui::Window::new("Settings").show(egui_ctx, |ui| { 207 | egui::Grid::new("MainGrid") 208 | .striped(true) 209 | .show(ui, |ui| { 210 | ui.vertical(|ui| { 211 | ui.label(format!("Selected scene: {}", self.selected_scene)); 212 | ui.horizontal(|ui| { 213 | if self.compute_join_handle.as_ref().map_or(false, |t| !t.is_finished()) { 214 | if ui.button("Stop").clicked() { 215 | self.stop_render(); 216 | } 217 | } else { 218 | if ui.button("Start").clicked() { 219 | self.start_render(false); 220 | } 221 | } 222 | 223 | if ui.button("Select scene").clicked() { 224 | if let Some(path) = tinyfiledialogs::open_file_dialog("Select scene", "", None) { 225 | if is_image(&path) { 226 | self.set_skybox(&path); 227 | } else { 228 | self.set_scene(&path); 229 | } 230 | } 231 | } 232 | 233 | if ui.button("Save image").clicked() { 234 | if let Some(resources) = self.egui_renderer.paint_callback_resources.get::() { 235 | let width = self.tracing_state.config.read().width; 236 | let height = self.tracing_state.config.read().height; 237 | resources.save_render(width, height, self.surface_format, &self.device, &self.queue); 238 | } 239 | } 240 | }); 241 | }); 242 | ui.end_row(); 243 | 244 | ui.horizontal(|ui| { 245 | #[cfg(feature = "oidn")] 246 | { 247 | let mut denoise_checked = self.tracing_state.denoise.load(Ordering::Relaxed); 248 | if ui.checkbox(&mut denoise_checked, "Denoise").changed() { 249 | self.tracing_state.denoise.store(denoise_checked, Ordering::Relaxed); 250 | } 251 | } 252 | 253 | let mut use_blue_noise = 
self.tracing_state.use_blue_noise.load(Ordering::Relaxed); 254 | if ui.checkbox(&mut use_blue_noise, "Use blue noise").changed() { 255 | self.tracing_state.use_blue_noise.store(use_blue_noise, Ordering::Relaxed); 256 | self.tracing_state.dirty.store(true, Ordering::Relaxed); 257 | } 258 | }); 259 | ui.end_row(); 260 | 261 | ui.horizontal(|ui| { 262 | let mut config = self.tracing_state.config.write(); 263 | if ui.add(egui::DragValue::new(&mut config.min_bounces)).changed() { 264 | if config.min_bounces > config.max_bounces { 265 | config.max_bounces = config.min_bounces; 266 | } 267 | self.tracing_state.dirty.store(true, Ordering::Relaxed); 268 | } 269 | ui.label("Min bounces"); 270 | 271 | if ui.add(egui::DragValue::new(&mut config.max_bounces)).changed() { 272 | if config.max_bounces < config.min_bounces { 273 | config.min_bounces = config.max_bounces; 274 | } 275 | self.tracing_state.dirty.store(true, Ordering::Relaxed); 276 | } 277 | ui.label("Max bounces"); 278 | }); 279 | ui.end_row(); 280 | 281 | let prev_nee_mode = NextEventEstimation::from_u32(self.tracing_state.config.read().nee); 282 | let mut nee_mode = prev_nee_mode; 283 | egui::ComboBox::from_label("Next event estimation") 284 | .selected_text(format!("{:?}", nee_mode)) 285 | .show_ui(ui, |ui| { 286 | ui.selectable_value(&mut nee_mode, NextEventEstimation::None, "None"); 287 | ui.selectable_value(&mut nee_mode, NextEventEstimation::MultipleImportanceSampling, "Multiple importance sampling"); 288 | ui.selectable_value(&mut nee_mode, NextEventEstimation::DirectLightSampling, "Direct light sampling only"); 289 | }); 290 | if nee_mode != prev_nee_mode { 291 | self.tracing_state.config.write().nee = nee_mode.to_u32(); 292 | self.tracing_state.dirty.store(true, Ordering::Relaxed); 293 | } 294 | ui.end_row(); 295 | 296 | { 297 | let mut config = self.tracing_state.config.write(); 298 | if ui.add(egui::Slider::new(&mut config.specular_weight_clamp.x, 0.0..=1.0).text("Min specular")).changed() { 299 | if 
config.specular_weight_clamp.x > config.specular_weight_clamp.y { 300 | config.specular_weight_clamp.y = config.specular_weight_clamp.x; 301 | } 302 | self.tracing_state.dirty.store(true, Ordering::Relaxed); 303 | } 304 | ui.end_row(); 305 | 306 | if ui.add(egui::Slider::new(&mut config.specular_weight_clamp.y, 0.0..=1.0).text("Max specular")).changed() { 307 | if config.specular_weight_clamp.x > config.specular_weight_clamp.y { 308 | config.specular_weight_clamp.x = config.specular_weight_clamp.y; 309 | } 310 | self.tracing_state.dirty.store(true, Ordering::Relaxed); 311 | } 312 | ui.end_row(); 313 | } 314 | 315 | egui::ComboBox::from_label("Tonemapping operator") 316 | .selected_text(format!("{:?}", self.tonemapping)) 317 | .show_ui(ui, |ui| { 318 | ui.selectable_value(&mut self.tonemapping, Tonemapping::None, "None"); 319 | ui.selectable_value(&mut self.tonemapping, Tonemapping::ACESNarkowicz, "ACES (Narkowicz)"); 320 | ui.selectable_value(&mut self.tonemapping, Tonemapping::ACESNarkowiczOverexposed, "ACES (Narkowicz, overexposed)"); 321 | ui.selectable_value(&mut self.tonemapping, Tonemapping::ACESHill, "ACES (Hill)"); 322 | ui.selectable_value(&mut self.tonemapping, Tonemapping::Neutral, "Neutral"); 323 | ui.selectable_value(&mut self.tonemapping, Tonemapping::Reinhard, "Reinhard"); 324 | ui.selectable_value(&mut self.tonemapping, Tonemapping::Uncharted, "Uncharted"); 325 | }); 326 | ui.end_row(); 327 | 328 | if ui.button("Environment settings").clicked() { 329 | self.show_environment_window = !self.show_environment_window; 330 | } 331 | ui.end_row(); 332 | 333 | ui.separator(); 334 | ui.end_row(); 335 | 336 | egui::ComboBox::from_label("Compute device") 337 | .selected_text(if self.use_cpu { "CPU" } else { "GPU" }) 338 | .show_ui(ui, |ui| { 339 | if ui.selectable_label(!self.use_cpu, "GPU").clicked() { 340 | self.use_cpu = false; 341 | self.restart_current_render(true); 342 | }; 343 | if ui.selectable_label(self.use_cpu, "CPU").clicked() { 344 | self.use_cpu 
= true; 345 | self.restart_current_render(true); 346 | }; 347 | }); 348 | ui.end_row(); 349 | 350 | let mut sync_rate = self.tracing_state.sync_rate.load(Ordering::Relaxed); 351 | if ui.add_enabled(!self.use_cpu, egui::Slider::new(&mut sync_rate, 1..=256).text("GPU sync rate")).changed() { 352 | self.tracing_state.sync_rate.store(sync_rate, Ordering::Relaxed); 353 | } 354 | ui.end_row(); 355 | 356 | ui.label(format!( 357 | "Samples: {}", 358 | self.tracing_state.samples.load(Ordering::Relaxed) 359 | )); 360 | ui.end_row(); 361 | }); 362 | }); 363 | } 364 | 365 | fn on_environment_gui(&mut self, egui_ctx: &egui::Context) { 366 | let mut show_environment_window = self.show_environment_window; 367 | egui::Window::new("Environment").open(&mut show_environment_window).show(egui_ctx, |ui| { 368 | let mouse_down = ui.input().pointer.primary_down(); 369 | let sun_direction = self.tracing_state.config.read().sun_direction; 370 | { 371 | let skybox_name = self.selected_skybox.as_ref().map(|s| s.as_ref()).unwrap_or("Procedural"); 372 | ui.label(format!("Selected skybox: {}", skybox_name)); 373 | } 374 | ui.horizontal(|ui| { 375 | if ui.button("Select skybox").clicked() { 376 | if let Some(path) = tinyfiledialogs::open_file_dialog("Select skybox", "", None) { 377 | self.set_skybox(&path); 378 | } 379 | } 380 | if ui.button("Reset skybox").clicked() { 381 | self.clear_skybox(); 382 | } 383 | }); 384 | 385 | let mut sun_intensity = sun_direction.w; 386 | if ui.add(egui::Slider::new(&mut sun_intensity, 0.0..=50.0).text("Sun intensity")).changed() { 387 | self.tracing_state.config.write().sun_direction.w = sun_intensity; 388 | self.tracing_state.dirty.store(true, Ordering::Relaxed); 389 | } 390 | ui.end_row(); 391 | 392 | egui::plot::Plot::new("Sun position") 393 | .view_aspect(1.0) 394 | .allow_drag(false) 395 | .allow_zoom(false) 396 | .allow_scroll(false) 397 | .allow_boxed_zoom(false) 398 | .height(250.0) 399 | .show(ui, |ui| { 400 | let n = 512; 401 | let circle_points: 
egui::plot::PlotPoints = (0..=n) 402 | .map(|i| { 403 | let t = egui::remap(i as f64, 0.0..=(n as f64), 0.0..=6.28); 404 | let r = 1.0; 405 | [ 406 | r * t.cos() + 0.0 as f64, 407 | r * t.sin() + 0.0 as f64, 408 | ] 409 | }) 410 | .collect(); 411 | ui.line(egui::plot::Line::new(circle_points)); 412 | 413 | let sun_pos = [sun_direction.x as f64, sun_direction.z as f64]; 414 | ui.points(egui::plot::Points::new(vec![sun_pos]) 415 | .color(egui::Color32::GOLD) 416 | .shape(egui::plot::MarkerShape::Asterisk) 417 | .radius(8.0) 418 | .name("Sun position")); 419 | 420 | let pointer = ui.pointer_coordinate(); 421 | if let Some(pointer) = pointer { 422 | if mouse_down && pointer.x.abs() <= 1.0 && pointer.y.abs() <= 1.0 { 423 | let mut new_pos = pointer.to_vec2(); 424 | if new_pos.length() > 1.0 { 425 | new_pos = new_pos.normalized(); 426 | } 427 | let new_pos_y = (1.0 - new_pos.x * new_pos.x - new_pos.y * new_pos.y).sqrt(); 428 | let new_pos_vec = Vec3::new(new_pos.x as f32, new_pos_y as f32, new_pos.y as f32).normalize(); 429 | 430 | self.tracing_state.config.write().sun_direction = new_pos_vec.extend(sun_direction.w); 431 | self.tracing_state.dirty.store(true, Ordering::Relaxed); 432 | } 433 | } 434 | }); 435 | }); 436 | self.show_environment_window = show_environment_window; 437 | } 438 | 439 | fn handle_input(&mut self, ui: &egui::Ui) { 440 | if self.last_input.elapsed().as_millis() < 16 { 441 | return; 442 | } 443 | self.last_input = Instant::now(); 444 | 445 | if ui.input().pointer.secondary_down() { 446 | self.tracing_state.interacting.store(true, Ordering::Relaxed); 447 | self.window.set_cursor_visible(false); 448 | } else { 449 | self.tracing_state.interacting.store(false, Ordering::Relaxed); 450 | self.window.set_cursor_visible(true); 451 | } 452 | 453 | let mut config = self.tracing_state.config.write(); 454 | 455 | let mut forward = Vec3::new(0.0, 0.0, 1.0); 456 | let mut right = Vec3::new(1.0, 0.0, 0.0); 457 | let euler_mat = 458 | 
Mat3::from_rotation_y(config.cam_rotation.y) * Mat3::from_rotation_x(config.cam_rotation.x); 459 | forward = euler_mat * forward; 460 | right = euler_mat * right; 461 | 462 | let speed = if ui.input().modifiers.shift { 463 | 0.5 464 | } else if ui.input().modifiers.ctrl { 465 | 0.01 466 | } else { 467 | 0.1 468 | }; 469 | 470 | if ui.input().key_down(egui::Key::W) { 471 | config.cam_position += forward.extend(0.0) * speed; 472 | } 473 | if ui.input().key_down(egui::Key::S) { 474 | config.cam_position -= forward.extend(0.0) * speed; 475 | } 476 | if ui.input().key_down(egui::Key::D) { 477 | config.cam_position += right.extend(0.0) * speed; 478 | } 479 | if ui.input().key_down(egui::Key::A) { 480 | config.cam_position -= right.extend(0.0) * speed; 481 | } 482 | if ui.input().key_down(egui::Key::E) { 483 | config.cam_position.y += speed; 484 | } 485 | if ui.input().key_down(egui::Key::Q) { 486 | config.cam_position.y -= speed; 487 | } 488 | 489 | config.cam_rotation.x += self.mouse_delta.1 * 0.005; 490 | config.cam_rotation.y += self.mouse_delta.0 * 0.005; 491 | self.mouse_delta = (0.0, 0.0); 492 | } 493 | 494 | pub fn redraw(&mut self, platform: &mut Platform, start_time: &Instant) { 495 | platform.update_time(start_time.elapsed().as_secs_f64()); 496 | 497 | let output_frame = match self.surface.get_current_texture() { 498 | Ok(frame) => frame, 499 | Err(_) => { 500 | return; 501 | } 502 | }; 503 | let output_view = output_frame 504 | .texture 505 | .create_view(&wgpu::TextureViewDescriptor::default()); 506 | 507 | // Begin to draw the UI frame. 
508 | platform.begin_frame(); 509 | 510 | // Render here 511 | egui::CentralPanel::default() 512 | .frame(egui::Frame::default().inner_margin(egui::Vec2::ZERO)) 513 | .show(&platform.context(), |ui| { 514 | self.on_gui(&platform.context()); 515 | self.handle_input(ui); 516 | 517 | let rect = ui.allocate_exact_size(ui.available_size(), egui::Sense::drag()).0; 518 | let framebuffer = self.tracing_state.framebuffer.read().clone(); // TODO: clone is slow 519 | let width = self.tracing_state.config.read().width; 520 | let height = self.tracing_state.config.read().height; 521 | let tonemapping = self.tonemapping; 522 | let cb = egui_wgpu::CallbackFn::new() 523 | .prepare(move |_device, queue, _encoder, typemap| { 524 | if let Some(resources) = typemap.get::() { 525 | resources.prepare(queue, &framebuffer, width, height, tonemapping); 526 | } 527 | Default::default() 528 | }) 529 | .paint(move |_info, rpass, typemap| { 530 | if let Some(resources) = typemap.get::() { 531 | resources.paint(rpass); 532 | } 533 | }); 534 | 535 | let callback = egui::PaintCallback { 536 | rect, 537 | callback: Arc::new(cb), 538 | }; 539 | 540 | ui.painter().add(callback); 541 | }); 542 | 543 | // End the UI frame. We could now handle the output and draw the UI with the backend. 544 | let full_output = platform.end_frame(Some(&self.window)); 545 | let paint_jobs = platform.context().tessellate(full_output.shapes); 546 | 547 | let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); 548 | 549 | // Upload all resources for the GPU. 
550 | let screen_descriptor = ScreenDescriptor { 551 | size_in_pixels: [self.surface_config.width, self.surface_config.height], 552 | pixels_per_point: self.window.scale_factor() as f32, 553 | }; 554 | let tdelta: egui::TexturesDelta = full_output.textures_delta; 555 | for (id, image_delta) in &tdelta.set { 556 | self.egui_renderer.update_texture(&self.device, &self.queue, *id, image_delta); 557 | } 558 | self.egui_renderer.update_buffers( 559 | &self.device, 560 | &self.queue, 561 | &mut encoder, 562 | &paint_jobs, 563 | &screen_descriptor, 564 | ); 565 | 566 | { 567 | let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { 568 | label: None, 569 | color_attachments: &[ 570 | Some(wgpu::RenderPassColorAttachment { 571 | view: &output_view, 572 | resolve_target: None, 573 | ops: wgpu::Operations { 574 | load: wgpu::LoadOp::Clear(wgpu::Color::BLACK), 575 | store: true, 576 | }, 577 | }), 578 | ], 579 | depth_stencil_attachment: None, 580 | }); 581 | self.egui_renderer.render(&mut render_pass, &paint_jobs, &screen_descriptor); 582 | } 583 | // Submit the commands. 
584 | self.queue.submit(iter::once(encoder.finish())); 585 | 586 | // Redraw egui 587 | output_frame.present(); 588 | 589 | for id in &tdelta.free { 590 | self.egui_renderer.free_texture(id); 591 | } 592 | } 593 | 594 | pub fn handle_mouse_motion(&mut self, delta: (f64, f64)) { 595 | if self.tracing_state.interacting.load(Ordering::Relaxed) { 596 | self.mouse_delta.0 += delta.0 as f32; 597 | self.mouse_delta.1 += delta.1 as f32; 598 | 599 | // Set mouse position to center of screen 600 | let size = self.window.inner_size(); 601 | let center = winit::dpi::LogicalPosition::new( 602 | size.width as f32 / 2.0, 603 | size.height as f32 / 2.0, 604 | ); 605 | let _ = self.window.set_cursor_position(center); 606 | } 607 | } 608 | 609 | pub fn handle_resize(&mut self, size: PhysicalSize) { 610 | if size.width > 0 && size.height > 0 { 611 | self.surface_config.width = size.width; 612 | self.surface_config.height = size.height; 613 | self.surface.configure(&self.device, &self.surface_config); 614 | } 615 | } 616 | 617 | pub fn handle_file_dropped(&mut self, path: &std::path::Path) { 618 | let path_str = path.to_str().expect("Path was not valid utf8."); 619 | if is_image(path_str) { 620 | self.set_skybox(path_str); 621 | } else { 622 | self.set_scene(path_str); 623 | } 624 | } 625 | } 626 | 627 | struct PaintCallbackResources { 628 | pipeline: wgpu::RenderPipeline, 629 | bind_group: wgpu::BindGroup, 630 | uniform_buffer: wgpu::Buffer, 631 | render_buffer: wgpu::Buffer, 632 | } 633 | 634 | impl PaintCallbackResources { 635 | fn prepare( 636 | &self, 637 | queue: &wgpu::Queue, 638 | framebuffer: &Vec, 639 | width: u32, 640 | height: u32, 641 | tonemapping: Tonemapping, 642 | ) { 643 | queue.write_buffer(&self.render_buffer, 0, bytemuck::cast_slice(framebuffer)); 644 | queue.write_buffer( 645 | &self.uniform_buffer, 646 | 0, 647 | bytemuck::cast_slice(&[width, height, tonemapping as u32]), 648 | ); 649 | } 650 | 651 | fn paint<'rpass>(&'rpass self, rpass: &mut 
wgpu::RenderPass<'rpass>) { 652 | rpass.set_pipeline(&self.pipeline); 653 | rpass.set_bind_group(0, &self.bind_group, &[]); 654 | rpass.draw(0..6, 0..1); 655 | } 656 | 657 | fn new( 658 | device: &wgpu::Device, 659 | format: wgpu::TextureFormat, 660 | width: u32, 661 | height: u32, 662 | ) -> PaintCallbackResources { 663 | let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { 664 | label: None, 665 | source: wgpu::ShaderSource::Wgsl(include_str!("resources/render.wgsl").into()), 666 | }); 667 | 668 | let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { 669 | label: None, 670 | entries: &[ 671 | wgpu::BindGroupLayoutEntry { 672 | binding: 0, 673 | visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, 674 | ty: wgpu::BindingType::Buffer { 675 | ty: wgpu::BufferBindingType::Uniform, 676 | has_dynamic_offset: false, 677 | min_binding_size: None, 678 | }, 679 | count: None, 680 | }, 681 | wgpu::BindGroupLayoutEntry { 682 | binding: 1, 683 | visibility: wgpu::ShaderStages::FRAGMENT, 684 | ty: wgpu::BindingType::Buffer { 685 | ty: wgpu::BufferBindingType::Storage { read_only: true }, 686 | has_dynamic_offset: false, 687 | min_binding_size: None, 688 | }, 689 | count: None, 690 | }, 691 | ], 692 | }); 693 | 694 | let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { 695 | label: None, 696 | bind_group_layouts: &[&bind_group_layout], 697 | push_constant_ranges: &[], 698 | }); 699 | 700 | let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { 701 | label: None, 702 | layout: Some(&pipeline_layout), 703 | vertex: wgpu::VertexState { 704 | module: &shader, 705 | entry_point: "vs_main", 706 | buffers: &[], 707 | }, 708 | fragment: Some(wgpu::FragmentState { 709 | module: &shader, 710 | entry_point: "fs_main", 711 | targets: &[Some(wgpu::ColorTargetState { 712 | format, 713 | blend: Some(wgpu::BlendState::REPLACE), 714 | write_mask: 
wgpu::ColorWrites::ALL, 715 | })], 716 | }), 717 | primitive: wgpu::PrimitiveState::default(), 718 | depth_stencil: None, 719 | multisample: wgpu::MultisampleState::default(), 720 | multiview: None, 721 | }); 722 | 723 | let uniform_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { 724 | label: None, 725 | contents: bytemuck::cast_slice(&[0.0, 0.0, 0.0]), 726 | usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::UNIFORM, 727 | }); 728 | 729 | let render_buffer = device.create_buffer(&wgpu::BufferDescriptor { 730 | label: None, 731 | size: (width * height * 3 * 4) as u64, 732 | usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::STORAGE, 733 | mapped_at_creation: false, 734 | }); 735 | 736 | let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { 737 | label: None, 738 | layout: &bind_group_layout, 739 | entries: &[ 740 | wgpu::BindGroupEntry { 741 | binding: 0, 742 | resource: uniform_buffer.as_entire_binding(), 743 | }, 744 | wgpu::BindGroupEntry { 745 | binding: 1, 746 | resource: render_buffer.as_entire_binding(), 747 | }, 748 | ], 749 | }); 750 | 751 | PaintCallbackResources { 752 | pipeline, 753 | bind_group, 754 | uniform_buffer, 755 | render_buffer, 756 | } 757 | } 758 | 759 | fn save_render(&self, texture_width: u32, texture_height: u32, format: wgpu::TextureFormat, device: &wgpu::Device, queue: &wgpu::Queue) { 760 | let texture_desc = &wgpu::TextureDescriptor { 761 | label: None, 762 | size: wgpu::Extent3d { 763 | width: texture_width, 764 | height: texture_height, 765 | depth_or_array_layers: 1, 766 | }, 767 | mip_level_count: 1, 768 | sample_count: 1, 769 | dimension: wgpu::TextureDimension::D2, 770 | format, 771 | usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::RENDER_ATTACHMENT, 772 | }; 773 | let texture = device.create_texture(texture_desc); 774 | let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default()); 775 | let mut encoder = 
device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); 776 | { 777 | let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { 778 | label: None, 779 | color_attachments: &[ 780 | Some(wgpu::RenderPassColorAttachment { 781 | view: &texture_view, 782 | resolve_target: None, 783 | ops: wgpu::Operations { 784 | load: wgpu::LoadOp::Clear(wgpu::Color::BLACK), 785 | store: true, 786 | }, 787 | }), 788 | ], 789 | depth_stencil_attachment: None, 790 | }); 791 | self.paint(&mut render_pass); 792 | } 793 | 794 | let u32_size = std::mem::size_of::() as u32; 795 | 796 | let output_buffer_size = (u32_size * texture_width * texture_height) as wgpu::BufferAddress; 797 | let output_buffer_desc = wgpu::BufferDescriptor { 798 | size: output_buffer_size, 799 | usage: wgpu::BufferUsages::COPY_DST 800 | // this tells wpgu that we want to read this buffer from the cpu 801 | | wgpu::BufferUsages::MAP_READ, 802 | label: None, 803 | mapped_at_creation: false, 804 | }; 805 | let output_buffer = device.create_buffer(&output_buffer_desc); 806 | encoder.copy_texture_to_buffer( 807 | wgpu::ImageCopyTexture { 808 | aspect: wgpu::TextureAspect::All, 809 | texture: &texture, 810 | mip_level: 0, 811 | origin: wgpu::Origin3d::ZERO, 812 | }, 813 | wgpu::ImageCopyBuffer { 814 | buffer: &output_buffer, 815 | layout: wgpu::ImageDataLayout { 816 | offset: 0, 817 | bytes_per_row: NonZeroU32::new(u32_size * texture_width), 818 | rows_per_image: NonZeroU32::new(texture_height), 819 | }, 820 | }, 821 | texture_desc.size, 822 | ); 823 | queue.submit(Some(encoder.finish())); 824 | 825 | { 826 | let buffer_slice = output_buffer.slice(..); 827 | 828 | buffer_slice.map_async(wgpu::MapMode::Read, |_| {}); 829 | device.poll(wgpu::Maintain::Wait); 830 | let mut data = buffer_slice.get_mapped_range().to_vec(); 831 | data.chunks_exact_mut(4).for_each(|c| c.swap(0, 2)); // BGRA -> RGBA swizzle 832 | let Some(buffer) = image::ImageBuffer::, _>::from_raw(texture_width, 
texture_height, data) else { 833 | return; 834 | }; 835 | let image = image::DynamicImage::ImageRgba8(buffer).into_rgba8(); 836 | if let Some(path) = tinyfiledialogs::save_file_dialog("Save render", "") { 837 | let res = image.save(path); 838 | if res.is_err() { 839 | #[cfg(debug_assertions)] println!("Failed to save image: {:?}", res.err()); 840 | } 841 | } 842 | } 843 | output_buffer.unmap(); 844 | } 845 | } -------------------------------------------------------------------------------- /src/asset.rs: -------------------------------------------------------------------------------- 1 | use glam::{UVec4, Vec4, Mat4, Vec2, Vec3}; 2 | use gpgpu::{GpuBuffer, BufOps, GpuConstImage, primitives::{pixels::{Rgba8UintNorm, Rgba32Float}, PixelInfo}, ImgOps}; 3 | use image::DynamicImage; 4 | use russimp::{scene::{Scene, PostProcess::*}, node::Node, material::{DataContent, TextureType, Texture, Material, PropertyTypeInfo}}; 5 | use shared_structs::{MaterialData, PerVertexData, LightPickEntry}; 6 | 7 | use crate::{bvh::{BVH, BVHBuilder, GpuBVH}, trace::FW, light_pick}; 8 | 9 | pub struct World { 10 | pub bvh: BVH, 11 | pub per_vertex_buffer: Vec, 12 | pub index_buffer: Vec, 13 | pub atlas: DynamicImage, 14 | pub material_data_buffer: Vec, 15 | pub light_pick_buffer: Vec, 16 | } 17 | 18 | pub struct GpuWorld<'fw> { 19 | pub bvh: GpuBVH<'fw>, 20 | pub per_vertex_buffer: GpuBuffer<'fw, PerVertexData>, 21 | pub index_buffer: GpuBuffer<'fw, UVec4>, 22 | pub atlas: GpuConstImage<'fw, Rgba8UintNorm>, 23 | pub material_data_buffer: GpuBuffer<'fw, MaterialData>, 24 | pub light_pick_buffer: GpuBuffer<'fw, LightPickEntry>, 25 | } 26 | 27 | fn convert_texture(texture: &Texture) -> Option { 28 | let image = match &texture.data { 29 | DataContent::Texel(raw_data) => { 30 | let image_data = raw_data.iter().flat_map(|c| [c.r, c.g, c.b, c.a]).collect::>(); 31 | let image_buffer = image::RgbaImage::from_vec(texture.width, texture.height, image_data)?; 32 | 
image::DynamicImage::ImageRgba8(image_buffer) 33 | }, 34 | DataContent::Bytes(bytes) => { 35 | image::io::Reader::new(std::io::Cursor::new(bytes)).with_guessed_format().ok()?.decode().ok()? 36 | } 37 | }; 38 | 39 | Some(image) 40 | } 41 | 42 | fn load_texture(material: &Material, texture_type: TextureType) -> Option { 43 | material.textures.get(&texture_type).and_then(|texture| convert_texture(&texture.borrow())) 44 | } 45 | 46 | fn load_float_array(material: &Material, name: &str) -> Option> { 47 | let prop = material.properties.iter().find(|p| p.key == name)?; 48 | match &prop.data { 49 | PropertyTypeInfo::FloatArray(col) => Some(col.clone()), 50 | _ => None 51 | } 52 | } 53 | 54 | impl World { 55 | pub fn from_path(path: &str) -> Option { 56 | let blend = Scene::from_file( 57 | path, 58 | vec![ 59 | JoinIdenticalVertices, 60 | Triangulate, 61 | SortByPrimitiveType, 62 | GenerateSmoothNormals, 63 | GenerateUVCoords, 64 | TransformUVCoords, 65 | CalculateTangentSpace, 66 | EmbedTextures, 67 | ImproveCacheLocality, 68 | ], 69 | ).ok()?; 70 | 71 | // Gather mesh data 72 | let mut vertices = Vec::new(); 73 | let mut indices = Vec::new(); 74 | let mut normals = Vec::new(); 75 | let mut tangents = Vec::new(); 76 | let mut uvs = Vec::new(); 77 | 78 | fn walk_node_graph( 79 | scene: &Scene, 80 | node: &Node, 81 | trs: Mat4, 82 | vertices: &mut Vec, 83 | indices: &mut Vec, 84 | normals: &mut Vec, 85 | tangents: &mut Vec, 86 | uvs: &mut Vec 87 | ) { 88 | let node_trs = Mat4::from_cols_array_2d(&[ 89 | [node.transformation.a1, node.transformation.b1, node.transformation.c1, node.transformation.d1], 90 | [node.transformation.a2, node.transformation.b2, node.transformation.c2, node.transformation.d2], 91 | [node.transformation.a3, node.transformation.b3, node.transformation.c3, node.transformation.d3], 92 | [node.transformation.a4, node.transformation.b4, node.transformation.c4, node.transformation.d4], 93 | ]); 94 | let new_trs = trs * node_trs; 95 | let 
(node_scale,node_quat,_) = new_trs.to_scale_rotation_translation(); 96 | 97 | for mesh_idx in node.meshes.iter() { 98 | let mesh = &scene.meshes[*mesh_idx as usize]; 99 | let triangle_offset = vertices.len() as u32; 100 | for v in &mesh.vertices { 101 | let vert = new_trs.mul_vec4(Vec4::new(v.x, v.y, v.z, 1.0)); 102 | vertices.push(Vec4::new(vert.x, vert.z, vert.y, 1.0)); 103 | } 104 | for f in &mesh.faces { 105 | assert_eq!(f.0.len(), 3); 106 | indices.push(UVec4::new(triangle_offset + f.0[0], triangle_offset + f.0[2], triangle_offset + f.0[1], mesh.material_index)); 107 | } 108 | for n in &mesh.normals { 109 | let norm = (node_quat.mul_vec3(Vec3::new(n.x, n.y, n.z) / node_scale)).normalize(); 110 | normals.push(Vec4::new(norm.x, norm.z, norm.y, 0.0)); 111 | } 112 | for t in &mesh.tangents { 113 | let tan = (node_quat.mul_vec3(Vec3::new(t.x, t.y, t.z) / node_scale)).normalize(); 114 | tangents.push(Vec4::new(tan.x, tan.z, tan.y, 0.0)); 115 | } 116 | if let Some(Some(uv_set)) = mesh.texture_coords.first() { 117 | for uv in uv_set { 118 | uvs.push(Vec2::new(uv.x, uv.y)); 119 | } 120 | } else { 121 | uvs.resize(vertices.len(), Vec2::ZERO); 122 | } 123 | } 124 | 125 | for child in node.children.borrow().iter() { 126 | walk_node_graph(scene, child, new_trs, vertices, indices, normals, tangents, uvs); 127 | } 128 | } 129 | 130 | if let Some(root) = blend.root.as_ref() { 131 | walk_node_graph(&blend, root, Mat4::IDENTITY, &mut vertices, &mut indices, &mut normals, &mut tangents, &mut uvs); 132 | } 133 | 134 | // Gather material data 135 | let mut material_datas = vec![MaterialData::default(); blend.materials.len()]; 136 | 137 | let mut textures = Vec::new(); 138 | for (material_index, material) in blend.materials.iter().enumerate() { 139 | let current_material_data = &mut material_datas[material_index]; 140 | if let Some(texture) = load_texture(material, TextureType::Diffuse) { 141 | // Albedo data is stored in gamma space, but we atlas it with all the other textures 142 
| // which are stored in linear. Therefore, we convert here. 143 | let mut texture = texture.into_rgb8(); 144 | for pixel in texture.iter_mut() { 145 | *pixel = ((*pixel as f32 / 255.0).powf(2.2) * 255.0) as u8; 146 | } 147 | textures.push(image::DynamicImage::ImageRgb8(texture)); 148 | current_material_data.set_has_albedo_texture(true); 149 | } 150 | if let Some(texture) = load_texture(material, TextureType::Metalness) { 151 | textures.push(texture); 152 | current_material_data.set_has_metallic_texture(true); 153 | } 154 | if let Some(texture) = load_texture(material, TextureType::Roughness) { 155 | textures.push(texture); 156 | current_material_data.set_has_roughness_texture(true); 157 | } 158 | if let Some(texture) = load_texture(material, TextureType::Normals) { 159 | textures.push(texture); 160 | current_material_data.set_has_normal_texture(true); 161 | } 162 | if let Some(col) = load_float_array(material, "$clr.diffuse") { 163 | current_material_data.albedo = Vec4::new(col[0], col[1], col[2], col[3]); 164 | } 165 | if let Some(col) = load_float_array(material, "$clr.emissive") { 166 | // HACK: Multiply by 15 since assimp 5.2.5 doesn't support emissive strength :( 167 | current_material_data.emissive = Vec4::new(col[0], col[1], col[2], col[3]) * 15.0; 168 | } 169 | if let Some(col) = load_float_array(material, "$mat.metallicFactor") { 170 | current_material_data.metallic = Vec4::splat(col[0]); 171 | } 172 | if let Some(col) = load_float_array(material, "$mat.roughnessFactor") { 173 | current_material_data.roughness = Vec4::splat(col[0]); 174 | } 175 | } 176 | 177 | let (atlas_raw, mut sts) = crate::atlas::pack_textures(&textures, 4096, 4096); 178 | 179 | for material_data in material_datas.iter_mut() { 180 | if material_data.has_albedo_texture() { 181 | material_data.albedo = sts.remove(0); // TODO: Optimize this 182 | } 183 | if material_data.has_metallic_texture() { 184 | material_data.metallic = sts.remove(0); 185 | } 186 | if 
material_data.has_roughness_texture() { 187 | material_data.roughness = sts.remove(0); 188 | } 189 | if material_data.has_normal_texture() { 190 | material_data.normals = sts.remove(0); 191 | } 192 | } 193 | 194 | // BVH building 195 | let now = std::time::Instant::now(); 196 | let bvh = BVHBuilder::new(&vertices, &mut indices).sah_samples(128).build(); 197 | #[cfg(debug_assertions)] println!("BVH build time: {:?}", now.elapsed()); 198 | 199 | // Build light pick table 200 | let now = std::time::Instant::now(); 201 | let emissive_mask = light_pick::compute_emissive_mask(&indices, &material_datas); 202 | let light_pick_table = light_pick::build_light_pick_table(&vertices, &indices, &emissive_mask, &material_datas); 203 | #[cfg(debug_assertions)] println!("Light pick table build time: {:?}", now.elapsed()); 204 | 205 | // Pack per-vertex data 206 | let mut per_vertex_data = Vec::new(); 207 | for i in 0..vertices.len() { 208 | per_vertex_data.push(PerVertexData { 209 | vertex: *vertices.get(i).unwrap_or(&Vec4::ZERO), 210 | normal: *normals.get(i).unwrap_or(&Vec4::ZERO), 211 | tangent: *tangents.get(i).unwrap_or(&Vec4::ZERO), 212 | uv0: *uvs.get(i).unwrap_or(&Vec2::ZERO), 213 | ..Default::default() 214 | }); 215 | } 216 | Some(Self { 217 | bvh, 218 | per_vertex_buffer: per_vertex_data, 219 | index_buffer: indices, 220 | atlas: atlas_raw, 221 | material_data_buffer: material_datas, 222 | light_pick_buffer: light_pick_table, 223 | }) 224 | } 225 | 226 | pub fn into_gpu<'fw>(self) -> GpuWorld<'fw> { 227 | GpuWorld { 228 | per_vertex_buffer: GpuBuffer::from_slice(&FW, &self.per_vertex_buffer), 229 | index_buffer: GpuBuffer::from_slice(&FW, &self.index_buffer), 230 | bvh: self.bvh.into_gpu(), 231 | atlas: GpuConstImage::from_bytes(&FW, &self.atlas.to_rgba8(), 4096, 4096), 232 | material_data_buffer: GpuBuffer::from_slice(&FW, &self.material_data_buffer), 233 | light_pick_buffer: GpuBuffer::from_slice(&FW, &self.light_pick_buffer), 234 | } 235 | } 236 | } 237 | 238 | pub fn 
load_dynamic_image(path: &str) -> Option { 239 | // Image crate does not by default decode .hdr images as HDR for some reason 240 | if path.ends_with(".hdr") { 241 | let hdr_decoder = image::codecs::hdr::HdrDecoder::new(std::io::BufReader::new( 242 | std::fs::File::open(&path).unwrap(), 243 | )).ok()?; 244 | let width = hdr_decoder.metadata().width; 245 | let height = hdr_decoder.metadata().height; 246 | let buffer = hdr_decoder.read_image_hdr().ok()?; 247 | return Some(DynamicImage::ImageRgb32F(image::ImageBuffer::from_vec( 248 | width, 249 | height, 250 | buffer.into_iter().flat_map(|c| vec![c[0], c[1], c[2]]).collect(), 251 | )?)); 252 | } 253 | 254 | image::io::Reader::open(path).ok()?.decode().ok() 255 | } 256 | 257 | pub fn dynamic_image_to_gpu_image<'fw, P: PixelInfo>(img: DynamicImage) -> GpuConstImage<'fw, P> { 258 | let width = img.width(); 259 | let height = img.height(); 260 | match P::byte_size() { 261 | 16 => GpuConstImage::from_bytes(&FW, bytemuck::cast_slice(&img.into_rgba32f()), width, height), 262 | _ => GpuConstImage::from_bytes(&FW, &img.into_rgba8(), width, height) 263 | } 264 | } 265 | 266 | pub fn dynamic_image_to_cpu_buffer<'img>(img: DynamicImage) -> Vec { 267 | let width = img.width(); 268 | let height = img.height(); 269 | let data = img.into_rgb8(); 270 | let cpu_data: Vec = data.chunks(3).map(|f| Vec4::new(f[0] as f32, f[1] as f32, f[2] as f32, 255.0) / 255.0).collect(); 271 | assert_eq!(cpu_data.len(), width as usize * height as usize); 272 | cpu_data 273 | } 274 | 275 | pub fn fallback_gpu_image<'fw>() -> GpuConstImage<'fw, Rgba32Float> { 276 | GpuConstImage::from_bytes(&FW, bytemuck::cast_slice(&[ 277 | 1.0, 0.0, 1.0, 1.0, 278 | 1.0, 0.0, 1.0, 1.0, 279 | 1.0, 0.0, 1.0, 1.0, 280 | 1.0, 0.0, 1.0, 1.0]), 2, 2) 281 | } 282 | 283 | pub fn fallback_cpu_buffer() -> Vec { 284 | vec![ 285 | Vec4::new(1.0, 0.0, 1.0, 1.0), 286 | Vec4::new(1.0, 0.0, 1.0, 1.0), 287 | Vec4::new(1.0, 0.0, 1.0, 1.0), 288 | Vec4::new(1.0, 0.0, 1.0, 1.0), 289 | ] 290 
| } -------------------------------------------------------------------------------- /src/atlas.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::VecDeque, num::NonZeroU32}; 2 | 3 | use glam::Vec4; 4 | use image::{DynamicImage, GenericImage}; 5 | use fast_image_resize as fr; 6 | 7 | #[derive(Clone, Copy)] 8 | pub struct PackingRect { 9 | pub x: u32, 10 | pub y: u32, 11 | pub width: u32, 12 | pub height: u32, 13 | } 14 | 15 | impl PackingRect { 16 | pub fn to_uvst(&self, atlas_width: u32, atlas_height: u32) -> Vec4 { 17 | Vec4::new( 18 | self.x as f32 / atlas_width as f32, 19 | self.y as f32 / atlas_width as f32, 20 | self.width as f32 / atlas_width as f32, 21 | self.height as f32 / atlas_height as f32, 22 | ) 23 | } 24 | } 25 | 26 | pub fn pack_textures(textures: &[DynamicImage], atlas_width: u32, atlas_height: u32) -> (DynamicImage, Vec) { 27 | let root = PackingRect { 28 | x: 0, 29 | y: 0, 30 | width: atlas_width, 31 | height: atlas_height, 32 | }; 33 | let mut queue = VecDeque::from([root]); 34 | 35 | while queue.len() <= textures.len() { 36 | let node = queue.pop_front().expect("Texture packing queue was empty."); 37 | let half_width = node.width / 2; 38 | let half_height = node.height / 2; 39 | queue.extend([ 40 | PackingRect { 41 | x: node.x, 42 | y: node.y, 43 | width: half_width, 44 | height: half_height, 45 | }, 46 | PackingRect { 47 | x: node.x + half_width, 48 | y: node.y, 49 | width: half_width, 50 | height: half_height, 51 | }, 52 | PackingRect { 53 | x: node.x, 54 | y: node.y + half_height, 55 | width: half_width, 56 | height: half_height, 57 | }, 58 | PackingRect { 59 | x: node.x + half_width, 60 | y: node.y + half_height, 61 | width: half_width, 62 | height: half_height, 63 | }, 64 | ]); 65 | } 66 | 67 | let mut leafs = queue.into_iter().collect::>(); 68 | leafs.sort_by(|a, b| b.width.cmp(&a.width)); 69 | leafs.truncate(textures.len()); 70 | 71 | let mut resizer = 
fr::Resizer::new(fr::ResizeAlg::Convolution(fr::FilterType::Lanczos3)); 72 | let mut atlas = DynamicImage::new_rgba8(atlas_width, atlas_height); 73 | for (i, leaf) in leafs.iter().enumerate() { 74 | let tex = &textures[i]; 75 | let width = NonZeroU32::new(tex.width()).unwrap(); 76 | let height = NonZeroU32::new(tex.height()).unwrap(); 77 | 78 | let desired_width = NonZeroU32::new(leaf.width).unwrap(); 79 | let desired_height = NonZeroU32::new(leaf.height).unwrap(); 80 | let fr_img_src = fr::Image::from_vec_u8(width, height, tex.to_rgba8().into_raw(), fr::PixelType::U8x4).unwrap(); 81 | let mut fr_img_dst = fr::Image::new(desired_width, desired_height, fr::PixelType::U8x4); 82 | resizer.resize(&fr_img_src.view(), &mut fr_img_dst.view_mut()).unwrap(); 83 | 84 | let resized_tex = DynamicImage::ImageRgba8(image::RgbaImage::from_raw(desired_width.get(), desired_height.get(), fr_img_dst.into_vec()).unwrap()); 85 | atlas.copy_from(&resized_tex.flipv(), leaf.x, leaf.y).unwrap(); 86 | } 87 | 88 | let sts = leafs.iter().map(|x| x.to_uvst(atlas_width, atlas_height)).collect::>(); 89 | (atlas, sts) 90 | } 91 | 92 | 93 | -------------------------------------------------------------------------------- /src/bvh.rs: -------------------------------------------------------------------------------- 1 | use glam::{UVec4, Vec3, Vec4, Vec4Swizzles}; 2 | use gpgpu::{GpuBuffer, BufOps}; 3 | use shared_structs::{BVHNode}; 4 | 5 | use crate::trace::FW; 6 | 7 | // TODO: Use triangle buffer directly instead of 2 indirections 8 | 9 | trait BVHNodeExtensions { 10 | fn encapsulate(&mut self, point: &Vec3); 11 | fn encapsulate_node(&mut self, node: &BVHNode); 12 | fn area(&self) -> f32; 13 | } 14 | 15 | impl BVHNodeExtensions for BVHNode { 16 | fn encapsulate(&mut self, point: &Vec3) { 17 | self.set_aabb_min(&self.aabb_min().min(*point)); 18 | self.set_aabb_max(&self.aabb_max().max(*point)); 19 | } 20 | 21 | fn encapsulate_node(&mut self, node: &BVHNode) { 22 | if node.aabb_min().x == 
f32::INFINITY { 23 | return; 24 | } 25 | self.set_aabb_min(&self.aabb_min().min(node.aabb_min())); 26 | self.set_aabb_max(&self.aabb_max().max(node.aabb_max())); 27 | } 28 | 29 | fn area(&self) -> f32 { 30 | let extent = self.aabb_max() - self.aabb_min(); 31 | extent.x * extent.y + extent.y * extent.z + extent.z * extent.x 32 | } 33 | } 34 | 35 | pub struct BVH { 36 | pub nodes: Vec, 37 | } 38 | 39 | impl BVH { 40 | pub fn into_gpu<'fw>(self) -> GpuBVH<'fw> { 41 | let nodes_buffer = GpuBuffer::from_slice(&FW, &self.nodes); 42 | GpuBVH { nodes_buffer } 43 | } 44 | } 45 | 46 | pub struct GpuBVH<'fw> { 47 | pub nodes_buffer: GpuBuffer<'fw, BVHNode>, 48 | } 49 | 50 | pub struct BVHBuilder<'a> { 51 | sah_samples: usize, 52 | vertices: &'a [Vec4], 53 | indices: &'a mut [UVec4], 54 | centroids: Vec, 55 | nodes: Vec, 56 | } 57 | 58 | impl<'a> BVHBuilder<'a> { 59 | pub fn new(vertices: &'a [Vec4], indices: &'a mut [UVec4]) -> Self { 60 | let centroids = indices 61 | .iter() 62 | .map(|ind| { 63 | let v0 = vertices[ind.x as usize].xyz(); 64 | let v1 = vertices[ind.y as usize].xyz(); 65 | let v2 = vertices[ind.z as usize].xyz(); 66 | (v0 + v1 + v2) / 3.0 67 | }) 68 | .collect::>(); 69 | let nodes = vec![BVHNode::default(); indices.len() * 2 - 1]; 70 | 71 | Self { 72 | sah_samples: 128, 73 | vertices, 74 | indices, 75 | centroids, 76 | nodes, 77 | } 78 | } 79 | 80 | pub fn sah_samples(mut self, sah_samples: usize) -> Self { 81 | self.sah_samples = sah_samples; 82 | self 83 | } 84 | 85 | fn update_node_aabb(&mut self, node_idx: usize) { 86 | let node = &mut self.nodes[node_idx]; 87 | let mut aabb_min = Vec3::splat(f32::INFINITY); 88 | let mut aabb_max = Vec3::splat(f32::NEG_INFINITY); 89 | 90 | for i in 0..node.triangle_count() { 91 | let triangle_index = (node.first_triangle_index() + i) as usize; 92 | let index = self.indices[triangle_index]; 93 | let v0 = self.vertices[index.x as usize].xyz(); 94 | let v1 = self.vertices[index.y as usize].xyz(); 95 | let v2 = 
self.vertices[index.z as usize].xyz(); 96 | 97 | aabb_min = aabb_min.min(v0.min(v1).min(v2)); 98 | aabb_max = aabb_max.max(v0.max(v1).max(v2)); 99 | } 100 | 101 | node.set_aabb_min(&aabb_min); 102 | node.set_aabb_max(&aabb_max); 103 | } 104 | 105 | fn calculate_surface_area_heuristic(&self, node: &BVHNode, axis: usize, split: f32) -> f32 { 106 | let mut left_box = BVHNode::default(); 107 | let mut right_box = BVHNode::default(); 108 | let mut left_tri_count = 0; 109 | let mut right_tri_count = 0; 110 | 111 | for i in 0..node.triangle_count() { 112 | let triangle_index = (node.left_node_index() + i) as usize; 113 | let index = self.indices[triangle_index]; 114 | let v0 = self.vertices[index.x as usize].xyz(); 115 | let v1 = self.vertices[index.y as usize].xyz(); 116 | let v2 = self.vertices[index.z as usize].xyz(); 117 | let centroid = self.centroids[triangle_index]; 118 | 119 | if centroid[axis] < split { 120 | left_box.encapsulate(&v0); 121 | left_box.encapsulate(&v1); 122 | left_box.encapsulate(&v2); 123 | left_tri_count += 1; 124 | } else { 125 | right_box.encapsulate(&v0); 126 | right_box.encapsulate(&v1); 127 | right_box.encapsulate(&v2); 128 | right_tri_count += 1; 129 | } 130 | } 131 | 132 | let result = left_box.area() * left_tri_count as f32 + right_box.area() * right_tri_count as f32; 133 | if result > 0.0 { 134 | result 135 | } else { 136 | f32::INFINITY 137 | } 138 | } 139 | 140 | #[allow(dead_code)] 141 | fn find_best_split(&self, node: &BVHNode) -> (usize, f32, f32) { 142 | // calculate the best split (SAH) 143 | let mut best_axis = 0; 144 | let mut best_split = 0.0; 145 | let mut best_cost = f32::INFINITY; 146 | for axis in 0..3 { 147 | // find bounds of centroids in this node 148 | let mut bounds_min = f32::INFINITY; 149 | let mut bounds_max = f32::NEG_INFINITY; 150 | for i in 0..node.triangle_count() { 151 | let triangle_index = (node.first_triangle_index() + i) as usize; 152 | let centroid = self.centroids[triangle_index]; 153 | bounds_min = 
bounds_min.min(centroid[axis]); 154 | bounds_max = bounds_max.max(centroid[axis]); 155 | } 156 | 157 | // skip if completely flat 158 | if bounds_min == bounds_max { 159 | continue; 160 | } 161 | 162 | // check splits uniformly along the axis 163 | let scale = (bounds_max - bounds_min) / self.sah_samples as f32; 164 | for i in 1..self.sah_samples { 165 | let candidate = bounds_min + scale * i as f32; 166 | let cost = self.calculate_surface_area_heuristic(node, axis, candidate); 167 | if cost < best_cost { 168 | best_axis = axis; 169 | best_split = candidate; 170 | best_cost = cost; 171 | } 172 | } 173 | } 174 | 175 | (best_axis, best_split, best_cost) 176 | } 177 | 178 | fn find_best_split_segmented(&self, node: &BVHNode) -> (usize, f32, f32) { 179 | // calculate the best split (SAH) 180 | let mut best_axis = 0; 181 | let mut best_split = 0.0; 182 | let mut best_cost = f32::INFINITY; 183 | for axis in 0..3 { 184 | // find bounds of centroids in this node 185 | let mut bounds_min = f32::INFINITY; 186 | let mut bounds_max = f32::NEG_INFINITY; 187 | for i in 0..node.triangle_count() { 188 | let triangle_index = (node.first_triangle_index() + i) as usize; 189 | let centroid = self.centroids[triangle_index]; 190 | bounds_min = bounds_min.min(centroid[axis]); 191 | bounds_max = bounds_max.max(centroid[axis]); 192 | } 193 | 194 | // skip if completely flat 195 | if bounds_min == bounds_max { 196 | continue; 197 | } 198 | 199 | // build segments and fill them with triangles 200 | #[derive(Default, Clone, Copy)] 201 | struct Segment { 202 | aabb: BVHNode, 203 | triangle_count: u32, 204 | } 205 | let mut segments = vec![Segment::default(); self.sah_samples]; 206 | let scale = self.sah_samples as f32 / (bounds_max - bounds_min); 207 | for i in 0..node.triangle_count() { 208 | let triangle_index = (node.first_triangle_index() + i) as usize; 209 | let index = self.indices[triangle_index]; 210 | let v0 = self.vertices[index.x as usize].xyz(); 211 | let v1 = self.vertices[index.y 
as usize].xyz(); 212 | let v2 = self.vertices[index.z as usize].xyz(); 213 | let segment_index = (((self.centroids[triangle_index][axis] - bounds_min) * scale) as usize).min(self.sah_samples - 1); 214 | segments[segment_index].aabb.encapsulate(&v0); 215 | segments[segment_index].aabb.encapsulate(&v1); 216 | segments[segment_index].aabb.encapsulate(&v2); 217 | segments[segment_index].triangle_count += 1; 218 | } 219 | 220 | // gather what we need for SAH from each plane between the segments 221 | // this is area, tri_count of left and right segment for each possible split. 222 | // the sum and box are used to calculate these efficiently with a sweep. 223 | let mut left_box = BVHNode::default(); 224 | let mut left_sum = 0; 225 | let mut left_areas = vec![0.0; self.sah_samples - 1]; 226 | let mut left_tri_counts = vec![0; self.sah_samples - 1]; 227 | let mut right_box = BVHNode::default(); 228 | let mut right_sum = 0; 229 | let mut right_areas = vec![0.0; self.sah_samples - 1]; 230 | let mut right_tri_counts = vec![0; self.sah_samples - 1]; 231 | for i in 0..self.sah_samples - 1 { 232 | left_sum += segments[i].triangle_count; 233 | left_tri_counts[i] = left_sum; 234 | left_box.encapsulate_node(&segments[i].aabb); 235 | left_areas[i] = left_box.area(); 236 | right_sum += segments[self.sah_samples - 1 - i].triangle_count; 237 | right_tri_counts[self.sah_samples - 2 - i] = right_sum; 238 | right_box.encapsulate_node(&segments[self.sah_samples - 1 - i].aabb); 239 | right_areas[self.sah_samples - 2 - i] = right_box.area(); 240 | } 241 | 242 | // evaluate SAH for each split, pick the best 243 | let scale = (bounds_max - bounds_min) / self.sah_samples as f32; 244 | for i in 0..self.sah_samples-1 { 245 | let cost = left_tri_counts[i] as f32 * left_areas[i] + right_tri_counts[i] as f32 * right_areas[i]; 246 | if cost < best_cost { 247 | best_axis = axis; 248 | best_split = bounds_min + scale * (i + 1) as f32; 249 | best_cost = cost; 250 | } 251 | } 252 | } 253 | 254 | 
(best_axis, best_split, best_cost) 255 | } 256 | 257 | pub fn build(&mut self) -> BVH { 258 | let mut node_count = 1; 259 | 260 | let root = &mut self.nodes[0]; 261 | root.set_first_triangle_index(0); 262 | root.set_triangle_count(self.indices.len() as u32); 263 | self.update_node_aabb(0); 264 | 265 | let mut stack = vec![0]; 266 | while !stack.is_empty() { 267 | // get the next root node 268 | let node_idx = stack.pop().expect("BVH build stack is empty."); 269 | let node = &self.nodes[node_idx]; 270 | 271 | // calculate the best split (SAH) 272 | let (best_axis, best_split, best_cost) = self.find_best_split_segmented(node); 273 | 274 | // if the parent node is cheaper, don't split 275 | let parent_cost = node.area() * node.triangle_count() as f32; 276 | if parent_cost <= best_cost { 277 | continue; 278 | } 279 | 280 | // partition the triangles 281 | let mut a = node.first_triangle_index(); 282 | let mut b = a + node.triangle_count() - 1; 283 | while a <= b { 284 | let centroid = self.centroids[a as usize][best_axis]; 285 | if centroid < best_split { 286 | a += 1; 287 | } else { 288 | self.indices.swap(a as usize, b as usize); 289 | self.centroids.swap(a as usize, b as usize); 290 | b -= 1; 291 | } 292 | } 293 | 294 | // if either side is empty (no split), then we're done 295 | let left_count = a - node.first_triangle_index(); 296 | if left_count == 0 || left_count == node.triangle_count() { 297 | continue; 298 | } 299 | 300 | // create children 301 | let prev_triangle_idx = node.first_triangle_index(); 302 | let prev_triangle_count = node.triangle_count(); 303 | let left_idx = node_count; 304 | let right_idx = node_count + 1; 305 | node_count += 2; 306 | self.nodes[node_idx].set_left_node_index(left_idx as u32); 307 | self.nodes[node_idx].set_triangle_count(0); 308 | self.nodes[left_idx].set_first_triangle_index(prev_triangle_idx); 309 | self.nodes[left_idx].set_triangle_count(left_count); 310 | self.nodes[right_idx].set_first_triangle_index(a); 311 | 
self.nodes[right_idx].set_triangle_count(prev_triangle_count - left_count); 312 | self.update_node_aabb(left_idx); 313 | self.update_node_aabb(right_idx); 314 | 315 | // push children onto the stack 316 | stack.push(right_idx); 317 | stack.push(left_idx); 318 | } 319 | 320 | self.nodes.truncate(node_count); 321 | BVH { 322 | nodes: self.nodes.clone(), 323 | } 324 | } 325 | } -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(int_roundings)] 2 | 3 | pub mod app; 4 | pub mod trace; 5 | pub mod bvh; 6 | pub mod atlas; 7 | pub mod asset; 8 | pub mod light_pick; -------------------------------------------------------------------------------- /src/light_pick.rs: -------------------------------------------------------------------------------- 1 | use glam::{UVec4, Vec3, Vec4, Vec4Swizzles}; 2 | use rand::Rng; 3 | use shared_structs::{LightPickEntry, MaterialData}; 4 | 5 | fn triangle_area(a: Vec3, b: Vec3, c: Vec3) -> f32 { 6 | let side_a = b - a; 7 | let side_b = c - b; 8 | let side_c = a - c; 9 | let s = (side_a.length() + side_b.length() + side_c.length()) / 2.0; 10 | (s * (s - side_a.length()) * (s - side_b.length()) * (s - side_c.length())).sqrt() 11 | } 12 | 13 | pub fn compute_emissive_mask(indices: &[UVec4], material_datas: &[MaterialData]) -> Vec { 14 | let mut emissive_mask = vec![false; indices.len()]; 15 | for i in 0..indices.len() { 16 | if material_datas[indices[i].w as usize].emissive.xyz() != Vec3::ZERO { 17 | emissive_mask[i] = true; 18 | } 19 | } 20 | emissive_mask 21 | } 22 | 23 | // NOTE: `mask` indicates which triangles are valid for picking 24 | pub fn build_light_pick_table( 25 | vertices: &[Vec4], 26 | indices: &[UVec4], 27 | mask: &[bool], 28 | material_datas: &[MaterialData], 29 | ) -> Vec { 30 | // Calculate areas and probabilities of picking each triangle 31 | let mut triangle_areas = vec![0.0; 
indices.len()]; 32 | let mut triangle_powers = vec![0.0; indices.len()]; 33 | let mut total_power = 0.0; 34 | let mut total_tris = 0; 35 | for i in 0..indices.len() { 36 | if !mask[i] { 37 | continue; 38 | } 39 | total_tris += 1; 40 | 41 | let triangle = indices[i]; 42 | let a = vertices[triangle.x as usize].xyz(); 43 | let b = vertices[triangle.y as usize].xyz(); 44 | let c = vertices[triangle.z as usize].xyz(); 45 | 46 | let triangle_area = triangle_area(a, b, c); 47 | triangle_areas[i] = triangle_area; 48 | 49 | let triangle_power = material_datas[triangle.w as usize].emissive.xyz().dot(Vec3::ONE) * triangle_area; 50 | triangle_powers[i] = triangle_power; 51 | total_power += triangle_power; 52 | } 53 | if total_tris == 0 { 54 | // If there are 0 entries, put in a stupid sentinel value 55 | return vec![LightPickEntry { 56 | ratio: -1.0, 57 | ..Default::default() 58 | }]; 59 | } 60 | let mut triangle_probabilities = vec![0.0; indices.len()]; 61 | for i in 0..indices.len() { 62 | triangle_probabilities[i] = triangle_powers[i] / total_power; 63 | } 64 | let average_probability = triangle_probabilities.iter().sum::() / total_tris as f32; 65 | // Build histogram bins. Each entry contains 2 discrete outcomes. 
66 | #[derive(Debug)] 67 | struct TriangleBin { 68 | index_a: usize, 69 | probability_a: f32, 70 | index_b: usize, 71 | probability_b: f32, 72 | } 73 | let mut bins = triangle_probabilities 74 | .iter() 75 | .enumerate() 76 | .map(|x| TriangleBin { 77 | index_a: x.0, 78 | probability_a: *x.1, 79 | index_b: 0, 80 | probability_b: 0.0, 81 | }) 82 | .filter(|x| x.probability_a != 0.0) 83 | .collect::>(); 84 | bins.sort_by(|a, b| { 85 | a.probability_a 86 | .partial_cmp(&b.probability_a) 87 | .unwrap_or(std::cmp::Ordering::Equal) 88 | }); 89 | 90 | // Robin hood - take from the most probable and give to the least probable 91 | let num_bins = bins.len(); 92 | let mut most_probable = num_bins - 1; 93 | for i in 0..num_bins { 94 | let needed = average_probability - bins[i].probability_a; 95 | if needed <= 0.0 { 96 | break; 97 | } 98 | 99 | bins[i].index_b = bins[most_probable].index_a; 100 | bins[i].probability_b = needed; 101 | bins[most_probable].probability_a -= needed; 102 | if bins[most_probable].probability_a <= average_probability { 103 | most_probable -= 1; 104 | } 105 | } 106 | 107 | // Build the table 108 | let table = bins 109 | .iter() 110 | .map(|x| LightPickEntry { 111 | triangle_index_a: x.index_a as u32, 112 | triangle_index_b: x.index_b as u32, 113 | triangle_pick_pdf_a: triangle_probabilities[x.index_a], 114 | triangle_area_a: triangle_areas[x.index_a], 115 | triangle_area_b: triangle_areas[x.index_b], 116 | triangle_pick_pdf_b: triangle_probabilities[x.index_b], 117 | ratio: x.probability_a / (x.probability_a + x.probability_b), 118 | }) 119 | .collect::>(); 120 | 121 | table 122 | } 123 | 124 | // Just for reference 125 | #[allow(dead_code)] 126 | fn pick_light(table: &[LightPickEntry]) -> u32 { 127 | let rng = rand::thread_rng().gen_range(0..table.len()); 128 | let entry = table[rng]; 129 | let rng = rand::thread_rng().gen_range(0.0..1.0); 130 | if rng < entry.ratio { 131 | entry.triangle_index_a 132 | } else { 133 | entry.triangle_index_b 134 | } 135 
| } 136 | 137 | #[allow(dead_code)] 138 | fn build_light_cdf_table(vertices: &[Vec4], indices: &[UVec4], mask: &[bool]) -> Vec { 139 | // Calculate areas and probabilities of picking each triangle 140 | let mut triangle_areas = vec![0.0; indices.len()]; 141 | let mut total_area = 0.0; 142 | for i in 0..indices.len() { 143 | if !mask[i] { 144 | continue; 145 | } 146 | let triangle = indices[i]; 147 | let a = vertices[triangle.x as usize].xyz(); 148 | let b = vertices[triangle.y as usize].xyz(); 149 | let c = vertices[triangle.z as usize].xyz(); 150 | let triangle_area = triangle_area(a, b, c); 151 | total_area += triangle_area; 152 | triangle_areas[i] = triangle_area; 153 | } 154 | let mut triangle_probabilities = vec![0.0; indices.len()]; 155 | for i in 0..indices.len() { 156 | triangle_probabilities[i] = triangle_areas[i] / total_area; 157 | } 158 | for i in 1..indices.len() { 159 | triangle_probabilities[i] += triangle_probabilities[i - 1]; 160 | } 161 | triangle_probabilities 162 | } 163 | 164 | /* 165 | pub fn compare_approaches(vertices: &[Vec4], indices: &[UVec4], mask: &[bool]) { 166 | let table = build_light_pick_table(vertices, indices, mask); 167 | let cdf_table = build_light_cdf_table(vertices, indices, mask); 168 | let root = BitMapBackend::new("bla.png", (640, 480)).into_drawing_area(); 169 | 170 | root.fill(&WHITE).unwrap(); 171 | 172 | let mut chart = ChartBuilder::on(&root) 173 | .x_label_area_size(35) 174 | .y_label_area_size(40) 175 | .margin(5) 176 | .caption("Histogram sampling", ("sans-serif", 50.0)) 177 | .build_cartesian_2d((0usize..cdf_table.len()).into_segmented(), 0.0f32..0.02f32) 178 | .unwrap(); 179 | 180 | chart 181 | .configure_mesh() 182 | .disable_x_mesh() 183 | .bold_line_style(&WHITE.mix(0.3)) 184 | .y_desc("Count") 185 | .x_desc("Bucket") 186 | .axis_desc_style(("sans-serif", 15)) 187 | .draw() 188 | .unwrap(); 189 | 190 | let samples = 100000; 191 | let mut data = vec![0; indices.len()]; 192 | for i in 0..samples { 193 | 
data[pick_light(&table) as usize] += 1; 194 | } 195 | /*for i in 0..samples { 196 | let rng = rand::thread_rng().gen_range(0.0..1.0); 197 | let mut j = 0; 198 | while j <= cdf_table.len() { 199 | if rng < cdf_table[j] { 200 | break; 201 | } 202 | j += 1; 203 | } 204 | data[j] += 1; 205 | }*/ 206 | let data = data 207 | .iter() 208 | .map(|x| *x as f32 / samples as f32) 209 | .collect::>(); 210 | 211 | chart 212 | .draw_series( 213 | Histogram::vertical(&chart) 214 | .style(RED.mix(0.5).filled()) 215 | .data(data.into_iter().enumerate().collect::>()), 216 | ) 217 | .unwrap(); 218 | 219 | // To avoid the IO failure being ignored silently, we manually call the present function 220 | root.present().expect("Unable to write result to file, please make sure 'plotters-doc-data' dir exists under current dir"); 221 | 222 | let root = BitMapBackend::new("bla2.png", (640, 480)).into_drawing_area(); 223 | 224 | root.fill(&WHITE).unwrap(); 225 | 226 | let mut chart = ChartBuilder::on(&root) 227 | .x_label_area_size(35) 228 | .y_label_area_size(40) 229 | .margin(5) 230 | .caption("CDF sampling", ("sans-serif", 50.0)) 231 | .build_cartesian_2d((0usize..cdf_table.len()).into_segmented(), 0.0f32..0.02f32) 232 | .unwrap(); 233 | 234 | chart 235 | .configure_mesh() 236 | .disable_x_mesh() 237 | .bold_line_style(&WHITE.mix(0.3)) 238 | .y_desc("Count") 239 | .x_desc("Bucket") 240 | .axis_desc_style(("sans-serif", 15)) 241 | .draw() 242 | .unwrap(); 243 | 244 | let mut data = vec![0; indices.len()]; 245 | /*for i in 0..samples { 246 | let rng = rand::thread_rng().gen_range(0..table.len()); 247 | let entry = table[rng]; 248 | let rng = rand::thread_rng().gen_range(0.0..1.0); 249 | let choice = if rng > entry.2 { entry.0 } else { entry.1 }; 250 | data[choice as usize] += 1; 251 | }*/ 252 | for i in 0..samples { 253 | let rng = rand::thread_rng().gen_range(0.0..1.0); 254 | let mut j = 0; 255 | while j < cdf_table.len() - 1 && rng > cdf_table[j] { 256 | j += 1; 257 | } 258 | data[j] += 1; 
259 | } 260 | let data = data 261 | .iter() 262 | .map(|x| *x as f32 / samples as f32) 263 | .collect::>(); 264 | 265 | chart 266 | .draw_series( 267 | Histogram::vertical(&chart) 268 | .style(RED.mix(0.5).filled()) 269 | .data(data.into_iter().enumerate().collect::>()), 270 | ) 271 | .unwrap(); 272 | 273 | // To avoid the IO failure being ignored silently, we manually call the present function 274 | root.present().expect("Unable to write result to file, please make sure 'plotters-doc-data' dir exists under current dir"); 275 | } 276 | */ 277 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::time::Instant; 2 | use egui::FontDefinitions; 3 | use egui_winit_platform::{Platform, PlatformDescriptor}; 4 | use winit::event::Event::{DeviceEvent, WindowEvent, MainEventsCleared, RedrawRequested}; 5 | use rustic::app::App; 6 | use winit::event_loop::ControlFlow; 7 | 8 | fn main() { 9 | let width = 1280; 10 | let height = 720; 11 | 12 | let event_loop = winit::event_loop::EventLoopBuilder::<()>::with_user_event().build(); 13 | let window = winit::window::WindowBuilder::new() 14 | .with_decorations(true) 15 | .with_resizable(true) 16 | .with_transparent(false) 17 | .with_title("rust-path-tracer") 18 | .with_inner_size(winit::dpi::PhysicalSize { 19 | width, 20 | height, 21 | }) 22 | .build(&event_loop) 23 | .expect("Building window failed"); 24 | 25 | let mut platform = Platform::new(PlatformDescriptor { 26 | physical_width: width, 27 | physical_height: height, 28 | scale_factor: window.scale_factor(), 29 | font_definitions: FontDefinitions::default(), 30 | style: Default::default(), 31 | }); 32 | 33 | let mut app = App::new(window); 34 | 35 | let start_time = Instant::now(); 36 | event_loop.run(move |event, _, control_flow| { 37 | // Pass the winit events to the platform integration. 
38 | platform.handle_event(&event); 39 | 40 | match event { 41 | RedrawRequested(..) => { 42 | app.redraw(&mut platform, &start_time); 43 | } 44 | DeviceEvent { 45 | event: winit::event::DeviceEvent::MouseMotion { delta }, 46 | .. 47 | } => { 48 | app.handle_mouse_motion(delta); 49 | } 50 | MainEventsCleared => { 51 | app.window().request_redraw(); 52 | } 53 | WindowEvent { event, .. } => match event { 54 | winit::event::WindowEvent::Resized(size) => { 55 | app.handle_resize(size); 56 | } 57 | winit::event::WindowEvent::CloseRequested => { 58 | *control_flow = ControlFlow::Exit; 59 | } 60 | winit::event::WindowEvent::DroppedFile(path) => { 61 | app.handle_file_dropped(&path); 62 | } 63 | _ => {} 64 | }, 65 | _ => (), 66 | } 67 | }); 68 | } -------------------------------------------------------------------------------- /src/resources/bluenoise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pema99/rust-path-tracer/2b31ad0384e62a84520a23f8596e4f0f9c6c173d/src/resources/bluenoise.png -------------------------------------------------------------------------------- /src/resources/render.wgsl: -------------------------------------------------------------------------------- 1 | struct VertexOut { 2 | @location(0) uv: vec2, 3 | @builtin(position) position: vec4, 4 | }; 5 | 6 | struct Uniforms { 7 | width: u32, 8 | height: u32, 9 | tonemapping: u32, 10 | }; 11 | 12 | @group(0) @binding(0) 13 | var uniforms: Uniforms; 14 | 15 | @group(0) @binding(1) 16 | var render_buffer: array; 17 | 18 | var v_positions: array, 6> = array, 6>( 19 | vec2(-1.0, -1.0), 20 | vec2(-1.0, 1.0), 21 | vec2(1.0, 1.0), 22 | vec2(1.0, 1.0), 23 | vec2(1.0, -1.0), 24 | vec2(-1.0, -1.0), 25 | ); 26 | 27 | @vertex 28 | fn vs_main(@builtin(vertex_index) v_idx: u32) -> VertexOut { 29 | var out: VertexOut; 30 | out.position = vec4(v_positions[v_idx], 0.0, 1.0); 31 | out.uv = v_positions[v_idx] * 0.5 + 0.5; 32 | return out; 33 | } 34 | 
35 | // Narkowicz ACES https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/ 36 | fn aces_narkowicz(x: vec3) -> vec3 { 37 | var a = 2.51; 38 | var b = 0.03; 39 | var c = 2.43; 40 | var d = 0.59; 41 | var e = 0.14; 42 | return clamp((x * (a * x + b)) / (x * (c * x + d) + e), vec3(0.0), vec3(1.0)); 43 | } 44 | 45 | // Hill ACES https://github.com/TheRealMJP/BakingLab/blob/master/BakingLab/ACES.hlsl 46 | fn aces_hill(x: vec3) -> vec3 { 47 | let acesInput = transpose(mat3x3( 48 | vec3(0.59719, 0.35458, 0.04823), 49 | vec3(0.07600, 0.90834, 0.01566), 50 | vec3(0.02840, 0.13383, 0.83777) 51 | )); 52 | let acesOutput = transpose(mat3x3( 53 | vec3(1.60475, -0.53108, -0.07367), 54 | vec3(-0.10208, 1.10813, -0.00605), 55 | vec3(-0.00327, -0.07276, 1.07602) 56 | )); 57 | 58 | var color = acesInput * x; 59 | 60 | let a = color * (color + 0.0245786) - 0.000090537; 61 | let b = color * (0.983729 * color + 0.4329510) + 0.238081; 62 | color = a / b; 63 | 64 | color = acesOutput * color; 65 | 66 | color = saturate(color); 67 | 68 | return color; 69 | } 70 | 71 | fn reinhard(x: vec3) -> vec3 { 72 | return x / (x + 1.0); 73 | } 74 | 75 | fn neutralCurve(x: vec3, a: f32, b: f32, c: f32, d: f32, e: f32, f: f32) -> vec3 { 76 | return ((x * (a * x + c * b) + d * e) / (x * (a * x + b) + d * f)) - e / f; 77 | } 78 | 79 | fn neutralTonemap(x: vec3) -> vec3 { 80 | // Tonemap 81 | var a: f32 = 0.2; 82 | var b: f32 = 0.29; 83 | var c: f32 = 0.24; 84 | var d: f32 = 0.272; 85 | var e: f32 = 0.02; 86 | var f: f32 = 0.3; 87 | var whiteLevel: f32 = 5.3; 88 | var whiteClip: f32 = 1.0; 89 | 90 | var whiteScale: vec3 = vec3(1.0) / neutralCurve(vec3(whiteLevel), a, b, c, d, e, f); 91 | var x = neutralCurve(x * whiteScale, a, b, c, d, e, f); 92 | x *= whiteScale; 93 | 94 | // Post-curve white point adjustment 95 | x /= vec3(whiteClip); 96 | 97 | return x; 98 | } 99 | 100 | fn unchartedPartial(x: vec3) -> vec3 { 101 | var A = 0.15f; 102 | var B = 0.50f; 103 | var C = 0.10f; 104 | 
var D = 0.20f; 105 | var E = 0.02f; 106 | var F = 0.30f; 107 | return ((x*(A*x+C*B)+D*E)/(x*(A*x+B)+D*F))-E/F; 108 | } 109 | 110 | fn uncharted(v: vec3) -> vec3 { 111 | var exposure_bias = 2.0; 112 | var curr = unchartedPartial(v * exposure_bias); 113 | 114 | var W = vec3(11.2); 115 | var white_scale = vec3(1.0) / unchartedPartial(W); 116 | return curr * white_scale; 117 | } 118 | 119 | @fragment 120 | fn fs_main(in: VertexOut) -> @location(0) vec4 { 121 | var uv = in.uv; 122 | uv.y = 1.0 - uv.y; 123 | var puv: vec2 = vec2(uv * vec2(f32(uniforms.width), f32(uniforms.height))); 124 | var idx: u32 = (puv.y*u32(uniforms.width)+puv.x); 125 | var color: vec4 = vec4(0.0, 0.0, 0.0, 0.0); 126 | color.r = render_buffer[idx*3u+0u]; 127 | color.g = render_buffer[idx*3u+1u]; 128 | color.b = render_buffer[idx*3u+2u]; 129 | 130 | var tonemapped = color.rgb; 131 | switch (uniforms.tonemapping) { 132 | case 1u: { // Reinhard 133 | tonemapped = reinhard(tonemapped); 134 | } 135 | case 2u: { // ACES (Narkowicz) 136 | tonemapped = aces_narkowicz(tonemapped * 0.6); 137 | } 138 | case 3u: { // ACES (Narkowicz, overexposed) 139 | tonemapped = aces_narkowicz(tonemapped); 140 | } 141 | case 4u: { // ACES (Hill) 142 | tonemapped = aces_hill(tonemapped); 143 | } 144 | case 5u: { // Neutral 145 | tonemapped = neutralTonemap(tonemapped); 146 | } 147 | case 6u: { // Uncharted 148 | tonemapped = uncharted(tonemapped); 149 | } 150 | default: { 151 | // No tonemapping 152 | } 153 | } 154 | 155 | return vec4(tonemapped, 1.0); 156 | } -------------------------------------------------------------------------------- /src/trace.rs: -------------------------------------------------------------------------------- 1 | const KERNEL: &[u8] = include_bytes!(env!("kernels.spv")); 2 | const BLUE_BYTES: &[u8] = include_bytes!("resources/bluenoise.png"); 3 | lazy_static::lazy_static! 
{ 4 | pub static ref FW: gpgpu::Framework = make_framework(); 5 | pub static ref BLUE_TEXTURE: RgbaImage = Reader::new(Cursor::new(BLUE_BYTES)).with_guessed_format().unwrap().decode().unwrap().into_rgba8(); 6 | } 7 | 8 | use glam::{UVec2, Vec4, UVec3}; 9 | use gpgpu::{ 10 | BufOps, DescriptorSet, GpuBuffer, GpuBufferUsage, GpuUniformBuffer, Kernel, Program, Shader, Sampler, SamplerWrapMode, SamplerFilterMode, GpuConstImage, primitives::pixels::Rgba32Float 11 | }; 12 | use image::{RgbaImage, io::Reader, GenericImageView}; 13 | use parking_lot::RwLock; 14 | use pollster::FutureExt; 15 | use shared_structs::CpuImage; 16 | pub use shared_structs::TracingConfig; 17 | use std::{sync::{ 18 | atomic::{Ordering, AtomicBool, AtomicU32}, 19 | Arc, 20 | }, io::Cursor}; 21 | use rayon::prelude::*; 22 | 23 | use crate::{asset::{World, GpuWorld, dynamic_image_to_cpu_buffer, load_dynamic_image, dynamic_image_to_gpu_image, fallback_gpu_image, fallback_cpu_buffer}}; 24 | 25 | fn make_framework() -> gpgpu::Framework { 26 | let backend = wgpu::util::backend_bits_from_env().unwrap_or(wgpu::Backends::PRIMARY); 27 | let power_preference = wgpu::util::power_preference_from_env() 28 | .unwrap_or(wgpu::PowerPreference::HighPerformance); 29 | let instance = wgpu::Instance::new(backend); 30 | let adapter = instance 31 | .request_adapter(&wgpu::RequestAdapterOptions { 32 | power_preference, 33 | ..Default::default() 34 | }) 35 | .block_on() 36 | .expect("Failed at adapter creation."); 37 | gpgpu::Framework::new(adapter, std::time::Duration::from_millis(1)).block_on() 38 | } 39 | 40 | pub struct TracingState { 41 | pub framebuffer: RwLock>, 42 | pub running: AtomicBool, 43 | pub samples: AtomicU32, 44 | pub denoise: AtomicBool, 45 | pub sync_rate: AtomicU32, 46 | pub use_blue_noise: AtomicBool, 47 | pub interacting: AtomicBool, 48 | pub dirty: AtomicBool, 49 | pub config: RwLock, 50 | } 51 | 52 | impl TracingState { 53 | pub fn make_view_dependent_state( 54 | width: u32, 55 | height: u32, 56 | 
config: Option, 57 | ) -> (TracingConfig, Vec) { 58 | let config = TracingConfig { 59 | width, 60 | height, 61 | ..config.unwrap_or_default() 62 | }; 63 | let data_size = width as usize * height as usize * 3; 64 | let framebuffer = vec![0.0; data_size]; 65 | (config, framebuffer) 66 | } 67 | 68 | pub fn new (width: u32, height: u32) -> Self { 69 | let (config, framebuffer) = Self::make_view_dependent_state(width, height, None); 70 | let config = RwLock::new(config); 71 | let framebuffer = RwLock::new(framebuffer); 72 | let running = AtomicBool::new(false); 73 | let samples = AtomicU32::new(0); 74 | let denoise = AtomicBool::new(false); 75 | let sync_rate = AtomicU32::new(32); 76 | let use_blue_noise = AtomicBool::new(true); 77 | let interacting = AtomicBool::new(false); 78 | let dirty = AtomicBool::new(false); 79 | 80 | Self { 81 | framebuffer, 82 | running, 83 | samples, 84 | denoise, 85 | sync_rate, 86 | use_blue_noise, 87 | interacting, 88 | dirty, 89 | config, 90 | } 91 | } 92 | } 93 | 94 | struct PathTracingKernel<'fw>(Kernel<'fw>); 95 | 96 | impl<'fw> PathTracingKernel<'fw> { 97 | fn new( 98 | config_buffer: &GpuUniformBuffer<'fw, TracingConfig>, 99 | rng_buffer: &GpuBuffer<'fw, UVec2>, 100 | output_buffer: &GpuBuffer<'fw, Vec4>, 101 | world: &GpuWorld<'fw>, 102 | skybox: &GpuConstImage<'fw, Rgba32Float>, 103 | ) -> Self { 104 | let shader = Shader::from_spirv_bytes(&FW, KERNEL, Some("compute")); 105 | let sampler = Sampler::new(&FW, SamplerWrapMode::ClampToEdge, SamplerFilterMode::Linear); 106 | let bindings = DescriptorSet::default() 107 | .bind_uniform_buffer(config_buffer) 108 | .bind_buffer(rng_buffer, GpuBufferUsage::ReadWrite) 109 | .bind_buffer(output_buffer, GpuBufferUsage::ReadWrite) 110 | .bind_buffer(&world.per_vertex_buffer, GpuBufferUsage::ReadOnly) 111 | .bind_buffer(&world.index_buffer, GpuBufferUsage::ReadOnly) 112 | .bind_buffer(&world.bvh.nodes_buffer, GpuBufferUsage::ReadOnly) 113 | .bind_buffer(&world.material_data_buffer, 
GpuBufferUsage::ReadOnly) 114 | .bind_buffer(&world.light_pick_buffer, GpuBufferUsage::ReadOnly) 115 | .bind_sampler(&sampler) 116 | .bind_const_image(&world.atlas) 117 | .bind_const_image(&skybox); 118 | let program = Program::new(&shader, "trace_kernel").add_descriptor_set(bindings); 119 | let kernel = Kernel::new(&FW, program); 120 | 121 | Self(kernel) 122 | } 123 | } 124 | 125 | #[cfg(feature = "oidn")] 126 | fn denoise_image(width: usize, height: usize, input: &mut [f32]) { 127 | let device = oidn::Device::new(); 128 | oidn::RayTracing::new(&device) 129 | .hdr(true) 130 | .srgb(false) 131 | .image_dimensions(width, height) 132 | .filter_in_place(input) 133 | .expect("Filter config error!"); 134 | } 135 | 136 | pub fn trace_gpu( 137 | scene_path: &str, 138 | skybox_path: Option<&str>, 139 | state: Arc, 140 | ) { 141 | let Some(world) = World::from_path(scene_path).map(|w| w.into_gpu()) else { 142 | return; 143 | }; 144 | let skybox = skybox_path.and_then(load_dynamic_image).map(dynamic_image_to_gpu_image).unwrap_or_else(|| fallback_gpu_image()); 145 | 146 | let screen_width = state.config.read().width; 147 | let screen_height = state.config.read().height; 148 | let pixel_count = (screen_width * screen_height) as usize; 149 | let mut rng = rand::thread_rng(); 150 | let mut rng_data_blue: Vec = vec![UVec2::ZERO; pixel_count]; 151 | let mut rng_data_uniform: Vec = vec![UVec2::ZERO; pixel_count]; 152 | for y in 0..screen_height { 153 | for x in 0..screen_width { 154 | let pixel_index = (y * screen_width + x) as usize; 155 | let pixel = BLUE_TEXTURE.get_pixel(x % BLUE_TEXTURE.width(), y % BLUE_TEXTURE.height())[0] as f32 / 255.0; 156 | rng_data_blue[pixel_index].x = 0; 157 | rng_data_blue[pixel_index].y = (pixel * 4294967295.0) as u32; 158 | rng_data_uniform[pixel_index].x = rand::Rng::gen(&mut rng); 159 | } 160 | } 161 | 162 | // Restore previous state, if there is any 163 | let samples_init = state.samples.load(Ordering::Relaxed) as f32; 164 | let 
output_buffer_init = state.framebuffer.read().chunks(3).map(|c| Vec4::new(c[0], c[1], c[2], 1.0) * samples_init).collect::>(); 165 | 166 | // Setup tracing state 167 | let pixel_count = (screen_width * screen_height) as u64; 168 | let config_buffer = GpuUniformBuffer::from_slice(&FW, &[*state.config.read()]); 169 | let rng_buffer = GpuBuffer::from_slice(&FW, if state.use_blue_noise.load(Ordering::Relaxed) { &rng_data_blue } else { &rng_data_uniform }); 170 | let output_buffer = GpuBuffer::from_slice(&FW, &output_buffer_init); 171 | 172 | let mut image_buffer_raw: Vec = vec![Vec4::ZERO; pixel_count as usize]; 173 | let mut image_buffer: Vec = vec![0.0; pixel_count as usize * 3]; 174 | 175 | let rt = PathTracingKernel::new(&config_buffer, &rng_buffer, &output_buffer, &world, &skybox); 176 | 177 | while state.running.load(Ordering::Relaxed) { 178 | // Dispatch 179 | let sync_rate = state.sync_rate.load(Ordering::Relaxed); 180 | let mut flush = false; 181 | let mut finished_samples = 0; 182 | for _ in 0..sync_rate { 183 | rt.0.enqueue(screen_width.div_ceil(8), screen_height.div_ceil(8), 1); 184 | FW.poll_blocking(); 185 | finished_samples += 1; 186 | 187 | flush |= state.interacting.load(Ordering::Relaxed) || state.dirty.load(Ordering::Relaxed); 188 | if flush { 189 | break; 190 | } 191 | if !state.running.load(Ordering::Relaxed) { 192 | return; 193 | } 194 | } 195 | state.samples.fetch_add(finished_samples, Ordering::Relaxed); 196 | 197 | // Readback from GPU 198 | let _ = output_buffer.read_blocking(&mut image_buffer_raw); 199 | let sample_count = state.samples.load(Ordering::Relaxed) as f32; 200 | for (i, col) in image_buffer_raw.iter().enumerate() { 201 | image_buffer[i * 3] = col.x / sample_count; 202 | image_buffer[i * 3 + 1] = col.y / sample_count; 203 | image_buffer[i * 3 + 2] = col.z / sample_count; 204 | } 205 | 206 | // Denoise 207 | #[cfg(feature = "oidn")] 208 | if state.denoise.load(Ordering::Relaxed) && !flush { 209 | denoise_image(screen_width as usize, 
screen_height as usize, &mut image_buffer); 210 | } 211 | 212 | // Push to render thread 213 | state.framebuffer.write().copy_from_slice(image_buffer.as_slice()); 214 | 215 | // Interaction 216 | if flush { 217 | state.dirty.store(false, Ordering::Relaxed); 218 | state.samples.store(0, Ordering::Relaxed); 219 | let _ = config_buffer.write(&[*state.config.read()]); 220 | let _ = output_buffer.write(&vec![Vec4::ZERO; pixel_count as usize]); 221 | let _ = rng_buffer.write(if state.use_blue_noise.load(Ordering::Relaxed) { &rng_data_blue } else { &rng_data_uniform }); 222 | } 223 | } 224 | } 225 | 226 | pub fn trace_cpu( 227 | scene_path: &str, 228 | skybox_path: Option<&str>, 229 | state: Arc, 230 | ) { 231 | let Some(world) = World::from_path(scene_path) else { 232 | return; 233 | }; 234 | let mut skybox_image_buffer = fallback_cpu_buffer(); 235 | let mut skybox_size = (2, 2); 236 | if let Some(skybox_source) = skybox_path.and_then(load_dynamic_image) { 237 | skybox_size = skybox_source.dimensions(); 238 | skybox_image_buffer = dynamic_image_to_cpu_buffer(skybox_source); 239 | } 240 | let skybox_image = CpuImage::new(&skybox_image_buffer, skybox_size.0, skybox_size.1); 241 | 242 | let screen_width = state.config.read().width; 243 | let screen_height = state.config.read().height; 244 | let pixel_count = (screen_width * screen_height) as usize; 245 | let mut rng = rand::thread_rng(); 246 | let mut rng_data_blue: Vec = vec![UVec2::ZERO; pixel_count]; 247 | let mut rng_data_uniform: Vec = vec![UVec2::ZERO; pixel_count]; 248 | for y in 0..screen_height { 249 | for x in 0..screen_width { 250 | let pixel_index = (y * screen_width + x) as usize; 251 | let pixel = BLUE_TEXTURE.get_pixel(x % BLUE_TEXTURE.width(), y % BLUE_TEXTURE.height())[0] as f32 / 255.0; 252 | rng_data_blue[pixel_index].x = 0; 253 | rng_data_blue[pixel_index].y = (pixel * 4294967295.0) as u32; 254 | rng_data_uniform[pixel_index].x = rand::Rng::gen(&mut rng); 255 | } 256 | } 257 | 258 | // Restore previous
state, if there is any 259 | let samples_init = state.samples.load(Ordering::Relaxed) as f32; 260 | let mut output_buffer = state.framebuffer.read().chunks(3).map(|c| Vec4::new(c[0], c[1], c[2], 1.0) * samples_init).collect::>(); 261 | 262 | // Setup tracing state 263 | let pixel_count = (screen_width * screen_height) as u64; 264 | let mut rng_buffer = if state.use_blue_noise.load(Ordering::Relaxed) { &mut rng_data_blue } else { &mut rng_data_uniform }; 265 | 266 | let mut image_buffer: Vec = vec![0.0; pixel_count as usize * 3]; 267 | 268 | let atlas_width = world.atlas.width(); 269 | let atlas_height = world.atlas.height(); 270 | let atlas_buffer = dynamic_image_to_cpu_buffer(world.atlas); 271 | let atlas_image = CpuImage::new(&atlas_buffer, atlas_width, atlas_height); 272 | 273 | while state.running.load(Ordering::Relaxed) { 274 | // Dispatch 275 | let flush = state.interacting.load(Ordering::Relaxed) || state.dirty.load(Ordering::Relaxed); 276 | { 277 | let config = state.config.read(); 278 | let outputs = output_buffer.par_chunks_mut(screen_width as usize).enumerate(); 279 | let rngs = rng_buffer.par_chunks_mut(screen_width as usize); 280 | outputs.zip(rngs).for_each(|((y, output), rng)| { 281 | for x in 0..screen_width { 282 | let (radiance, rng_state) = kernels::trace_pixel( 283 | UVec3::new(x, y as u32, 1), 284 | &config, 285 | rng[x as usize], 286 | &world.per_vertex_buffer, 287 | &world.index_buffer, 288 | &world.bvh.nodes, 289 | &world.material_data_buffer, 290 | &world.light_pick_buffer, 291 | &shared_structs::Sampler, 292 | &atlas_image, 293 | &skybox_image, 294 | ); 295 | output[x as usize] += radiance; 296 | rng[x as usize] = rng_state; 297 | } 298 | }); 299 | } 300 | state.samples.fetch_add(1, Ordering::Relaxed); 301 | 302 | // Average the accumulated radiance by the sample count (CPU path: no GPU readback) 303 | let sample_count = state.samples.load(Ordering::Relaxed) as f32; 304 | for (i, col) in output_buffer.iter().enumerate() { 305 | image_buffer[i * 3] = col.x / sample_count; 306 | image_buffer[i * 3 + 1]
= col.y / sample_count; 307 | image_buffer[i * 3 + 2] = col.z / sample_count; 308 | } 309 | 310 | // Denoise 311 | #[cfg(feature = "oidn")] 312 | if state.denoise.load(Ordering::Relaxed) && !flush { 313 | denoise_image(screen_width as usize, screen_height as usize, &mut image_buffer); 314 | } 315 | 316 | // Push to render thread 317 | state.framebuffer.write().copy_from_slice(image_buffer.as_slice()); 318 | 319 | // Interaction 320 | if flush { 321 | state.dirty.store(false, Ordering::Relaxed); 322 | state.samples.store(0, Ordering::Relaxed); 323 | output_buffer = vec![Vec4::ZERO; pixel_count as usize]; 324 | rng_buffer = if state.use_blue_noise.load(Ordering::Relaxed) { &mut rng_data_blue } else { &mut rng_data_uniform }; 325 | } 326 | } 327 | } 328 | 329 | // Harness for running synchronous tracing 330 | #[allow(dead_code)] 331 | pub fn setup_trace(width: u32, height: u32, samples: u32) -> Arc { 332 | let state = Arc::new(TracingState::new(width, height)); 333 | state.running.store(true, Ordering::Relaxed); 334 | { 335 | let state = state.clone(); 336 | std::thread::spawn(move || { 337 | while state.samples.load(Ordering::Relaxed) < samples { 338 | std::thread::yield_now(); 339 | } 340 | state.running.store(false, Ordering::Relaxed); 341 | }); 342 | } 343 | state 344 | } -------------------------------------------------------------------------------- /tests/correctness_tests.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use rustic::trace::*; 4 | use shared_structs::NextEventEstimation; 5 | 6 | fn trace(use_cpu: bool, scene: &str, skybox: Option<&str>, state: &Arc) { 7 | if use_cpu { 8 | trace_cpu(scene, skybox, state.clone()); 9 | } else { 10 | trace_gpu(scene, skybox, state.clone()); 11 | } 12 | } 13 | 14 | fn furnace_test(use_cpu: bool, use_mis: bool) { 15 | let size = 128; 16 | let coord = (65, 75); 17 | let albedo = 0.8; 18 | let tolerance = 0.02; 19 | 20 | let state = setup_trace(size as
u32, size as u32, 32); 21 | if use_mis { 22 | state.config.write().nee = NextEventEstimation::MultipleImportanceSampling.to_u32(); 23 | } 24 | trace(use_cpu, "scenes/FurnaceTest.glb", None, &state); 25 | let frame = state.framebuffer.read(); 26 | 27 | let pixel_r = frame[(size * 3) * coord.1 + coord.0 * 3 + 0].powf(1.0 / 2.2); 28 | let pixel_g = frame[(size * 3) * coord.1 + coord.0 * 3 + 1].powf(1.0 / 2.2); 29 | let pixel_b = frame[(size * 3) * coord.1 + coord.0 * 3 + 2].powf(1.0 / 2.2); 30 | assert!((pixel_r - albedo).abs() < tolerance); 31 | assert!((pixel_g - albedo).abs() < tolerance); 32 | assert!((pixel_b - albedo).abs() < tolerance); 33 | } 34 | 35 | #[test] 36 | fn furnace_test_cpu() { 37 | furnace_test(true, false); 38 | } 39 | 40 | #[test] 41 | fn furnace_test_gpu() { 42 | furnace_test(false, false); 43 | } 44 | 45 | #[test] 46 | fn furnace_test_cpu_mis() { 47 | furnace_test(true, true); 48 | } 49 | 50 | #[test] 51 | fn furnace_test_gpu_mis() { 52 | furnace_test(false, true); 53 | } --------------------------------------------------------------------------------