├── .appveyor.yml ├── .cargo └── config ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── license ├── models ├── box_walls.obj ├── box_windows.obj ├── indoor.obj ├── plane.obj ├── stanford_bunny.obj ├── stanford_dragon.obj ├── suzanne.obj └── suzannes_in_box.obj ├── readme.md ├── rust-toolchain ├── screenshots ├── accumulative.png └── interactive.png ├── src ├── aabb.rs ├── bench.rs ├── bvh.rs ├── gpu │ ├── blend.glsl │ ├── gbuffer.glsl │ ├── id.glsl │ ├── median.glsl │ └── vertex.glsl ├── main.rs ├── material.rs ├── quaternion.rs ├── random.rs ├── ray.rs ├── renderer.rs ├── scene.rs ├── simd.rs ├── stats.rs ├── trace.rs ├── triangle.rs ├── ui.rs ├── util.rs ├── vector3.rs └── wavefront.rs ├── textures ├── floor.jpg ├── license.md └── wood_light.jpg └── tools ├── approx_acos.py ├── approx_cos.py └── approx_sin.py /.appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | matrix: 3 | - target: nightly-x86_64-pc-windows-msvc 4 | 5 | install: 6 | # Download the Rust and Cargo installer. 7 | - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-${env:target}.msi" 8 | 9 | # Install Rust and Cargo and wait for installation to finish by using Write-Output. 10 | - ps: msiexec /package "rust-${env:target}.msi" /quiet /norestart | Write-Output 11 | 12 | # Pick up the new Path variable after the installer modified it. 13 | - ps: $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") 14 | 15 | # Print versions for future reference. 
16 | - rustc --version 17 | - cargo --version 18 | 19 | build_script: 20 | - cargo build 21 | 22 | test_script: 23 | - cargo test 24 | -------------------------------------------------------------------------------- /.cargo/config: -------------------------------------------------------------------------------- 1 | [build] 2 | rustflags = ["-C", "target-feature=+avx,+fma"] 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Cargo files 2 | /target 3 | 4 | # Editor files 5 | *.swp 6 | *.swo 7 | 8 | # Written trace output 9 | /trace.json 10 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "convector" 3 | version = "0.2.0" 4 | authors = ["Ruud van Asseldonk "] 5 | 6 | [dependencies] 7 | filebuffer = "0.1" 8 | glium = "0.16" 9 | imagefmt = "4.0" 10 | num_cpus = "1.0" 11 | rand = "0.3" 12 | rayon = "0.6" 13 | scoped_threadpool = "0.1" 14 | thread-id = "3.0" 15 | time = "0.1" 16 | -------------------------------------------------------------------------------- /models/box_walls.obj: -------------------------------------------------------------------------------- 1 | # A box with a few holes in it, like an interior scene. 
2 | 3 | v 8.000000 0.000000 -8.000000 4 | v 8.000000 0.000000 8.000000 5 | v -8.000001 0.000000 7.999998 6 | v -7.999997 0.000000 -8.000003 7 | v 8.000004 7.000000 -7.999996 8 | v 7.999995 7.000000 8.000005 9 | v -8.000003 7.000000 7.999997 10 | v -7.999999 7.000000 -8.000000 11 | v 8.000000 0.000000 -4.000000 12 | v 8.000000 0.000000 0.000000 13 | v 8.000000 0.000000 4.000000 14 | v 4.000001 0.000000 -8.000001 15 | v 0.000001 0.000000 -8.000002 16 | v -3.999998 0.000000 -8.000002 17 | v 8.000001 1.750000 -7.999999 18 | v 8.000002 3.500000 -7.999998 19 | v 8.000003 5.250000 -7.999997 20 | v 4.000000 0.000000 8.000000 21 | v -0.000000 0.000000 7.999999 22 | v -4.000000 0.000000 7.999999 23 | v 7.999999 1.750000 8.000001 24 | v 7.999998 3.500000 8.000002 25 | v 7.999996 5.250000 8.000004 26 | v -8.000000 0.000000 3.999998 27 | v -7.999999 0.000000 -0.000002 28 | v -7.999998 0.000000 -4.000002 29 | v -8.000002 1.750000 7.999998 30 | v -8.000002 3.500000 7.999998 31 | v -8.000002 5.250000 7.999997 32 | v -7.999997 1.750000 -8.000002 33 | v -7.999998 3.500000 -8.000001 34 | v -7.999998 5.250000 -8.000000 35 | v 8.000002 7.000000 -3.999996 36 | v 8.000000 7.000000 0.000004 37 | v 7.999997 7.000000 4.000004 38 | v 4.000003 7.000000 -7.999997 39 | v 0.000002 7.000000 -7.999998 40 | v -3.999999 7.000000 -7.999999 41 | v 3.999996 7.000000 8.000003 42 | v -0.000004 7.000000 8.000001 43 | v -4.000003 7.000000 7.999999 44 | v -8.000002 7.000000 3.999998 45 | v -8.000001 7.000000 -0.000002 46 | v -8.000000 7.000000 -4.000001 47 | v 4.000003 5.250000 -7.999998 48 | v 0.000002 5.250000 -7.999999 49 | v -3.999998 5.250000 -7.999999 50 | v 4.000002 3.500000 -7.999999 51 | v 0.000002 3.500000 -8.000000 52 | v -3.999998 3.500000 -8.000000 53 | v 4.000001 1.750000 -8.000000 54 | v 0.000002 1.750000 -8.000001 55 | v -3.999998 1.750000 -8.000002 56 | v -8.000001 1.750000 3.999998 57 | v -8.000000 1.750000 -0.000002 58 | v -7.999998 1.750000 -4.000002 59 | v -8.000001 3.500000 3.999998 60 
| v -8.000000 3.500000 -0.000002 61 | v -7.999999 3.500000 -4.000001 62 | v -8.000001 5.250000 3.999998 63 | v -8.000000 5.250000 -0.000002 64 | v -7.999999 5.250000 -4.000001 65 | v 3.999999 1.750000 8.000000 66 | v -0.000001 1.750000 8.000000 67 | v -4.000001 1.750000 7.999999 68 | v 3.999998 3.500000 8.000001 69 | v -0.000002 3.500000 8.000000 70 | v -4.000002 3.500000 7.999999 71 | v 3.999997 5.250000 8.000002 72 | v -0.000003 5.250000 8.000000 73 | v -4.000002 5.250000 7.999999 74 | v 8.000000 1.750000 -3.999999 75 | v 8.000000 1.750000 0.000001 76 | v 7.999999 1.750000 4.000001 77 | v 8.000001 3.500000 -3.999998 78 | v 8.000000 3.500000 0.000002 79 | v 7.999999 3.500000 4.000002 80 | v 8.000001 5.250000 -3.999997 81 | v 8.000000 5.250000 0.000003 82 | v 7.999998 5.250000 4.000003 83 | v 4.000001 7.000000 -3.999997 84 | v 3.999999 7.000000 0.000003 85 | v 3.999998 7.000000 4.000003 86 | v 0.000000 7.000000 -3.999998 87 | v -0.000001 7.000000 0.000001 88 | v -0.000002 7.000000 4.000001 89 | v -4.000000 7.000000 -4.000000 90 | v -4.000001 7.000000 0.000000 91 | v -4.000002 7.000000 4.000000 92 | v 4.000000 0.000000 -4.000000 93 | v 0.000001 0.000000 -4.000001 94 | v -3.999999 0.000000 -4.000002 95 | v 4.000000 0.000000 -0.000001 96 | v 0.000000 0.000000 -0.000001 97 | v -3.999999 0.000000 -0.000002 98 | v 4.000000 0.000000 4.000000 99 | v 0.000000 0.000000 3.999999 100 | v -4.000000 0.000000 3.999999 101 | v 6.000000 0.000000 -8.000000 102 | v 6.000000 0.000000 8.000000 103 | v 6.000003 7.000000 -7.999997 104 | v 5.999995 7.000000 8.000004 105 | v 6.000003 5.250000 -7.999998 106 | v 6.000002 3.500000 -7.999999 107 | v 6.000001 1.750000 -8.000000 108 | v 5.999998 3.500000 8.000002 109 | v 6.000000 0.000000 -4.000000 110 | v 6.000000 0.000000 -0.000000 111 | v 6.000000 0.000000 4.000000 112 | f 98 24 3 20 113 | f 89 41 7 42 114 | f 76 77 80 35 34 79 115 | f 20 3 27 28 68 116 | f 100 18 63 66 106 22 21 2 117 | f 38 8 32 31 50 47 118 | f 26 4 30 31 59 56 119 | f 37 
38 47 50 49 120 | f 49 50 53 14 13 52 121 | f 36 37 46 49 48 45 122 | f 30 4 14 53 50 31 123 | f 104 105 15 16 17 103 124 | f 25 26 56 59 58 55 125 | f 28 57 60 42 7 29 126 | f 3 24 54 57 28 27 127 | f 24 25 55 58 57 54 128 | f 58 59 62 44 43 61 129 | f 32 8 44 62 59 31 130 | f 60 61 43 42 131 | f 105 51 12 99 1 15 132 | f 18 19 64 67 66 63 133 | f 66 67 70 40 39 69 134 | f 67 68 71 41 40 70 135 | f 19 20 65 68 67 64 136 | f 29 7 41 71 68 28 137 | f 1 9 72 75 16 138 | f 23 6 35 80 77 22 139 | f 9 10 73 76 75 72 140 | f 10 11 74 77 76 73 141 | f 11 2 21 22 77 74 142 | f 48 49 52 13 12 51 143 | f 16 75 78 33 5 144 | f 75 76 79 34 33 78 145 | f 99 12 90 107 9 1 146 | f 83 39 40 86 147 | f 86 40 41 89 148 | f 35 6 102 39 83 149 | f 36 81 84 37 150 | f 81 82 85 84 151 | f 82 83 86 85 152 | f 37 84 87 38 153 | f 84 85 88 87 154 | f 85 86 89 88 155 | f 38 87 44 8 156 | f 87 88 43 44 157 | f 88 89 42 43 158 | f 14 4 26 92 159 | f 92 26 25 95 160 | f 95 25 24 98 161 | f 107 90 93 108 10 9 162 | f 12 13 91 90 163 | f 13 14 92 91 164 | f 108 93 96 109 11 10 165 | f 90 91 94 93 166 | f 91 92 95 94 167 | f 109 96 18 100 2 11 168 | f 93 94 97 96 169 | f 94 95 98 97 170 | f 96 97 19 18 171 | f 97 98 20 19 172 | f 35 83 82 34 173 | f 101 36 45 103 17 5 174 | f 34 82 81 33 175 | f 33 81 36 101 5 176 | f 106 66 69 39 102 6 23 22 177 | -------------------------------------------------------------------------------- /models/box_windows.obj: -------------------------------------------------------------------------------- 1 | # A box with a few holes in it, like an interior scene. 2 | # The holes are filled with glass windows. 
3 | 4 | v 8.000000 0.000000 -8.000000 5 | v 8.000000 0.000000 8.000000 6 | v -8.000001 0.000000 7.999998 7 | v -7.999997 0.000000 -8.000003 8 | v 8.000004 7.000000 -7.999996 9 | v 7.999995 7.000000 8.000005 10 | v -8.000003 7.000000 7.999997 11 | v -7.999999 7.000000 -8.000000 12 | v 8.000000 0.000000 -4.000000 13 | v 8.000000 0.000000 0.000000 14 | v 8.000000 0.000000 4.000000 15 | v 4.000001 0.000000 -8.000001 16 | v 0.000001 0.000000 -8.000002 17 | v -3.999998 0.000000 -8.000002 18 | v 8.000001 1.750000 -7.999999 19 | v 8.000002 3.500000 -7.999998 20 | v 8.000003 5.250000 -7.999997 21 | v 4.000000 0.000000 8.000000 22 | v -0.000000 0.000000 7.999999 23 | v -4.000000 0.000000 7.999999 24 | v 7.999999 1.750000 8.000001 25 | v 7.999998 3.500000 8.000002 26 | v 7.999996 5.250000 8.000004 27 | v -8.000000 0.000000 3.999998 28 | v -7.999999 0.000000 -0.000002 29 | v -7.999998 0.000000 -4.000002 30 | v -8.000002 1.750000 7.999998 31 | v -8.000002 3.500000 7.999998 32 | v -8.000002 5.250000 7.999997 33 | v -7.999997 1.750000 -8.000002 34 | v -7.999998 3.500000 -8.000001 35 | v -7.999998 5.250000 -8.000000 36 | v 8.000002 7.000000 -3.999996 37 | v 8.000000 7.000000 0.000004 38 | v 7.999997 7.000000 4.000004 39 | v 4.000003 7.000000 -7.999997 40 | v 0.000002 7.000000 -7.999998 41 | v -3.999999 7.000000 -7.999999 42 | v 3.999996 7.000000 8.000003 43 | v -0.000004 7.000000 8.000001 44 | v -4.000003 7.000000 7.999999 45 | v -8.000002 7.000000 3.999998 46 | v -8.000001 7.000000 -0.000002 47 | v -8.000000 7.000000 -4.000001 48 | v 4.000003 5.250000 -7.999998 49 | v 0.000002 5.250000 -7.999999 50 | v -3.999998 5.250000 -7.999999 51 | v 4.000002 3.500000 -7.999999 52 | v 0.000002 3.500000 -8.000000 53 | v -3.999998 3.500000 -8.000000 54 | v 4.000001 1.750000 -8.000000 55 | v 0.000002 1.750000 -8.000001 56 | v -3.999998 1.750000 -8.000002 57 | v -8.000001 1.750000 3.999998 58 | v -8.000000 1.750000 -0.000002 59 | v -7.999998 1.750000 -4.000002 60 | v -8.000001 3.500000 3.999998 
61 | v -8.000000 3.500000 -0.000002 62 | v -7.999999 3.500000 -4.000001 63 | v -8.000001 5.250000 3.999998 64 | v -8.000000 5.250000 -0.000002 65 | v -7.999999 5.250000 -4.000001 66 | v 3.999999 1.750000 8.000000 67 | v -0.000001 1.750000 8.000000 68 | v -4.000001 1.750000 7.999999 69 | v 3.999998 3.500000 8.000001 70 | v -0.000002 3.500000 8.000000 71 | v -4.000002 3.500000 7.999999 72 | v 3.999997 5.250000 8.000002 73 | v -0.000003 5.250000 8.000000 74 | v -4.000002 5.250000 7.999999 75 | v 8.000000 1.750000 -3.999999 76 | v 8.000000 1.750000 0.000001 77 | v 7.999999 1.750000 4.000001 78 | v 8.000001 3.500000 -3.999998 79 | v 8.000000 3.500000 0.000002 80 | v 7.999999 3.500000 4.000002 81 | v 8.000001 5.250000 -3.999997 82 | v 8.000000 5.250000 0.000003 83 | v 7.999998 5.250000 4.000003 84 | v 4.000001 7.000000 -3.999997 85 | v 3.999999 7.000000 0.000003 86 | v 3.999998 7.000000 4.000003 87 | v 0.000000 7.000000 -3.999998 88 | v -0.000001 7.000000 0.000001 89 | v -0.000002 7.000000 4.000001 90 | v -4.000000 7.000000 -4.000000 91 | v -4.000001 7.000000 0.000000 92 | v -4.000002 7.000000 4.000000 93 | v 4.000000 0.000000 -4.000000 94 | v 0.000001 0.000000 -4.000001 95 | v -3.999999 0.000000 -4.000002 96 | v 4.000000 0.000000 -0.000001 97 | v 0.000000 0.000000 -0.000001 98 | v -3.999999 0.000000 -0.000002 99 | v 4.000000 0.000000 4.000000 100 | v 0.000000 0.000000 3.999999 101 | v -4.000000 0.000000 3.999999 102 | v 6.000000 0.000000 -8.000000 103 | v 6.000000 0.000000 8.000000 104 | v 6.000003 7.000000 -7.999997 105 | v 5.999995 7.000000 8.000004 106 | v 6.000003 5.250000 -7.999998 107 | v 6.000002 3.500000 -7.999999 108 | v 6.000001 1.750000 -8.000000 109 | v 5.999998 3.500000 8.000002 110 | v 6.000000 0.000000 -4.000000 111 | v 6.000000 0.000000 -0.000000 112 | v 6.000000 0.000000 4.000000 113 | 114 | # Window vertices 115 | v 4.000003 5.250000 -7.999998 116 | v 4.000002 3.500000 -7.999999 117 | v 4.000001 1.750000 -8.000000 118 | v -8.000001 3.500000 3.999998 
119 | v -8.000000 3.500000 -0.000002 120 | v -8.000001 5.250000 3.999998 121 | v -8.000000 5.250000 -0.000002 122 | v 6.000003 5.250000 -7.999998 123 | v 6.000002 3.500000 -7.999999 124 | 125 | usemtl wall 126 | f 98 24 3 20 127 | f 89 41 7 42 128 | f 76 77 80 35 34 79 129 | f 20 3 27 28 68 130 | f 100 18 63 66 106 22 21 2 131 | f 38 8 32 31 50 47 132 | f 26 4 30 31 59 56 133 | f 37 38 47 50 49 134 | f 49 50 53 14 13 52 135 | f 36 37 46 49 48 45 136 | f 30 4 14 53 50 31 137 | f 104 105 15 16 17 103 138 | f 25 26 56 59 58 55 139 | f 28 57 60 42 7 29 140 | f 3 24 54 57 28 27 141 | f 24 25 55 58 57 54 142 | f 58 59 62 44 43 61 143 | f 32 8 44 62 59 31 144 | f 60 61 43 42 145 | f 105 51 12 99 1 15 146 | f 18 19 64 67 66 63 147 | f 66 67 70 40 39 69 148 | f 67 68 71 41 40 70 149 | f 19 20 65 68 67 64 150 | f 29 7 41 71 68 28 151 | f 1 9 72 75 16 152 | f 23 6 35 80 77 22 153 | f 9 10 73 76 75 72 154 | f 10 11 74 77 76 73 155 | f 11 2 21 22 77 74 156 | f 48 49 52 13 12 51 157 | f 16 75 78 33 5 158 | f 75 76 79 34 33 78 159 | f 99 12 90 107 9 1 160 | f 83 39 40 86 161 | f 86 40 41 89 162 | f 35 6 102 39 83 163 | f 36 81 84 37 164 | f 81 82 85 84 165 | f 82 83 86 85 166 | f 37 84 87 38 167 | f 84 85 88 87 168 | f 85 86 89 88 169 | f 38 87 44 8 170 | f 87 88 43 44 171 | f 88 89 42 43 172 | f 14 4 26 92 173 | f 92 26 25 95 174 | f 95 25 24 98 175 | f 107 90 93 108 10 9 176 | f 12 13 91 90 177 | f 13 14 92 91 178 | f 108 93 96 109 11 10 179 | f 90 91 94 93 180 | f 91 92 95 94 181 | f 109 96 18 100 2 11 182 | f 93 94 97 96 183 | f 94 95 98 97 184 | f 96 97 19 18 185 | f 97 98 20 19 186 | f 35 83 82 34 187 | f 101 36 45 103 17 5 188 | f 34 82 81 33 189 | f 33 81 36 101 5 190 | f 106 66 69 39 102 6 23 22 191 | 192 | usemtl glass 193 | f 117 110 111 112 118 194 | f 113 114 116 115 195 | -------------------------------------------------------------------------------- /models/indoor.obj: -------------------------------------------------------------------------------- 1 | # An indoor 
scene, modelled manually so the geometry is optimized for my path 2 | # tracer. For instance, many objects do not have back sides because they would 3 | # not be visible anyway. The number of primitives has been kept low: 142 4 | # vertices, 100 triangles. That is less than the low-poly Suzanne mesh. 5 | 6 | v 4.000000 0.100000 4.015000 7 | v -4.000000 0.100000 4.015000 8 | v 4.000000 -0.100000 3.985000 9 | v 4.000000 0.100000 3.985000 10 | v -4.000000 -0.100000 3.985000 11 | v -4.000000 0.100000 3.985000 12 | usemtl baseboard 13 | f 4 5 6 14 | f 6 1 4 15 | f 4 3 5 16 | f 6 2 1 17 | v -4.000000 -0.200000 4.000000 18 | v 4.000000 -0.200000 4.000000 19 | v -4.000000 3.300000 4.000000 20 | v 4.000000 3.300000 4.000000 21 | usemtl wall 22 | f 9 8 7 23 | f 9 10 8 24 | v -3.166173 0.494651 1.363312 25 | v -3.327058 0.973895 1.430704 26 | v -3.229106 0.097355 1.389674 27 | v -3.500403 0.905491 1.503315 28 | v -3.513893 0.494651 0.533197 29 | v -3.674779 0.973895 0.600590 30 | v -3.576827 0.097355 0.559559 31 | v -3.848124 0.905491 0.673201 32 | v -3.338880 0.979871 1.435656 33 | v -3.494891 0.918308 1.501006 34 | v -3.842611 0.918308 0.670892 35 | v -3.686601 0.979871 0.605542 36 | v -3.843320 0.908911 0.660347 37 | v -3.364755 0.501423 1.446495 38 | v -3.687310 0.970474 0.594997 39 | v -3.526424 0.491231 0.527604 40 | v -3.170976 0.491231 1.376166 41 | v -3.331862 0.970474 1.443558 42 | v -3.487873 0.908911 1.508909 43 | v -3.707672 0.504843 0.603526 44 | v -3.712475 0.501423 0.616380 45 | v -3.352224 0.504843 1.452088 46 | usemtl fauteuil 47 | f 31 13 24 48 | f 16 11 15 49 | f 14 29 20 50 | f 22 20 19 51 | f 12 22 19 52 | f 14 21 18 53 | f 23 18 21 54 | f 25 30 23 55 | f 22 16 25 56 | f 31 23 30 57 | f 16 26 25 58 | f 28 32 27 59 | f 11 28 27 60 | f 21 25 23 61 | f 20 28 19 62 | f 19 28 12 63 | f 14 32 29 64 | f 24 14 31 65 | f 31 17 13 66 | f 14 18 31 67 | f 16 12 11 68 | f 22 21 20 69 | f 12 16 22 70 | f 14 20 21 71 | f 25 26 30 72 | f 31 18 23 73 | f 16 15 26 74 | f 
28 29 32 75 | f 11 12 28 76 | f 21 22 25 77 | f 20 29 28 78 | f 14 24 32 79 | v -2.883356 -0.020000 0.269076 80 | v -2.892579 0.740000 0.272940 81 | v -3.814929 0.740000 0.659296 82 | v -2.960627 -0.020000 0.084607 83 | v -2.969851 0.740000 0.088470 84 | v -3.892200 0.740000 0.474826 85 | v -3.896064 -0.020000 0.465603 86 | v -2.973714 -0.020000 0.079247 87 | v -2.888716 -0.020000 0.282164 88 | v -3.811065 -0.020000 0.668520 89 | v -2.883356 0.730000 0.269076 90 | v -2.960627 0.730000 0.084607 91 | v -3.896064 0.730000 0.465603 92 | v -2.973714 0.730000 0.079247 93 | v -2.888716 0.730000 0.282164 94 | v -3.811065 0.730000 0.668520 95 | usemtl fauteuil 96 | f 36 46 44 97 | f 43 36 44 98 | f 38 34 37 99 | f 46 39 45 100 | f 46 37 44 101 | f 48 41 47 102 | f 34 47 43 103 | f 43 41 33 104 | f 37 43 44 105 | f 37 45 38 106 | f 35 47 34 107 | f 42 45 39 108 | f 38 48 35 109 | f 36 40 46 110 | f 43 33 36 111 | f 38 35 34 112 | f 46 40 39 113 | f 48 42 41 114 | f 43 47 41 115 | f 37 34 43 116 | f 37 46 45 117 | f 35 48 47 118 | f 42 48 45 119 | f 38 45 48 120 | v -2.458364 -0.020000 1.283661 121 | v -2.467588 0.740000 1.287525 122 | v -3.389937 0.740000 1.673881 123 | v -2.535635 -0.020000 1.099191 124 | v -2.544859 0.740000 1.103055 125 | v -3.467209 0.740000 1.489411 126 | v -3.471072 -0.020000 1.480187 127 | v -2.548723 -0.020000 1.093831 128 | v -2.463724 -0.020000 1.296748 129 | v -3.386074 -0.020000 1.683104 130 | v -2.458364 0.730000 1.283661 131 | v -2.535635 0.730000 1.099191 132 | v -3.471072 0.730000 1.480187 133 | v -2.548723 0.730000 1.093831 134 | v -2.463724 0.730000 1.296748 135 | v -3.386074 0.730000 1.683104 136 | usemtl fauteuil 137 | f 52 62 60 138 | f 59 52 60 139 | f 54 50 53 140 | f 62 55 61 141 | f 62 53 60 142 | f 64 57 63 143 | f 50 63 59 144 | f 59 57 49 145 | f 53 59 60 146 | f 53 61 54 147 | f 50 64 63 148 | f 58 61 55 149 | f 51 61 64 150 | f 52 56 62 151 | f 59 49 52 152 | f 54 51 50 153 | f 62 56 55 154 | f 64 58 57 155 | f 59 63 57 156 | f 
53 50 59 157 | f 53 62 61 158 | f 50 51 64 159 | f 58 64 61 160 | f 51 54 61 161 | v -2.563306 0.100000 1.110782 162 | v -2.544859 0.500000 1.103055 163 | v -3.227398 0.100000 1.388958 164 | v -2.911026 0.100000 0.280667 165 | v -2.892579 0.500000 0.272940 166 | v -3.575118 0.100000 0.558843 167 | v -2.544859 0.380000 1.103055 168 | v -2.892579 0.380000 0.272940 169 | v -2.911026 0.376955 0.280667 170 | v -2.563306 0.376955 1.110782 171 | v -2.554082 0.510000 1.106918 172 | v -3.181280 0.510000 1.369640 173 | v -2.901803 0.510000 0.276804 174 | v -3.529001 0.510000 0.539526 175 | usemtl fauteuil 176 | f 74 68 73 177 | f 67 68 65 178 | f 69 75 66 179 | f 66 72 69 180 | f 72 74 73 181 | f 78 75 77 182 | f 74 65 68 183 | f 67 70 68 184 | f 69 77 75 185 | f 66 71 72 186 | f 72 71 74 187 | f 78 76 75 188 | v -3.384160 0.494651 -0.905824 189 | v -3.523827 0.973895 -1.010318 190 | v -3.438793 0.097355 -0.946699 191 | v -3.674310 0.905491 -1.122905 192 | v -2.845004 0.494651 -1.626455 193 | v -2.984670 0.973895 -1.730950 194 | v -2.899637 0.097355 -1.667330 195 | v -3.135153 0.905491 -1.843537 196 | v -3.534090 0.979871 -1.017997 197 | v -3.669524 0.918308 -1.119325 198 | v -3.130367 0.918308 -1.839957 199 | v -2.994933 0.979871 -1.738628 200 | v -3.121638 0.908911 -1.845915 201 | v -3.556551 0.501423 -1.034802 202 | v -2.986204 0.970474 -1.744586 203 | v -2.846537 0.491231 -1.640092 204 | v -3.397675 0.491231 -0.903446 205 | v -3.537342 0.970474 -1.007941 206 | v -3.672776 0.908911 -1.109269 207 | v -3.003880 0.504843 -1.757811 208 | v -3.017395 0.501423 -1.755434 209 | v -3.555018 0.504843 -1.021166 210 | usemtl fauteuil 211 | f 99 81 92 212 | f 80 83 84 213 | f 82 97 88 214 | f 89 87 90 215 | f 84 87 80 216 | f 86 88 89 217 | f 91 86 89 218 | f 93 98 91 219 | f 90 84 93 220 | f 86 98 99 221 | f 83 93 84 222 | f 96 100 95 223 | f 79 96 95 224 | f 89 93 91 225 | f 88 96 87 226 | f 87 96 80 227 | f 82 100 97 228 | f 92 82 99 229 | f 99 85 81 230 | f 82 86 99 231 | f 80 79 
83 232 | f 89 88 87 233 | f 84 90 87 234 | f 86 82 88 235 | f 93 94 98 236 | f 86 91 98 237 | f 83 94 93 238 | f 96 97 100 239 | f 79 80 96 240 | f 89 90 93 241 | f 88 97 96 242 | f 82 92 100 243 | v -2.297627 -0.020000 -1.216924 244 | v -2.305634 0.740000 -1.222914 245 | v -3.106336 0.740000 -1.821977 246 | v -2.177815 -0.020000 -1.377064 247 | v -2.185822 0.740000 -1.383055 248 | v -2.986524 0.740000 -1.982118 249 | v -2.980533 -0.020000 -1.990125 250 | v -2.179831 -0.020000 -1.391062 251 | v -2.311625 -0.020000 -1.214907 252 | v -3.112327 -0.020000 -1.813970 253 | v -2.297627 0.730000 -1.216924 254 | v -2.177815 0.730000 -1.377064 255 | v -2.980533 0.730000 -1.990125 256 | v -2.179831 0.730000 -1.391062 257 | v -2.311625 0.730000 -1.214907 258 | v -3.112327 0.730000 -1.813970 259 | usemtl fauteuil 260 | f 104 114 112 261 | f 111 104 112 262 | f 106 102 105 263 | f 114 107 113 264 | f 114 105 112 265 | f 116 109 115 266 | f 102 115 111 267 | f 111 109 101 268 | f 102 112 105 269 | f 105 113 106 270 | f 102 116 115 271 | f 110 113 107 272 | f 106 116 103 273 | f 104 108 114 274 | f 111 101 104 275 | f 106 103 102 276 | f 114 108 107 277 | f 116 110 109 278 | f 111 115 109 279 | f 102 111 112 280 | f 105 114 113 281 | f 102 103 116 282 | f 110 116 113 283 | f 106 113 116 284 | v -2.956596 -0.020000 -0.336152 285 | v -2.964603 0.740000 -0.342142 286 | v -3.765305 0.740000 -0.941205 287 | v -2.836784 -0.020000 -0.496292 288 | v -2.844791 0.740000 -0.502283 289 | v -3.645493 0.740000 -1.101346 290 | v -3.639502 -0.020000 -1.109353 291 | v -2.838800 -0.020000 -0.510290 292 | v -2.970594 -0.020000 -0.334135 293 | v -3.771296 -0.020000 -0.933198 294 | v -2.956596 0.730000 -0.336152 295 | v -2.836784 0.730000 -0.496292 296 | v -3.639502 0.730000 -1.109353 297 | v -2.838800 0.730000 -0.510290 298 | v -2.970594 0.730000 -0.334135 299 | v -3.771296 0.730000 -0.933198 300 | usemtl fauteuil 301 | f 120 130 128 302 | f 127 120 128 303 | f 121 119 118 304 | f 130 123 129 305 | f 
130 121 128 306 | f 132 125 131 307 | f 118 131 127 308 | f 127 125 117 309 | f 121 127 128 310 | f 121 129 122 311 | f 119 131 118 312 | f 126 129 123 313 | f 122 132 119 314 | f 120 124 130 315 | f 127 117 120 316 | f 121 122 119 317 | f 130 124 123 318 | f 132 126 125 319 | f 127 131 125 320 | f 121 118 127 321 | f 121 130 129 322 | f 119 132 131 323 | f 126 132 129 324 | f 122 129 132 325 | v -2.860805 0.100000 -0.514264 326 | v -2.844791 0.500000 -0.502283 327 | v -3.437310 0.100000 -0.945589 328 | v -2.321648 0.100000 -1.234896 329 | v -2.305634 0.500000 -1.222914 330 | v -2.898154 0.100000 -1.666221 331 | v -2.844791 0.380000 -0.502283 332 | v -2.305634 0.380000 -1.222914 333 | v -2.321648 0.376955 -1.234896 334 | v -2.860805 0.376955 -0.514264 335 | v -2.852798 0.510000 -0.508273 336 | v -3.397275 0.510000 -0.915636 337 | v -2.313641 0.510000 -1.228905 338 | v -2.858119 0.510000 -1.636268 339 | usemtl fauteuil 340 | f 142 136 141 341 | f 135 136 133 342 | f 134 145 143 343 | f 134 140 137 344 | f 140 142 141 345 | f 146 143 145 346 | f 142 133 136 347 | f 135 138 136 348 | f 134 137 145 349 | f 134 139 140 350 | f 140 139 142 351 | f 146 144 143 352 | v -3.850000 3.126285 -3.106950 353 | v 3.850000 3.126285 -3.106950 354 | v -3.850000 0.176997 -4.180403 355 | v 3.850000 0.176997 -4.180403 356 | v -1.850000 3.126285 -3.106950 357 | v 0.150000 3.126285 -3.106950 358 | v 1.850000 3.126285 -3.106950 359 | v 1.850000 0.176997 -4.180403 360 | v -0.150000 0.176997 -4.180403 361 | v -1.850000 0.176997 -4.180403 362 | v 2.150000 3.126285 -3.106950 363 | v 2.150000 0.176997 -4.180403 364 | v -0.150000 3.126285 -3.106950 365 | v 0.150000 0.176997 -4.180403 366 | v -2.150000 3.126285 -3.106950 367 | v -2.150000 0.176997 -4.180403 368 | usemtl glass 369 | f 158 148 157 370 | f 149 161 147 371 | f 156 159 151 372 | f 160 153 152 373 | f 158 150 148 374 | f 149 162 161 375 | f 156 155 159 376 | f 160 154 153 377 | v -4.000000 3.100000 -3.140000 378 | v -4.000000 3.100000 
-2.700000 379 | v -4.000000 3.500000 -2.700000 380 | v 4.000000 3.100000 -3.140000 381 | v 4.000000 3.100000 -2.700000 382 | v 4.000000 3.500000 -2.700000 383 | vt 0.305169 0.000000 384 | vt 0.456596 2.000000 385 | vt 0.305169 2.000000 386 | vt 0.855107 -0.000000 387 | vt 0.705756 2.000000 388 | vt 0.705756 0.000000 389 | vt 0.456596 0.000000 390 | vt 0.855107 2.000000 391 | usemtl wood_light 392 | f 164/1 166/2 167/3 393 | f 168/4 164/5 167/6 394 | f 164/1 163/7 166/2 395 | f 168/4 165/8 164/5 396 | v -4.000000 3.300000 4.000000 397 | v 4.000000 3.300000 4.000000 398 | v -4.000000 3.300000 -2.710000 399 | v 4.000000 3.300000 -2.710000 400 | usemtl ceiling 401 | f 171 170 169 402 | f 171 172 170 403 | v 4.120000 1.902521 -3.518132 404 | v 4.120000 3.509489 -1.603021 405 | v 4.120000 1.718670 -3.363863 406 | v 4.120000 3.325639 -1.448752 407 | v 3.880000 1.902521 -3.518132 408 | v 3.880000 3.509489 -1.603021 409 | v 3.880000 1.718670 -3.363863 410 | v 3.880000 3.325639 -1.448752 411 | vt 0.619141 1.000000 412 | vt 0.542969 0.000000 413 | vt 0.542969 1.000000 414 | vt 0.464844 1.000000 415 | vt 0.541992 0.000000 416 | vt 0.541992 1.000000 417 | vt 0.100586 1.000000 418 | vt 0.177734 0.000000 419 | vt 0.177734 1.000000 420 | vt 0.619141 0.000000 421 | vt 0.464844 0.000000 422 | vt 0.100586 0.000000 423 | usemtl wood_light 424 | f 180/9 175/10 176/11 425 | f 178/12 179/13 180/14 426 | f 174/15 177/16 178/17 427 | f 180/9 179/18 175/10 428 | f 178/12 177/19 179/13 429 | f 174/15 173/20 177/16 430 | v 4.150000 -0.239241 -4.027450 431 | v 4.150000 3.707468 -2.590966 432 | v 3.850000 -0.136635 -4.309358 433 | v 3.850000 3.810074 -2.872873 434 | v 3.850000 -0.239241 -4.027450 435 | v 3.850000 3.707468 -2.590966 436 | vt 0.619141 1.000000 437 | vt 0.542969 0.000000 438 | vt 0.542969 1.000000 439 | vt 0.464844 1.000000 440 | vt 0.541992 0.000000 441 | vt 0.541992 1.000000 442 | vt 0.619141 0.000000 443 | vt 0.464844 0.000000 444 | usemtl wood_light 445 | f 186/21 181/22 
182/23 446 | f 184/24 185/25 186/26 447 | f 186/21 185/27 181/22 448 | f 184/24 183/28 185/25 449 | v 1.850000 -0.239241 -4.027450 450 | v 1.850000 3.707468 -2.590966 451 | v 1.850000 -0.136635 -4.309358 452 | v 1.850000 3.810074 -2.872873 453 | v 2.150000 -0.239241 -4.027450 454 | v 2.150000 3.707468 -2.590966 455 | v 2.150000 -0.136635 -4.309358 456 | v 2.150000 3.810074 -2.872873 457 | vt 0.702637 1.000000 458 | vt 0.623047 0.000000 459 | vt 0.701660 0.000000 460 | vt 0.464844 1.000000 461 | vt 0.541992 0.000000 462 | vt 0.541992 1.000000 463 | vt 0.100586 1.000000 464 | vt 0.177734 0.000000 465 | vt 0.177734 1.000000 466 | vt 0.623047 1.000000 467 | vt 0.464844 0.000000 468 | vt 0.100586 0.000000 469 | usemtl wood_light 470 | f 188/29 189/30 187/31 471 | f 192/32 193/33 194/34 472 | f 188/35 191/36 192/37 473 | f 188/29 190/38 189/30 474 | f 192/32 191/39 193/33 475 | f 188/35 187/40 191/36 476 | v 2.120000 1.718670 -3.363863 477 | v 2.120001 3.325639 -1.448752 478 | v 1.880000 1.718670 -3.363863 479 | v 1.880001 3.325639 -1.448752 480 | v 2.120000 1.902521 -3.518132 481 | v 2.120001 3.509489 -1.603021 482 | v 1.880000 1.902521 -3.518132 483 | v 1.880001 3.509489 -1.603021 484 | vt 0.623047 1.000000 485 | vt 0.701660 0.000000 486 | vt 0.702637 1.000000 487 | vt 0.542969 1.000000 488 | vt 0.619141 0.000000 489 | vt 0.542969 0.000000 490 | vt 0.541992 1.000000 491 | vt 0.464844 0.000000 492 | vt 0.541992 0.000000 493 | vt 0.100586 1.000000 494 | vt 0.177734 0.000000 495 | vt 0.177734 1.000000 496 | vt 0.623047 0.000000 497 | vt 0.619141 1.000000 498 | vt 0.464844 1.000000 499 | vt 0.100586 0.000000 500 | usemtl wood_light 501 | f 198/41 195/42 196/43 502 | f 198/44 201/45 197/46 503 | f 202/47 199/48 201/49 504 | f 196/50 199/51 200/52 505 | f 198/41 197/53 195/42 506 | f 198/44 202/54 201/45 507 | f 202/47 200/55 199/48 508 | f 196/50 195/56 199/51 509 | v -2.120000 1.902521 -3.518132 510 | v -2.120000 3.509489 -1.603021 511 | v -1.880000 1.902521 -3.518132 512 
| v -1.880000 3.509489 -1.603021 513 | v -2.120000 1.718670 -3.363863 514 | v -2.120000 3.325639 -1.448752 515 | v -1.880000 1.718670 -3.363863 516 | v -1.880000 3.325639 -1.448752 517 | vt 0.623047 1.000000 518 | vt 0.701660 0.000000 519 | vt 0.702637 1.000000 520 | vt 0.619141 1.000000 521 | vt 0.542969 0.000000 522 | vt 0.542969 1.000000 523 | vt 0.464844 1.000000 524 | vt 0.541992 0.000000 525 | vt 0.541992 1.000000 526 | vt 0.177734 1.000000 527 | vt 0.100586 0.000000 528 | vt 0.177734 0.000000 529 | vt 0.623047 0.000000 530 | vt 0.619141 0.000000 531 | vt 0.464844 0.000000 532 | vt 0.100586 1.000000 533 | usemtl wood_light 534 | f 206/57 203/58 204/59 535 | f 210/60 205/61 206/62 536 | f 208/63 209/64 210/65 537 | f 208/66 203/67 207/68 538 | f 206/57 205/69 203/58 539 | f 210/60 209/70 205/61 540 | f 208/63 207/71 209/64 541 | f 208/66 204/72 203/67 542 | v -1.850000 -0.136635 -4.309358 543 | v -1.850000 3.810074 -2.872873 544 | v -1.850000 -0.239241 -4.027450 545 | v -1.850000 3.707468 -2.590966 546 | v -2.150000 -0.136635 -4.309358 547 | v -2.150000 3.810074 -2.872873 548 | v -2.150000 -0.239241 -4.027450 549 | v -2.150000 3.707468 -2.590966 550 | vt 0.623047 1.000000 551 | vt 0.701660 0.000000 552 | vt 0.702637 1.000000 553 | vt 0.619141 1.000000 554 | vt 0.542969 0.000000 555 | vt 0.542969 1.000000 556 | vt 0.464844 1.000000 557 | vt 0.541992 0.000000 558 | vt 0.541992 1.000000 559 | vt 0.623047 0.000000 560 | vt 0.619141 0.000000 561 | vt 0.464844 0.000000 562 | usemtl wood_light 563 | f 214/73 211/74 212/75 564 | f 218/76 213/77 214/78 565 | f 216/79 217/80 218/81 566 | f 214/73 213/82 211/74 567 | f 218/76 217/83 213/77 568 | f 216/79 215/84 217/80 569 | v -3.850000 -0.136635 -4.309358 570 | v -3.850000 3.810074 -2.872873 571 | v -4.150000 -0.239241 -4.027450 572 | v -4.150000 3.707468 -2.590966 573 | v -3.850000 -0.239241 -4.027450 574 | v -3.850000 3.707468 -2.590966 575 | vt 0.619141 1.000000 576 | vt 0.542969 0.000000 577 | vt 0.542969 1.000000 
578 | vt 0.464844 1.000000 579 | vt 0.541992 0.000000 580 | vt 0.541992 1.000000 581 | vt 0.619141 0.000000 582 | vt 0.464844 0.000000 583 | usemtl wood_light 584 | f 224/85 219/86 220/87 585 | f 222/88 223/89 224/90 586 | f 224/85 223/91 219/86 587 | f 222/88 221/92 223/89 588 | v -3.880000 1.902521 -3.518132 589 | v -3.880000 3.509489 -1.603021 590 | v -3.880000 1.718670 -3.363863 591 | v -3.880000 3.325639 -1.448752 592 | v -4.120000 1.902521 -3.518132 593 | v -4.120000 3.509489 -1.603021 594 | v -4.120000 1.718670 -3.363863 595 | v -4.120000 3.325639 -1.448752 596 | vt 0.623047 1.000000 597 | vt 0.701660 0.000000 598 | vt 0.702637 1.000000 599 | vt 0.619141 1.000000 600 | vt 0.542969 0.000000 601 | vt 0.542969 1.000000 602 | vt 0.100586 1.000000 603 | vt 0.177734 0.000000 604 | vt 0.177734 1.000000 605 | vt 0.623047 0.000000 606 | vt 0.619141 0.000000 607 | vt 0.100586 0.000000 608 | usemtl wood_light 609 | f 228/93 225/94 226/95 610 | f 232/96 227/97 228/98 611 | f 226/99 229/100 230/101 612 | f 228/93 227/102 225/94 613 | f 232/96 231/103 227/97 614 | f 226/99 225/104 229/100 615 | v -0.120000 1.718670 -3.363863 616 | v -0.120000 3.325639 -1.448752 617 | v -0.120000 1.902521 -3.518132 618 | v -0.120000 3.509489 -1.603021 619 | v 0.120000 1.718670 -3.363863 620 | v 0.120000 3.325639 -1.448752 621 | v 0.120000 1.902521 -3.518132 622 | v 0.120000 3.509489 -1.603021 623 | vt 0.702637 1.000000 624 | vt 0.623047 0.000000 625 | vt 0.701660 0.000000 626 | vt 0.619141 1.000000 627 | vt 0.542969 0.000000 628 | vt 0.542969 1.000000 629 | vt 0.464844 1.000000 630 | vt 0.541992 0.000000 631 | vt 0.541992 1.000000 632 | vt 0.100586 1.000000 633 | vt 0.177734 0.000000 634 | vt 0.177734 1.000000 635 | vt 0.623047 1.000000 636 | vt 0.619141 0.000000 637 | vt 0.464844 0.000000 638 | vt 0.100586 0.000000 639 | usemtl wood_light 640 | f 234/105 235/106 233/107 641 | f 240/108 235/109 236/110 642 | f 238/111 239/112 240/113 643 | f 234/114 237/115 238/116 644 | f 234/105 236/117 
235/106 645 | f 240/108 239/118 235/109 646 | f 238/111 237/119 239/112 647 | f 234/114 233/120 237/115 648 | v 4.000000 3.300000 4.000000 649 | v 4.000000 -0.200000 4.000000 650 | v 4.000000 3.300000 -2.750000 651 | v 4.000000 -0.200000 -4.020000 652 | usemtl wall 653 | f 243 242 241 654 | f 243 244 242 655 | v -4.000000 3.300000 4.000000 656 | v -4.000000 -0.200000 4.000000 657 | v -4.000000 3.300000 -2.750000 658 | v -4.000000 -0.200000 -4.020000 659 | usemtl wall 660 | f 246 247 245 661 | f 246 248 247 662 | v -0.150000 -0.136635 -4.309358 663 | v -0.150000 3.810074 -2.872873 664 | v 0.150000 -0.136635 -4.309358 665 | v 0.150000 3.810074 -2.872873 666 | v -0.150000 -0.239241 -4.027450 667 | v -0.150000 3.707468 -2.590966 668 | v 0.150000 -0.239241 -4.027450 669 | v 0.150000 3.707468 -2.590966 670 | vt 0.619141 1.000000 671 | vt 0.542969 0.000000 672 | vt 0.542969 1.000000 673 | vt 0.464844 1.000000 674 | vt 0.541992 0.000000 675 | vt 0.541992 1.000000 676 | vt 0.100586 1.000000 677 | vt 0.177734 0.000000 678 | vt 0.177734 1.000000 679 | vt 0.619141 0.000000 680 | vt 0.464844 0.000000 681 | vt 0.100586 0.000000 682 | usemtl wood_light 683 | f 256/121 251/122 252/123 684 | f 254/124 255/125 256/126 685 | f 250/127 253/128 254/129 686 | f 256/121 255/130 251/122 687 | f 254/124 253/131 255/125 688 | f 250/127 249/132 253/128 689 | v -4.000000 -0.200000 -3.800000 690 | v -4.000000 0.200000 -3.800000 691 | v -4.000000 0.200000 -4.200000 692 | v 4.000000 -0.200000 -3.800000 693 | v 4.000000 0.200000 -3.800000 694 | v 4.000000 0.200000 -4.200000 695 | vt 0.305169 0.000000 696 | vt 0.456596 2.000000 697 | vt 0.305169 2.000000 698 | vt 0.855107 -0.000000 699 | vt 0.705756 2.000000 700 | vt 0.705756 0.000000 701 | vt 0.456596 0.000000 702 | vt 0.855107 2.000000 703 | usemtl wood_light 704 | f 258/133 260/134 261/135 705 | f 262/136 258/137 261/138 706 | f 258/133 257/139 260/134 707 | f 262/136 259/140 258/137 708 | v 3.985000 -0.100000 4.000000 709 | v 3.985000 0.100000 
4.000000 710 | v 3.985000 -0.100000 -4.000000 711 | v 3.985000 0.100000 -4.000000 712 | v 4.015000 0.100000 4.000000 713 | v 4.015000 0.100000 -4.000000 714 | usemtl baseboard 715 | f 266 263 264 716 | f 268 264 267 717 | f 266 265 263 718 | f 268 266 264 719 | v -4.015000 0.100000 4.000000 720 | v -4.015000 0.100000 -4.000000 721 | v -3.985000 -0.100000 4.000000 722 | v -3.985000 0.100000 4.000000 723 | v -3.985000 -0.100000 -4.000000 724 | v -3.985000 0.100000 -4.000000 725 | usemtl baseboard 726 | f 272 273 274 727 | f 274 269 272 728 | f 272 271 273 729 | f 274 270 269 730 | v -4.000000 -0.010000 4.000000 731 | v 4.000000 -0.010000 4.000000 732 | v -4.000000 -0.010000 -4.000000 733 | v 4.000000 -0.010000 -4.000000 734 | vt 0.000100 0.000100 735 | vt 0.999900 0.999900 736 | vt 0.000100 0.999900 737 | vt 0.999900 0.000100 738 | usemtl floor 739 | f 276/141 277/142 275/143 740 | f 276/141 278/144 277/142 741 | -------------------------------------------------------------------------------- /models/plane.obj: -------------------------------------------------------------------------------- 1 | # A simple xz-plane with normal in the positive y-direction. 2 | # 4 vertices, 2 triangles 3 | 4 | v -9.0 0.0 9.0 5 | v 9.0 0.0 9.0 6 | v 9.0 0.0 -9.0 7 | v -9.0 0.0 -9.0 8 | 9 | f 1 2 3 4 10 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | Convector 2 | ========= 3 | 4 | Interactive CPU path tracer. 5 | 6 | [![Build Status][ci-img]][ci] 7 | 8 | On the left: interactive mode, running at about 10 fps on my machine. On the 9 | right: accumulative mode, after rendering for a few minutes. 10 | 11 | | ![Interactive][interactive] | ![Accumulative][accumulative] | 12 | |-----------------------------|-------------------------------| 13 | 14 | Requirements 15 | ------------ 16 | 17 | Hardware: a CPU that supports the AVX instructions is required.
In practice this 18 | means Sandy Bridge or later. FMA instructions can be taken advantage of too; 19 | those require Haswell or later. 20 | 21 | Software: a recent nightly version of the 22 | [Rust programming language](https://rust-lang.org) is required. Version 1.10 is 23 | recommended. On Windows you need the version with the MSVC ABI. 24 | 25 | Compiling and Running 26 | --------------------- 27 | 28 | * `cargo run --release` to build and run the release executable. 29 | * `cargo build --release` to build in release mode without running. 30 | * `cargo bench` to build and run all benchmarks in release mode. 31 | * `cargo test` to build and run all tests in debug mode. 32 | 33 | If you do not want to use the FMA instructions, remove the `+fma` from the 34 | codegen options in `.cargo/config`. 35 | 36 | Controls 37 | -------- 38 | 39 | * Press `b` to toggle blending recent frames. 40 | * Press `d` to toggle debug view. 41 | The green channel shows the number of primary AABB intersections, 42 | the blue channel shows the number of primary triangle intersections. 43 | * Press `m` to toggle the median filter for noise reduction. 44 | * Press `q` to quit the application. 45 | * Press `r` to switch between realtime and accumulative rendering. 46 | * Press `s` to print statistics to the console. 47 | * Press `t` to write a trace to trace.json. 48 | It can be opened with Chrome by going to chrome://tracing. 49 | 50 | About the code 51 | -------------- 52 | 53 | Many structs represent eight instances at once for SIMD. In that case the name 54 | has been prefixed with `M` (for “multi”). The single-instance struct types have 55 | the prefix `S` instead (for “single”). 56 | 57 | The most interesting stuff is in `src/triangle.rs`, `src/material.rs`, 58 | `src/renderer.rs`, and `src/bvh.rs`. Shaders are in `src/gpu`. 59 | 60 | License 61 | ------- 62 | 63 | Convector is free software. It is licensed under the 64 | [GNU General Public License][gplv3], version 3.
65 | 66 | [gplv3]: https://www.gnu.org/licenses/gpl-3.0.html 67 | [interactive]: https://raw.githubusercontent.com/ruuda/convector/master/screenshots/interactive.png 68 | [accumulative]: https://raw.githubusercontent.com/ruuda/convector/master/screenshots/accumulative.png 69 | [ci-img]: https://ci.appveyor.com/api/projects/status/nkqhoi829382i1ow?svg=true 70 | [ci]: https://ci.appveyor.com/project/ruuda/convector 71 | -------------------------------------------------------------------------------- /rust-toolchain: -------------------------------------------------------------------------------- 1 | nightly-2017-04-15 2 | -------------------------------------------------------------------------------- /screenshots/accumulative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruuda/convector/2f5f2428fa6c54002bd2ee8ce3d0f2188aab49f8/screenshots/accumulative.png -------------------------------------------------------------------------------- /screenshots/interactive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruuda/convector/2f5f2428fa6c54002bd2ee8ce3d0f2188aab49f8/screenshots/interactive.png -------------------------------------------------------------------------------- /src/aabb.rs: -------------------------------------------------------------------------------- 1 | // Convector -- An interactive CPU path tracer 2 | // Copyright 2016 Ruud van Asseldonk 3 | 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License version 3. A copy 6 | // of the License is available in the root of the repository. 7 | 8 | //! This module implements axis-aligned bounding boxes and related functions. 
9 | 10 | use ray::MRay; 11 | use simd::{Mask, Mf32}; 12 | use vector3::{MVector3, SVector3}; 13 | 14 | #[cfg(test)] 15 | use {bench, test}; 16 | 17 | /// An axis-aligned bounding box. 18 | #[derive(Clone, Debug)] 19 | pub struct Aabb { 20 | pub origin: SVector3, 21 | 22 | /// The origin plus the size. 23 | pub far: SVector3, 24 | } 25 | 26 | /// Caches AABB intersection distances. 27 | pub struct MAabbIntersection { 28 | // The AABB was intersected by the line defined by the ray if tmax > tmin. 29 | // The mask contains the result of this comparison. If tmax is negative, the 30 | // AABB lies behind the ray entirely. 31 | tmin: Mf32, 32 | tmax: Mf32, 33 | 34 | // The mask can be computed from tmin and tmax, but benchmarks show that it 35 | // is slightly faster to store it, than to re-compute it when needed. 36 | mask: Mask, 37 | } 38 | 39 | impl Aabb { 40 | pub fn new(origin: SVector3, far: SVector3) -> Aabb { 41 | Aabb { 42 | origin: origin, 43 | far: far, 44 | } 45 | } 46 | 47 | pub fn zero() -> Aabb { 48 | Aabb { 49 | origin: SVector3::zero(), 50 | far: SVector3::zero(), 51 | } 52 | } 53 | 54 | /// Returns the smallest axis-aligned bounding box that contains all input 55 | /// points. 56 | pub fn enclose_points<'a, I>(points: I) -> Aabb 57 | where I: IntoIterator 58 | { 59 | let mut it = points.into_iter(); 60 | let &first = it.next().expect("enclosure must encluse at least one point"); 61 | 62 | let mut min = first; 63 | let mut max = first; 64 | 65 | while let Some(&point) = it.next() { 66 | min = SVector3::min(min, point); 67 | max = SVector3::max(max, point); 68 | } 69 | 70 | Aabb::new(min, max) 71 | } 72 | 73 | /// Returns the smallest bounding box that contains all input boxes.
74 | pub fn enclose_aabbs<'a, I>(aabbs: I) -> Aabb 75 | where I: IntoIterator 76 | { 77 | let mut it = aabbs.into_iter(); 78 | let first = it.next().expect("enclosure must enclose at least one AABB"); 79 | 80 | let mut min = first.origin; 81 | let mut max = first.far; 82 | 83 | while let Some(aabb) = it.next() { 84 | min = SVector3::min(min, aabb.origin); 85 | max = SVector3::max(max, aabb.far); 86 | } 87 | 88 | Aabb::new(min, max) 89 | } 90 | 91 | /// Returns the size of the bounding box. 92 | pub fn size(&self) -> SVector3 { 93 | self.far - self.origin 94 | } 95 | 96 | /// Returns the surface area of the bounding box. 97 | pub fn area(&self) -> f32 { 98 | let s = self.size(); 99 | let x = s.y * s.z; 100 | let y = s.z * s.x; 101 | let z = s.x * s.y; 102 | 2.0 * (x + y + z) 103 | } 104 | 105 | pub fn intersect(&self, ray: &MRay) -> MAabbIntersection { 106 | // Note: this method, in combination with `MAabbIntersection::any()` 107 | // compiles down to ~65 instructions, taking up ~168 bytes of 108 | // instruction cache; 3 cache lines. 109 | 110 | // Note: the compiler is smart enough to inline this method and compute 111 | // these reciprocals only once per ray, so there is no need to clutter 112 | // the code by passing around precomputed values. 113 | let xinv = ray.direction.x.recip_fast(); 114 | let yinv = ray.direction.y.recip_fast(); 115 | let zinv = ray.direction.z.recip_fast(); 116 | 117 | let d1 = MVector3::broadcast(self.origin) - ray.origin; 118 | let d2 = MVector3::broadcast(self.far) - ray.origin; 119 | 120 | let (tx1, tx2) = (d1.x * xinv, d2.x * xinv); 121 | let txmin = tx1.min(tx2); 122 | let txmax = tx1.max(tx2); 123 | 124 | let (ty1, ty2) = (d1.y * yinv, d2.y * yinv); 125 | let tymin = ty1.min(ty2); 126 | let tymax = ty1.max(ty2); 127 | 128 | let (tz1, tz2) = (d1.z * zinv, d2.z * zinv); 129 | let tzmin = tz1.min(tz2); 130 | let tzmax = tz1.max(tz2); 131 | 132 | // The minimum t in all dimension is the maximum of the per-axis minima. 
133 | let tmin = txmin.max(tymin.max(tzmin)); 134 | let tmax = txmax.min(tymax.min(tzmax)); 135 | 136 | MAabbIntersection { 137 | tmin: tmin, 138 | tmax: tmax, 139 | mask: tmax.geq(tmin), 140 | } 141 | } 142 | } 143 | 144 | impl MAabbIntersection { 145 | /// Returns whether any of the active rays intersected the AABB. 146 | pub fn any(&self) -> bool { 147 | // If there was an intersection in front of the ray, then tmax will 148 | // definitely be positive. The mask is only set for the rays that 149 | // actually intersected the bounding box. 150 | self.tmax.any_sign_bit_positive_masked(self.mask) 151 | } 152 | 153 | /// Returns whether any of the active rays intersected the AABB. 154 | // TODO: I should get rid of one of these `any` methods. 155 | pub fn any_masked(&self, active: Mask) -> bool { 156 | // If there was an intersection in front of the ray, then tmax will 157 | // definitely be positive. The mask is only set for the rays that 158 | // actually intersected the bounding box. 159 | // 160 | // The active mask has an unfortunate sign for this purpose: its sign 161 | // bit is 0 for rays that should be considered, and 1 for rays that 162 | // can be ignored. To get (mask & !active), we can do (mask ^ active) & 163 | // mask. 164 | let mask = (self.mask ^ active) & self.mask; 165 | self.tmax.any_sign_bit_positive_masked(mask) 166 | } 167 | 168 | /// Returns whether for all rays that intersect the AABB and for which the 169 | /// sign bit in the active mask is 0 (positive) the given distance is 170 | /// smaller than the distance to the AABB. 171 | pub fn is_further_away_than(&self, distance: Mf32, active: Mask) -> bool { 172 | // If distance > self.tmin (when false should be returned for the ray), 173 | // the comparison results in positive 0.0. If distance > self.tmin for 174 | // any of the values for which the mask is set, then for that ray the 175 | // AABB is not further away. Hence all sign bits must be negative.
176 | (self.tmin.geq(distance) | active).all_sign_bits_negative_masked(self.mask) 177 | } 178 | 179 | /// Returns whether this AABB should be visited before the other one. 180 | pub fn should_try_before(&self, other: &MAabbIntersection) -> bool { 181 | (self.tmin - other.tmin).all_sign_bits_positive() 182 | } 183 | } 184 | 185 | #[test] 186 | fn aabb_enclose_aabbs() { 187 | let a = Aabb::new(SVector3::new(1.0, 2.0, 3.0), SVector3::new(5.0, 7.0, 9.0)); 188 | let b = Aabb::new(SVector3::new(0.0, 3.0, 2.0), SVector3::new(9.0, 6.0, 9.0)); 189 | let ab = Aabb::enclose_aabbs(&[a, b]); 190 | assert_eq!(ab.origin, SVector3::new(0.0, 2.0, 2.0)); 191 | assert_eq!(ab.far, SVector3::new(9.0, 7.0, 9.0)); 192 | } 193 | 194 | #[test] 195 | fn aabb_area() { 196 | // Width: 4, height: 5, depth: 6. 197 | let aabb = Aabb::new(SVector3::new(1.0, 2.0, 3.0), SVector3::new(5.0, 7.0, 9.0)); 198 | assert_eq!(40.0 + 60.0 + 48.0, aabb.area()); 199 | } 200 | 201 | #[test] 202 | fn intersect_aabb() { 203 | use ray::SRay; 204 | 205 | let aabb = Aabb { 206 | origin: SVector3::new(0.0, 1.0, 2.0), 207 | far: SVector3::new(1.0, 3.0, 5.0), 208 | }; 209 | 210 | // Intersects forwards but not backwards. 211 | let r1 = SRay { 212 | origin: SVector3::zero(), 213 | direction: SVector3::new(2.0, 3.0, 5.0).normalized(), 214 | }; 215 | let mr1 = MRay::broadcast(&r1); 216 | assert!(aabb.intersect(&mr1).any()); 217 | assert!(!aabb.intersect(&-mr1).any()); 218 | 219 | // Intersects forwards but not backwards. 220 | let r2 = SRay { 221 | origin: SVector3::zero(), 222 | direction: SVector3::new(1.0, 4.0, 5.0).normalized(), 223 | }; 224 | let mr2 = MRay::broadcast(&r2); 225 | assert!(aabb.intersect(&mr2).any()); 226 | assert!(!aabb.intersect(&-mr2).any()); 227 | 228 | // Intersects neither forwards nor backwards. 
229 | let r3 = SRay { 230 | origin: SVector3::zero(), 231 | direction: SVector3::new(2.0, 3.0, 0.0).normalized(), 232 | }; 233 | let mr3 = MRay::broadcast(&r3); 234 | assert!(!aabb.intersect(&mr3).any()); 235 | assert!(!aabb.intersect(&-mr3).any()); 236 | 237 | // Intersects both forwards and backwards (origin is inside the aabb). 238 | let r4 = SRay { 239 | origin: SVector3::new(0.2, 1.2, 2.2), 240 | direction: SVector3::new(1.0, 1.0, 0.0).normalized(), 241 | }; 242 | let mr4 = MRay::broadcast(&r4); 243 | assert!(aabb.intersect(&mr4).any()); 244 | assert!(aabb.intersect(&-mr4).any()); 245 | 246 | // Intersects both forwards and backwards (origin is inside the aabb). 247 | let r5 = SRay { 248 | origin: SVector3::new(0.01, 2.0, 3.5), 249 | direction: SVector3::new(0.0, 0.0, 1.0).normalized(), 250 | }; 251 | let mr5 = MRay::broadcast(&r5); 252 | assert!(aabb.intersect(&mr5).any()); 253 | assert!(aabb.intersect(&-mr5).any()); 254 | } 255 | 256 | #[bench] 257 | fn bench_intersect_p100(b: &mut test::Bencher) { 258 | let (aabb, rays) = bench::aabb_with_mrays(4096, 4096); 259 | let mut rays_it = rays.iter().cycle(); 260 | b.iter(|| { 261 | let isect = aabb.intersect(rays_it.next().unwrap()); 262 | test::black_box(isect.any()); 263 | }); 264 | } 265 | 266 | #[bench] 267 | fn bench_intersect_p50(b: &mut test::Bencher) { 268 | let (aabb, rays) = bench::aabb_with_mrays(4096, 2048); 269 | let mut rays_it = rays.iter().cycle(); 270 | b.iter(|| { 271 | let isect = aabb.intersect(rays_it.next().unwrap()); 272 | test::black_box(isect.any()); 273 | }); 274 | } 275 | 276 | #[bench] 277 | fn bench_intersect_8_mrays_per_aabb(b: &mut test::Bencher) { 278 | let rays = bench::mrays_inward(4096 / 8); 279 | let aabbs = bench::aabbs(4096); 280 | let mut rays_it = rays.iter().cycle(); 281 | let mut aabbs_it = aabbs.iter().cycle(); 282 | b.iter(|| { 283 | let aabb = aabbs_it.next().unwrap(); 284 | for _ in 0..8 { 285 | let isect = aabb.intersect(rays_it.next().unwrap()); 286 | 
test::black_box(isect.any()); 287 | } 288 | }); 289 | } 290 | 291 | #[bench] 292 | fn bench_intersect_8_aabbs_per_mray(b: &mut test::Bencher) { 293 | let rays = bench::mrays_inward(4096 / 8); 294 | let aabbs = bench::aabbs(4096); 295 | let mut rays_it = rays.iter().cycle(); 296 | let mut aabbs_it = aabbs.iter().cycle(); 297 | b.iter(|| { 298 | let ray = rays_it.next().unwrap(); 299 | for _ in 0..8 { 300 | let aabb = aabbs_it.next().unwrap(); 301 | let isect = aabb.intersect(ray); 302 | test::black_box(isect.any()); 303 | } 304 | }); 305 | } 306 | -------------------------------------------------------------------------------- /src/bench.rs: -------------------------------------------------------------------------------- 1 | // Convector -- An interactive CPU path tracer 2 | // Copyright 2016 Ruud van Asseldonk 3 | 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License version 3. A copy 6 | // of the License is available in the root of the repository. 7 | 8 | //! This module generates test data for the benchmarks. 9 | 10 | use aabb::Aabb; 11 | use material::SMaterial; 12 | use quaternion::{MQuaternion, SQuaternion}; 13 | use rand; 14 | use rand::Rng; 15 | use rand::distributions::{IndependentSample, Range}; 16 | use ray::{MRay, SRay}; 17 | use simd::Mf32; 18 | use std::f32::consts; 19 | use triangle::Triangle; 20 | use vector3::{MVector3, SVector3}; 21 | 22 | /// Generates n random Mf32s in the range [0, 1). 23 | pub fn mf32_unit(n: usize) -> Vec { 24 | let mut mf32s = Vec::with_capacity(n); 25 | let mut rng = rand::thread_rng(); 26 | let range = Range::new(0.0, 1.0); 27 | for _ in 0..n { 28 | mf32s.push(Mf32::generate(|_| range.ind_sample(&mut rng))); 29 | } 30 | mf32s 31 | } 32 | 33 | /// Generates n random Mf32s in the range [-1, 1). 
34 | pub fn mf32_biunit(n: usize) -> Vec { 35 | let mut mf32s = Vec::with_capacity(n); 36 | let mut rng = rand::thread_rng(); 37 | let range = Range::new(-1.0, 1.0); 38 | for _ in 0..n { 39 | mf32s.push(Mf32::generate(|_| range.ind_sample(&mut rng))); 40 | } 41 | mf32s 42 | } 43 | 44 | /// Generates n vectors distributed uniformly on the unit sphere. 45 | pub fn svectors_on_unit_sphere(n: usize) -> Vec { 46 | let mut rng = rand::thread_rng(); 47 | let phi_range = Range::new(0.0, 2.0 * consts::PI); 48 | let cos_theta_range = Range::new(-1.0_f32, 1.0); 49 | let mut vectors = Vec::with_capacity(n); 50 | for _ in 0..n { 51 | let phi = phi_range.ind_sample(&mut rng); 52 | let theta = cos_theta_range.ind_sample(&mut rng).acos(); 53 | let vector = SVector3 { 54 | x: phi.cos() * theta.sin(), 55 | y: phi.sin() * theta.sin(), 56 | z: theta.cos(), 57 | }; 58 | vectors.push(vector); 59 | } 60 | vectors 61 | } 62 | 63 | /// Generates n times 8 vectors distributed uniformly on the unit sphere. 64 | pub fn mvectors_on_unit_sphere(n: usize) -> Vec { 65 | let mut vectors = Vec::with_capacity(n); 66 | for _ in 0..n { 67 | let p = svectors_on_unit_sphere(8); 68 | let x = Mf32::generate(|i| p[i].x); 69 | let y = Mf32::generate(|i| p[i].y); 70 | let z = Mf32::generate(|i| p[i].z); 71 | vectors.push(MVector3::new(x, y, z)); 72 | } 73 | vectors 74 | } 75 | 76 | /// Generates n quaternions uniformly distributed over the unit sphere. 77 | pub fn unit_squaternions(n: usize) -> Vec { 78 | let mut rng = rand::thread_rng(); 79 | let range = Range::new(-1.0_f32, 1.0); 80 | let mut quaternions = Vec::with_capacity(n); 81 | 82 | let mut i = 0; 83 | while i < n { 84 | let a = range.ind_sample(&mut rng); 85 | let b = range.ind_sample(&mut rng); 86 | let c = range.ind_sample(&mut rng); 87 | let d = range.ind_sample(&mut rng); 88 | 89 | // Use rejection sampling because I do not know how to sample a 4D unit 90 | // sphere uniformly. 
91 | let norm_squared = a * a + b * b + c * c + d * d; 92 | if norm_squared > 1.0 { 93 | continue; 94 | } 95 | 96 | let norm = norm_squared.sqrt(); 97 | let q = SQuaternion::new(a / norm, b / norm, c / norm, d / norm); 98 | quaternions.push(q); 99 | 100 | i += 1; 101 | } 102 | 103 | quaternions 104 | } 105 | 106 | /// Generates n times 8 quaternions uniformly distributed over the unit sphere. 107 | pub fn unit_mquaternions(n: usize) -> Vec { 108 | let mut quaternions = Vec::with_capacity(n); 109 | for _ in 0..n { 110 | let q = unit_squaternions(8); 111 | let a = Mf32::generate(|i| q[i].a); 112 | let b = Mf32::generate(|i| q[i].b); 113 | let c = Mf32::generate(|i| q[i].c); 114 | let d = Mf32::generate(|i| q[i].d); 115 | quaternions.push(MQuaternion::new(a, b, c, d)); 116 | } 117 | quaternions 118 | } 119 | 120 | /// Generates n pairs of nonzero vectors. 121 | pub fn svector3_pairs(n: usize) -> Vec<(SVector3, SVector3)> { 122 | let mut a = svectors_on_unit_sphere(n); 123 | let mut b = svectors_on_unit_sphere(n); 124 | let pairs = a.drain(..).zip(b.drain(..)).collect(); 125 | pairs 126 | } 127 | 128 | /// Generates n times 8 pairs of nonzero vectors. 129 | pub fn mvector3_pairs(n: usize) -> Vec<(MVector3, MVector3)> { 130 | let mut a = mvectors_on_unit_sphere(n); 131 | let mut b = mvectors_on_unit_sphere(n); 132 | let pairs = a.drain(..).zip(b.drain(..)).collect(); 133 | pairs 134 | } 135 | 136 | /// Generates rays with origin on a sphere, pointing to the origin. 137 | pub fn srays_inward(radius: f32, n: usize) -> Vec { 138 | svectors_on_unit_sphere(n).iter().map(|&x| SRay::new(x * radius, -x)).collect() 139 | } 140 | 141 | /// Generates a random AABB and n rays of which m intersect the box. 
142 | pub fn aabb_with_srays(n: usize, m: usize) -> (Aabb, Vec) { 143 | let origin = SVector3::new(-1.0, -1.0, -1.0); 144 | let far = SVector3::new(1.0, 1.0, 1.0); 145 | let aabb = Aabb::new(origin, far); 146 | let up = SVector3::new(0.0, 0.0, 1.0); 147 | let mut rays = srays_inward(16.0, n); 148 | 149 | // Offset the m-n rays that should not intersect the box in a direction 150 | // perpendicular to the ray. 151 | for i in m..n { 152 | let p = rays[i].origin + up.cross(rays[i].direction).normalized() * 16.0; 153 | rays[i].origin = p; 154 | } 155 | 156 | // Shuffle the intersecting and non-intersecting rays to confuse the branch 157 | // predictor. 158 | rand::thread_rng().shuffle(&mut rays[..]); 159 | 160 | (aabb, rays) 161 | } 162 | 163 | /// Generates a random AABB and n rays of which m intersect the box, 164 | /// packed per 8 rays. N must be a multiple of 8. 165 | pub fn aabb_with_mrays(n: usize, m: usize) -> (Aabb, Vec) { 166 | assert_eq!(0, n & 7); // Must be a multiple of 8. 167 | let (aabb, srays) = aabb_with_srays(n, m); 168 | let mrays = srays.chunks(8) 169 | .map(|rs| MRay::generate(|i| rs[i].clone())) 170 | .collect(); 171 | (aabb, mrays) 172 | } 173 | 174 | /// Generates n triangles with vertices on the unit sphere. 175 | pub fn triangles(n: usize) -> Vec { 176 | let v0s = svectors_on_unit_sphere(n); 177 | let v1s = svectors_on_unit_sphere(n); 178 | let v2s = svectors_on_unit_sphere(n); 179 | v0s.iter() 180 | .zip(v1s.iter().zip(v2s.iter())) 181 | .map(|(&v0, (&v1, &v2))| Triangle::new(v0, v1, v2, SMaterial::white())) 182 | .collect() 183 | } 184 | 185 | /// Generates n bounding boxes with two vertices on the unit sphere. 
186 | pub fn aabbs(n: usize) -> Vec { 187 | let v0s = svectors_on_unit_sphere(n); 188 | let v1s = svectors_on_unit_sphere(n); 189 | v0s.iter() 190 | .zip(v1s.iter()) 191 | .map(|(&v0, &v1)| Aabb::new(SVector3::min(v0, v1), SVector3::max(v0, v1))) 192 | .collect() 193 | } 194 | 195 | /// Generates n mrays originating from a sphere of radius 10, pointing inward. 196 | pub fn mrays_inward(n: usize) -> Vec { 197 | let origins = mvectors_on_unit_sphere(n); 198 | let dests = mvectors_on_unit_sphere(n); 199 | origins.iter() 200 | .zip(dests.iter()) 201 | .map(|(&from, &to)| { 202 | let origin = from * Mf32::broadcast(10.0); 203 | let direction = (to - origin).normalized(); 204 | MRay::new(origin, direction) 205 | }) 206 | .collect() 207 | } 208 | 209 | /// Generates n mrays originating from a sphere of radius 10, pointing inward. 210 | /// The rays share the origin and point roughly in the same direction. 211 | pub fn mrays_inward_coherent(n: usize) -> Vec { 212 | let origins = svectors_on_unit_sphere(n); 213 | let dests = mvectors_on_unit_sphere(n); 214 | origins.iter() 215 | .zip(dests.iter()) 216 | .map(|(&from, &to)| { 217 | let origin = MVector3::broadcast(from * 10.0); 218 | let dest = to * Mf32::broadcast(0.5); 219 | let direction = (dest - origin).normalized(); 220 | MRay::new(origin, direction) 221 | }) 222 | .collect() 223 | } 224 | 225 | #[test] 226 | fn aabb_with_srays_respects_probability() { 227 | let (aabb, rays) = aabb_with_srays(4096, 2048); 228 | let mut n = 0; 229 | for ray in &rays { 230 | let mray = MRay::broadcast(ray); 231 | if aabb.intersect(&mray).any() { 232 | n += 1; 233 | } 234 | } 235 | assert_eq!(2048, n); 236 | } 237 | -------------------------------------------------------------------------------- /src/gpu/blend.glsl: -------------------------------------------------------------------------------- 1 | // Convector -- An interactive CPU path tracer 2 | // Copyright 2016 Ruud van Asseldonk 3 | 4 | // This program is free software: you can 
redistribute it and/or modify 5 | // it under the terms of the GNU General Public License version 3. A copy 6 | // of the License is available in the root of the repository. 7 | 8 | #version 140 9 | 10 | in vec2 v_tex_coords; 11 | out vec4 color; 12 | 13 | uniform sampler2D frame0; 14 | uniform sampler2D frame1; 15 | uniform sampler2D frame2; 16 | uniform sampler2D frame3; 17 | uniform sampler2D frame4; 18 | uniform sampler2D frame5; 19 | uniform sampler2D frame6; 20 | uniform sampler2D frame7; 21 | 22 | void main() { 23 | vec4 c0 = texture(frame0, v_tex_coords); 24 | vec4 c1 = texture(frame1, v_tex_coords); 25 | vec4 c2 = texture(frame2, v_tex_coords); 26 | vec4 c3 = texture(frame3, v_tex_coords); 27 | vec4 c4 = texture(frame4, v_tex_coords); 28 | vec4 c5 = texture(frame5, v_tex_coords); 29 | vec4 c6 = texture(frame6, v_tex_coords); 30 | vec4 c7 = texture(frame7, v_tex_coords); 31 | 32 | // Take the mean of the eight frames. 33 | color = (c0 + c1 + c2 + c3 + c4 + c5 + c6 + c7) * 0.125f; 34 | color.a = 1.0f; 35 | } 36 | -------------------------------------------------------------------------------- /src/gpu/gbuffer.glsl: -------------------------------------------------------------------------------- 1 | // Convector -- An interactive CPU path tracer 2 | // Copyright 2016 Ruud van Asseldonk 3 | 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License version 3. A copy 6 | // of the License is available in the root of the repository. 7 | 8 | #version 140 9 | 10 | in vec2 v_tex_coords; 11 | out vec4 color; 12 | 13 | uniform sampler2D frame; 14 | uniform sampler2D gbuffer; 15 | uniform sampler2D texture1; 16 | uniform sampler2D texture2; 17 | 18 | void main() { 19 | color = texture(frame, v_tex_coords); 20 | vec4 data = texture(gbuffer, v_tex_coords); 21 | 22 | float fresnel = data.b; 23 | vec4 white = vec4(1.0f, 1.0f, 1.0f, 1.0f); 24 | 25 | // The alpha channel contains the texture index. 
Texture index 0 indicates 26 | // that the texture is not used, so the pixel is already correct. For the 27 | // other textures, sample them and blend according to the Fresnel factor. 28 | 29 | if (data.a == 1.0f / 255.0f) { 30 | vec4 tex_color = texture(texture1, data.xy); 31 | vec4 surface_color = white * fresnel + tex_color * (1.0f - fresnel); 32 | color = color * surface_color; 33 | } 34 | 35 | if (data.a == 2.0f / 255.0f) { 36 | vec4 tex_color = texture(texture2, data.xy); 37 | vec4 surface_color = white * fresnel + tex_color * (1.0f - fresnel); 38 | color = color * surface_color; 39 | } 40 | 41 | // Texture index 3 is currently not used. 42 | } 43 | -------------------------------------------------------------------------------- /src/gpu/id.glsl: -------------------------------------------------------------------------------- 1 | // Convector -- An interactive CPU path tracer 2 | // Copyright 2016 Ruud van Asseldonk 3 | 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License version 3. A copy 6 | // of the License is available in the root of the repository. 7 | 8 | #version 140 9 | 10 | in vec2 v_tex_coords; 11 | out vec4 color; 12 | 13 | uniform sampler2D frame; 14 | 15 | void main() { 16 | color = texture(frame, v_tex_coords); 17 | } 18 | -------------------------------------------------------------------------------- /src/gpu/median.glsl: -------------------------------------------------------------------------------- 1 | // Convector -- An interactive CPU path tracer 2 | // Copyright 2016 Ruud van Asseldonk 3 | 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License version 3. A copy 6 | // of the License is available in the root of the repository. 
#version 140

in vec2 v_tex_coords;
out vec4 color;

uniform sampler2D frame;
uniform vec2 pixel_size;

// Compare-exchange step of the sorting network: afterwards `lo` holds the
// component-wise minimum and `hi` the component-wise maximum of the inputs.
void sort2(inout vec4 lo, inout vec4 hi) {
    vec4 smaller = min(lo, hi);
    hi = max(lo, hi);
    lo = smaller;
}

// Sorts five values component-wise with a fixed network of nine
// compare-exchange steps. The order of the steps is significant.
void sort(inout vec4 p0, inout vec4 p1, inout vec4 p2, inout vec4 p3, inout vec4 p4) {
    sort2(p0, p1);
    sort2(p3, p4);
    sort2(p0, p2);
    sort2(p1, p2);
    sort2(p0, p3);
    sort2(p2, p3);
    sort2(p1, p4);
    sort2(p1, p2);
    sort2(p3, p4);
}

// Converts the colour channels from linear CIE RGB to CIE XYZ; the alpha
// channel is passed through untouched.
vec4 rgb_to_xyz(vec4 c) {
    // Note: GLSL matrix constructors take their arguments column by column.
    mat3 conv = mat3(0.49f, 0.17697f, 0.0f,
                     0.31f, 0.81240f, 0.01f,
                     0.20f, 0.01063f, 0.99f) * (1.0f / 0.17697f);
    return vec4(conv * c.rgb, c.a);
}

// Converts the colour channels from CIE XYZ back to linear CIE RGB; the alpha
// channel is passed through untouched.
vec4 xyz_to_rgb(vec4 c) {
    mat3 conv = mat3(0.41847f, -0.091169f, 0.00092090f,
                     -0.15866f, 0.25243f, -0.0025498f,
                     -0.082835, 0.015708, 0.17860);
    return vec4(conv * c.xyz, c.a);
}

void main() {
    // Gather the centre pixel and its four direct neighbours ("+" shape).
    vec2 dx = vec2(pixel_size.x, 0.0f);
    vec2 dy = vec2(0.0f, pixel_size.y);

    vec4 px[5];
    px[0] = texture(frame, v_tex_coords);
    px[1] = texture(frame, v_tex_coords + dx);
    px[2] = texture(frame, v_tex_coords + dy);
    px[3] = texture(frame, v_tex_coords - dx);
    px[4] = texture(frame, v_tex_coords - dy);

    // Filter in CIE XYZ rather than in RGB, so that lightness is better
    // preserved by the component-wise median.
    for (int i = 0; i < 5; i++) {
        px[i] = rgb_to_xyz(px[i]);
    }

    // After sorting, px[2] is the true median. Blend in a bit of its two
    // neighbours in rank for a more balanced, "sort-of-median" result.
    sort(px[0], px[1], px[2], px[3], px[4]);
    vec4 median = px[2] * 0.667f + px[1] * 0.1666f + px[3] * 0.1666f;

    // Back to linear RGB for output.
    color = xyz_to_rgb(median);
}
11 | 12 | #![feature(alloc, cfg_target_feature, heap_api, platform_intrinsics, repr_simd, test)] 13 | 14 | extern crate alloc; 15 | extern crate filebuffer; 16 | extern crate imagefmt; 17 | extern crate num_cpus; 18 | extern crate rand; 19 | extern crate rayon; 20 | extern crate scoped_threadpool; 21 | extern crate test; 22 | extern crate thread_id; 23 | extern crate time; 24 | 25 | #[macro_use] 26 | extern crate glium; 27 | 28 | mod aabb; 29 | mod bvh; 30 | mod material; 31 | mod quaternion; 32 | mod random; 33 | mod ray; 34 | mod renderer; 35 | mod scene; 36 | mod simd; 37 | mod stats; 38 | mod trace; 39 | mod triangle; 40 | mod ui; 41 | mod util; 42 | mod vector3; 43 | mod wavefront; 44 | 45 | #[cfg(test)] 46 | mod bench; 47 | 48 | use material::SMaterial; 49 | use renderer::{RenderBuffer, Renderer}; 50 | use scene::Scene; 51 | use stats::GlobalStats; 52 | use std::collections::HashMap; 53 | use std::mem; 54 | use time::PreciseTime; 55 | use ui::{Action, Window}; 56 | use wavefront::Mesh; 57 | 58 | fn load_textures() -> Vec> { 59 | use imagefmt::ColFmt; 60 | 61 | println!("loading textures"); 62 | let tex_floor = imagefmt::read("textures/floor.jpg", ColFmt::RGB); 63 | let tex_wood = imagefmt::read("textures/wood_light.jpg", ColFmt::RGB); 64 | let mut textures = Vec::with_capacity(2); 65 | textures.push(tex_floor.expect("failed to read floor.jpeg").buf); 66 | textures.push(tex_wood.expect("failed to read wood_light.jpg").buf); 67 | textures 68 | } 69 | 70 | fn build_scene() -> Scene { 71 | println!("loading geometry"); 72 | let mut materials = HashMap::new(); 73 | materials.insert("baseboard", SMaterial::white().with_glossiness(4)); 74 | materials.insert("ceiling", SMaterial::white().with_glossiness(1)); 75 | materials.insert("fauteuil", SMaterial::diffuse(1.0, 0.1, 0.4)); 76 | materials.insert("floor", SMaterial::diffuse(0.569, 0.494, 0.345).with_glossiness(4).with_texture(1)); 77 | materials.insert("glass", SMaterial::sky()); 78 | materials.insert("wall", 
/// Program entry point: sets up the window, renderer and worker pool, then runs
/// the render loop until the user quits.
///
/// Every loop iteration handles window events, queues one render task per
/// patch on the thread pool, and -- while the workers are busy -- uploads and
/// displays the frame that was rendered during the previous iteration.
fn main() {
    // The patch size has been tuned for 8 cores. With a resolution of 1280x736 there are 920
    // patches to be rendered by the worker pool. Increasing the patch size to 64 results in 230
    // patches, but some patches are very heavy to render and some are practically a no-op, so all
    // threads might stall because one thread did not yet finish the frame. A patch width of 32 is
    // a good balance between throughput and latency.
    let width = 1280;
    let height = 736;
    let patch_width = 32;

    let mut window = Window::new(width, height, "Convector interactive path tracer");
    let mut renderer = Renderer::new(build_scene(), width, height);
    let mut stats = GlobalStats::new();
    let mut trace_log = trace::TraceLog::with_limit(6 * 1024);
    let mut threadpool = scoped_threadpool::Pool::new(num_cpus::get() as u32);
    // Colour buffer and its companion "g" buffer that the workers render into.
    let mut backbuffer = RenderBuffer::new(width, height);
    let mut backbuffer_g = RenderBuffer::new(width, height);
    // Accumulation buffer used in non-realtime mode, together with the number
    // of samples that have been accumulated into it so far.
    let mut f32_buffer = renderer.new_buffer_f32(); // TODO: Consistency.
    let mut f32_buffer_samples = 0;
    let mut should_continue = true;
    let mut render_realtime = true;

    for texture in load_textures() {
        window.upload_texture(texture);
    }

    backbuffer.fill_black();
    let epoch = PreciseTime::now();

    // Insert one fake value so we have an initial guess for the time delta.
    stats.frame_us.insert(16_667);

    println!("scene and renderer initialized, entering render loop");

    while should_continue {
        let frame_number = trace_log.inc_frame_number();
        let stw_frame = trace_log.scoped("render_frame", 0);

        // Time since startup in seconds, and the median frame duration in
        // seconds (frame times are recorded in microseconds).
        let time = epoch.to(PreciseTime::now()).num_milliseconds() as f32 * 1e-3;
        let time_delta = (stats.frame_us.median() as f32) * 1e-6;

        match window.handle_events() {
            Action::DumpTrace => {
                trace_log.export_to_file("trace.json").expect("failed to write trace");
                println!("wrote trace to trace.json");
            }
            Action::Quit => should_continue = false,
            Action::PrintStats => stats.print(),
            Action::ToggleDebugView => renderer.toggle_debug_view(),
            Action::ToggleRealtime => {
                render_realtime = !render_realtime;
                // Start from a fresh accumulation buffer on every toggle.
                f32_buffer = renderer.new_buffer_f32();
                f32_buffer_samples = 0;
                // In accumulative mode the time is fixed and there is no motion
                // blur.
                renderer.set_time(time, 0.0);
            }
            Action::None => {}
        }

        // Only advance the time in realtime mode; in accumulative mode the
        // time that was set when toggling stays in effect.
        if render_realtime {
            renderer.set_time(time, time_delta);
        }
        renderer.update_scene();

        // When rendering in accumulation mode, first copy the current state
        // into the backbuffer (which will immediately after this become the new
        // front buffer) so we can display it later.
        if !render_realtime {
            // Use 1 instead of 0 for the very first frame after a toggle.
            let n = if f32_buffer_samples > 0 { f32_buffer_samples } else { 1 };
            renderer.buffer_f32_into_render_buffer(&f32_buffer, &mut backbuffer, n);
            f32_buffer_samples += 1;
        }

        // Swap buffers: the buffers rendered so far become the front buffers
        // that are displayed below, and fresh buffers take their place as the
        // render targets for this frame.
        let new_backbuffer = RenderBuffer::new(width, height);
        let new_backbuffer_g = RenderBuffer::new(width, height);
        let frontbuffer = mem::replace(&mut backbuffer, new_backbuffer);
        let frontbuffer_g = mem::replace(&mut backbuffer_g, new_backbuffer_g);
        // Shared references that are moved into the worker closures below.
        let renderer_ref = &renderer;
        let trace_log_ref = &trace_log;
        let backbuffer_ref = &backbuffer;
        let backbuffer_g_ref = &backbuffer_g;
        let f32_buffer_ref = &f32_buffer[..];

        threadpool.scoped(|scope| {

            // Number of patches horizontally and vertically. This is an
            // integer division: if the resolution is not a multiple of
            // `patch_width`, the remainder at the right and bottom edge gets
            // no patch.
            let w = width / patch_width;
            let h = height / patch_width;

            // Queue tasks for the worker threads to render patches.
            for i in 0..w {
                for j in 0..h {
                    scope.execute(move || {
                        // Top-left pixel coordinate of this patch.
                        let x = i * patch_width;
                        let y = j * patch_width;

                        // Multiple threads mutably borrow the buffer below,
                        // which could cause races, but all of the patches are
                        // disjoint, hence it is safe.

                        // `j * w + i` is the row-major patch index; it is
                        // passed to the trace log along with the event name.
                        if render_realtime {
                            let _stw = trace_log_ref.scoped("render_patch_u8", j * w + i);
                            let bitmap = unsafe { backbuffer_ref.get_mut_slice() };
                            let gbuffer = unsafe { backbuffer_g_ref.get_mut_slice() };
                            renderer_ref.render_patch_u8(bitmap, gbuffer, patch_width, x, y, frame_number);
                        } else {
                            let _stw = trace_log_ref.scoped("accumulate_patch_f32", j * w + i);
                            let buffer = unsafe { util::make_mutable(f32_buffer_ref) };
                            let gbuffer = unsafe { backbuffer_g_ref.get_mut_slice() };
                            renderer_ref.accumulate_patch_f32(buffer, gbuffer, patch_width, x, y, frame_number);
                        }
                    });
                }
            }

            // In the mean time upload the previous frame to the GPU
            // and display it.
            let _stw_display = trace_log.scoped("display_buffer", 0);
            window.display_buffer(frontbuffer.into_bitmap(),
                                  frontbuffer_g.into_bitmap(),
                                  &mut stats);

            // The scope automatically waits for all tasks to complete
            // before the loop continues.
        });

        // Record how long this frame took; the median of these durations is
        // the time delta of later frames.
        stats.frame_us.insert_time_us(stw_frame.take_duration());
    }
}
62 | pub fn interpolate(&self, delta: &MQuaternion, t: Mf32) -> MQuaternion { 63 | // The hypersphere of unit quaternions forms a double cover of SO3(R). 64 | // Every rotation is represented by two antipodal points on the 65 | // hypersphere. If we naively run over the arc subtended by the two 66 | // quaternions, then we could make an arc of more than pi/2 radians, but 67 | // that means that we could make a shorter arc by taking the antipodal 68 | // point of one of the quaternions. The shortest arc corresponds to the 69 | // interpolation we want, the longer arc rotates too much. So for 70 | // correct interpolation, compute the dot product of the two 71 | // quaternions, and if it is negative, negate one of the two. 72 | // Fortunately, in my demo I get to pick the quaternions, so I can 73 | // choose them so they get interpolated correctly, and there is no need 74 | // to negate anything. 75 | 76 | // Interpolate linearly between the two quaternions, and then project 77 | // the result onto the unit hypersphere. This is not entirely correct 78 | // because the rotation will not have a constant angular velocity. For a 79 | // proper interpolation with constant velocity, a spherical linear 80 | // interpolation is required, but that is expensive to compute. (It 81 | // involves an inverse cosine, two sines and two divisions.) For small 82 | // angles the error is very small, so do the fast thing here. 83 | let a = delta.a.mul_add(t, self.a); 84 | let b = delta.b.mul_add(t, self.b); 85 | let c = delta.c.mul_add(t, self.c); 86 | let d = delta.d.mul_add(t, self.d); 87 | 88 | let norm_squared = a.mul_add(a, b * b) + c.mul_add(c, d * d); 89 | 90 | // Using a full square root and division here makes this method about 91 | // 17% slower in comparison to using an `rsqrt()`. However, this is also 92 | // more accurate. The `rsqrt()` approach has a relatively big error, and 93 | // as this code is used to generate camera rays, it had better be 94 | // accurate. 
/// Rotates `vector` by the rotation that the quaternion `rotation` represents.
///
/// The result is the imaginary part of `q * v * q^-1`, where the conjugate is
/// used as the inverse. That is only valid when `rotation` is a unit
/// quaternion.
pub fn rotate(vector: &MVector3, rotation: &MQuaternion) -> MVector3 {
    let v = vector;
    let q = rotation;

    // For a unit quaternion q and a vector in R3 identified with the subspace
    // of the quaternion algebra spanned by (i, j, k), the rotated vector is
    // given by q * v * q^-1. (And because q is a unit quaternion, its inverse
    // is its conjugate.) This means that we can compute the rotation in two
    // steps: p = v * q^-1, and q * p. The first step is simpler than generic
    // quaternion multiplication because we know that v is pure imaginary. The
    // second step is simpler than generic quaternion multiplication because we
    // know that the result is pure imaginary, so the real component does not
    // have to be computed.

    // For q = a + b*i + c*j + d*k and v = x*i + y*j + z*k, v * q^-1 is given
    // by
    //
    //     b*x + c*y + d*z +
    //     ((a - b)*x + (c - d)*(y + z) + b*x - c*y + d*z)*i +
    //     (d*x + a*y - b*z)*j +
    //     (-(c + d)*x + (a + b)*(y + z) + d*x - a*y - b*z)*k
    //
    // I did not bother with using `mul_add` or eliminating common
    // subexpressions below because the code is unreadable enough as it is ...

    // Components of p = v * q^-1: pa is the real part, (pb, pc, pd) are the
    // coefficients of (i, j, k).
    let pa = q.b * v.x + q.c * v.y + q.d * v.z;
    let pb = q.b * v.x - q.c * v.y + q.d * v.z + (q.a - q.b) * v.x + (q.c - q.d) * (v.y + v.z);
    let pc = q.d * v.x + q.a * v.y - q.b * v.z;
    let pd = q.d * v.x - q.a * v.y - q.b * v.z - (q.c + q.d) * v.x + (q.a + q.b) * (v.y + v.z);

    // The product of q = qa + qb*i + qc*j + qd*k and
    // p = pa + pb*i + pc*j + pd*k is given by
    //
    //     pa*qa - pb*qb - pc*qc - pd*qd +
    //     ((pa + pb)*(qa + qb) - (pc - pd)*(qc + qd) - pa*qa - pb*qb + pc*qc - pd*qd)*i +
    //     (pc*qa - pd*qb + pa*qc + pb*qd)*j +
    //     ((pc + pd)*(qa + qb) + (pa - pb)*(qc + qd) - pc*qa - pd*qb - pa*qc + pb*qd)*k

    // Only the (i, j, k) coefficients of q * p are computed; the real part is
    // skipped because it is not part of the rotated vector.
    let rb = (pa + pb) * (q.a + q.b) - (pc - pd) * (q.c + q.d) - pa * q.a - pb * q.b + pc * q.c - pd * q.d;
    let rc = pc * q.a - pd * q.b + pa * q.c + pb * q.d;
    let rd = (pc + pd) * (q.a + q.b) + (pa - pb) * (q.c + q.d) - pc * q.a - pd * q.b - pa * q.c + pb * q.d;

    MVector3::new(rb, rc, rd)
}
/// Checks `rotate` against a quarter turn around the x-axis, for which the
/// expected result can be written down directly.
#[test]
fn rotate_x() {
    // q = cos(pi/4) + sin(pi/4)*i is a rotation of pi/2 radians around the
    // x-axis; both components equal sqrt(2)/2.
    let component = 0.5 * 2.0_f32.sqrt();
    let quarter_turn = MQuaternion::broadcast(SQuaternion::new(component, component, 0.0, 0.0));

    for v in &bench::mvectors_on_unit_sphere(32) {
        // The rotation is equivalent to y <- -z, z <- y. Compare that against
        // the quaternion rotation, allowing for floating point inaccuracy.
        let expected = MVector3::new(v.x, -v.z, v.y);
        let computed = rotate(v, &quarter_turn);
        assert_mvectors_equal(expected, computed, 1e-6);
    }
}
/// Checks `rotate` against a quarter turn around the z-axis, for which the
/// expected result can be written down directly.
#[test]
fn rotate_z() {
    // q = cos(pi/4) + sin(pi/4)*k is a rotation of pi/2 radians around the
    // z-axis; both components equal sqrt(2)/2.
    let component = 0.5 * 2.0_f32.sqrt();
    let quarter_turn = MQuaternion::broadcast(SQuaternion::new(component, 0.0, 0.0, component));

    for v in &bench::mvectors_on_unit_sphere(32) {
        // The rotation is equivalent to y <- x, x <- -y. Compare that against
        // the quaternion rotation, allowing for floating point inaccuracy.
        let expected = MVector3::new(-v.y, v.x, v.z);
        let computed = rotate(v, &quarter_turn);
        assert_mvectors_equal(expected, computed, 1e-6);
    }
}
/// Benchmarks `rotate`: every benchmark iteration performs 1000 rotations
/// (100 loop rounds of a call that is unrolled 10 times by `unroll_10!`).
#[bench]
fn bench_rotate_1000(b: &mut test::Bencher) {
    // Presumably the argument is the number of generated values (4096 / 8 =
    // 512) -- confirm against the `bench` module.
    let vectors = bench::mvectors_on_unit_sphere(4096 / 8);
    let quaternions = bench::unit_mquaternions(4096 / 8);
    // Cycle through the inputs endlessly so that every benchmark iteration
    // gets the next (vector, quaternion) pair.
    let mut it = vectors.iter().cycle().zip(quaternions.iter().cycle());
    b.iter(|| {
        let (v, q) = it.next().unwrap();
        for _ in 0..100 {
            unroll_10! {{
                // `black_box` on the inputs and on the result keeps the
                // optimizer from hoisting or eliminating the repeated calls.
                test::black_box(rotate(test::black_box(v), test::black_box(q)));
            }};
        }
    });
}
14 | 15 | use simd::{Mf32, Mi32, Mu64}; 16 | use std::f32::consts; 17 | use std::i32; 18 | use vector3::MVector3; 19 | 20 | #[cfg(test)] 21 | use test; 22 | 23 | // A theorem that is used intensively in this file: if n and m are coprime, then 24 | // the map x -> n * x is a bijection of Z/mZ. In practice m is a power of two 25 | // (2^64 in this case), so anything not divisible by two will do for n, but we 26 | // might as well take a prime. 27 | // 28 | // With that you can build a simple and fast hash function for integers: 29 | // multiply with a number coprime to 2. On a computer you get the "modulo a 30 | // power of two" for free. For more details on why this works pretty well, 31 | // Knuth has an entire section devoted to it in Volume 3 of TAOCP. 32 | 33 | pub struct Rng { 34 | state: Mu64, 35 | } 36 | 37 | impl Rng { 38 | /// Creates a new random number generator. 39 | /// 40 | /// The generator is seeded from three 32-bit integers, suggestively called 41 | /// x, y, and i (for frame number). These three values are hashed together, 42 | /// and that is used as the seed. 43 | pub fn with_seed(x: u32, y: u32, i: u32) -> Rng { 44 | // The constants here are all primes. It is important that the four 45 | // values in the final multiplication are distinct, otherwise the 46 | // sequences will produce the same values. Also, the primes should not 47 | // be close together, otherwise correlations will be apparent. The 48 | // values `x`, `y`, and `i` are hashed with different functions to 49 | // ensure that a permutation of (x, y, i) results in a different seed, 50 | // otherwise patterns would appear because the range of x and y is 51 | // similar. 
52 | let a = (x as u64).wrapping_mul(12276630456901467871); 53 | let b = (y as u64).wrapping_mul(7661526868048087387); 54 | let c = (i as u64).wrapping_mul(2268244495640532043); 55 | let seed = a.wrapping_add(b).wrapping_add(c); 56 | 57 | // If I only use the above scheme, the seed has a severe bias modulo 58 | // small powers of two. (For instance, x and y are always multiples of 59 | // 16 and 4, so modulo 8, a + b is always 0 or 4.) To avoid this, take 60 | // the seed modulo a prime. This removes the correlation modulo small 61 | // powers of two. 62 | let seed = seed.wrapping_add(seed % 9358246936573323101); 63 | 64 | let primes = Mu64(14491630826648200009, 65 | 13149596372461506851, 66 | 6119410235796056053, 67 | 14990141545859273719); 68 | 69 | Rng { state: Mu64(seed, seed, seed, seed) * primes } 70 | } 71 | 72 | /// Updates the state and returns the old state. 73 | fn next(&mut self) -> Mu64 { 74 | let old_state = self.state; 75 | 76 | // Again, this is really nothing more than iteratively hashing the 77 | // state. It is faster than e.g. xorshift, and the quality of the 78 | // random numbers is still good enough. To demonstrate that it is 79 | // sufficient that the factor is coprime to 2 I picked a composite 80 | // number here. Try multiplying it by two and observe how the state 81 | // reaches 0 after a few iterations. 82 | 83 | let f1 = 3 * 1073243692214514217; 84 | let f2 = 5 * 3335100457702756523; 85 | let f3 = 7 * 8789056573444181; 86 | let f4 = 11 * 781436371140792079; 87 | self.state = self.state * Mu64(f1, f2, f3, f4); 88 | 89 | old_state 90 | } 91 | 92 | /// Returns 8 random 32-bit integers. 93 | /// 94 | /// Note: a sequence of generated numbers is not random modulo small 95 | /// composite numbers. Take the high order bits of this random number to 96 | /// avoid bias and correlations. 
97 | pub fn sample_u32(&mut self) -> [u32; 8] { 98 | use std::mem::transmute_copy; 99 | // Note: using a `transmute` instead of `transmute_copy` can cause a 100 | // segmentation fault. See https://github.com/rust-lang/rust/issues/32947. 101 | unsafe { transmute_copy(&self.next()) } 102 | } 103 | 104 | /// Returns 8 random numbers distributed uniformly over the half-open 105 | /// interval [0, 1). 106 | pub fn sample_unit(&mut self) -> Mf32 { 107 | use std::mem::transmute; 108 | 109 | let mi32: Mi32 = unsafe { transmute(self.next()) }; 110 | let range = Mf32::broadcast(0.5 / i32::MIN as f32); 111 | let half = Mf32::broadcast(0.5); 112 | 113 | mi32.into_mf32().mul_add(range, half) 114 | } 115 | 116 | /// Returns 8 random numbers distributed uniformly over the half-open 117 | /// interval [-1, 1). 118 | pub fn sample_biunit(&mut self) -> Mf32 { 119 | use std::mem::transmute; 120 | 121 | let mi32: Mi32 = unsafe { transmute(self.next()) }; 122 | let range = Mf32::broadcast(1.0 / i32::MIN as f32); 123 | 124 | mi32.into_mf32() * range 125 | } 126 | 127 | /// Returns 8 random numbers distributed uniformly over the half-open 128 | /// interval [-pi, pi). 129 | pub fn sample_angle(&mut self) -> Mf32 { 130 | use std::mem::transmute; 131 | 132 | let mi32: Mi32 = unsafe { transmute(self.next()) }; 133 | let range = Mf32::broadcast(consts::PI / i32::MIN as f32); 134 | 135 | mi32.into_mf32() * range 136 | } 137 | 138 | /// Returns a random unit vector in the hemisphere around the positive 139 | /// z-axis, drawn from a cosine-weighted distribution. 140 | pub fn sample_hemisphere_vector(&mut self) -> MVector3 { 141 | let phi = self.sample_angle(); 142 | let r_sqr = self.sample_unit(); 143 | 144 | // Instead of the full square root, we could also do a fast inverse 145 | // square root approximation and a reciprocal approximation. It is less 146 | // precise, but according to the Intel intrinsics guide, that would take 147 | // 14 cycles instead of 21. 
/// Returns a random unit vector in the hemisphere around the positive
/// z-axis, drawn from a cosine-weighted distribution.
///
/// This method uses a different sampling method than
/// `sample_hemisphere_vector`. Benchmarks show that it is not faster, and
/// with a small probability this function returns a wrong result too, so it
/// should not be used at all. It is kept here for comparison purposes.
fn sample_hemisphere_vector_reject(&mut self) -> MVector3 {
    // This function uses rejection sampling without branching: sample two
    // points in a square, and if the second one is not inside a circle,
    // take the first one instead. The probability that both points do not
    // lie in a circle is (1 - pi/4)^2, about 4.6%. To reduce that
    // probability further you can take more samples.

    // First candidate point; r0 is its squared distance to the origin.
    let x0 = self.sample_biunit();
    let y0 = self.sample_biunit();
    let r0 = x0.mul_add(x0, y0 * y0);

    // Second candidate point; r1 is its squared distance to the origin.
    let x1 = self.sample_biunit();
    let y1 = self.sample_biunit();
    let r1 = x1.mul_add(x1, y1 * y1);

    // If r1 > 1, then the point lies outside of a unit disk and this value
    // is negative. Its sign is used as the mask that selects between point 0
    // and point 1.
    // NOTE(review): the original comment called the sign bit "positive" for
    // r1 > 1, which contradicts the arithmetic. Which operand `pick` selects
    // for a negative mask is defined in simd.rs -- confirm that point 0 is
    // the one picked when r1 > 1.
    let pick_01 = Mf32::one() - r1;

    let x = x0.pick(x1, pick_01);
    let y = y0.pick(y1, pick_01);
    let r = r0.pick(r1, pick_01);

    // If the selected point lies outside of the unit disk as well, then
    // 1 - r is negative and there is no real square root; this is the wrong
    // result mentioned above.
    let z = (Mf32::one() - r).sqrt();

    MVector3::new(x, y, z)
}
/// Benchmarks `Rng::sample_unit`: every benchmark iteration draws 1000
/// samples (100 loop rounds of a call that is unrolled 10 times by
/// `unroll_10!`).
#[bench]
fn bench_sample_unit_1000(b: &mut test::Bencher) {
    // A fixed seed, so every benchmark run uses the same sequence.
    let mut rng = Rng::with_seed(2, 5, 7);
    b.iter(|| {
        for _ in 0..100 {
            unroll_10! {{
                // `black_box` keeps the unused result from being optimized
                // away.
                test::black_box(rng.sample_unit());
            }};
        }
    });
}

use material::MMaterial;
use simd::{Mask, Mf32};
use std::ops::Neg;
use vector3::{MVector3, SVector3};

/// A single ray, described by an origin and a direction.
#[derive(Clone)]
pub struct SRay {
    pub origin: SVector3,
    pub direction: SVector3,
}

/// A bundle of rays that are handled together, with the origins and
/// directions stored in SIMD vectors (one lane per ray).
#[derive(Clone)]
pub struct MRay {
    pub origin: MVector3,
    pub direction: MVector3,

    /// A mask that determines which rays are active. If the sign bit is
    /// positive (bit is 0) then the ray is active. If the sign bit is negative
    /// (bit is 1) then the ray is inactive.
    ///
    /// This convention might seem backwards, but it makes triangle intersection
    /// more efficient because a negation can be avoided.
    pub active: Mask,
}

/// The result of intersecting an `MRay` with a surface.
pub struct MIntersection {
    /// The position at which the ray intersected the surface.
    pub position: MVector3,

    /// The surface normal at the intersection point.
    pub normal: MVector3,

    /// The distance between the ray origin and the position.
    pub distance: Mf32,

    /// The material at the intersection surface.
    pub material: MMaterial,

    /// Texture coordinates at the intersection point.
    pub tex_coords: (Mf32, Mf32),
}

impl SRay {
    /// Constructs a ray from its origin and direction.
    pub fn new(origin: SVector3, direction: SVector3) -> SRay {
        SRay {
            origin: origin,
            direction: direction,
        }
    }
}

impl MRay {
    /// Constructs a bundle of rays from origins and directions. The `active`
    /// mask is set to zero, which by the convention documented on that field
    /// marks every ray as active.
    pub fn new(origin: MVector3, direction: MVector3) -> MRay {
        MRay {
            origin: origin,
            direction: direction,
            active: Mf32::zero(),
        }
    }

    /// Constructs a bundle in which every lane is a copy of the given ray,
    /// with the `active` mask set to zero.
    pub fn broadcast(ray: &SRay) -> MRay {
        MRay {
            origin: MVector3::broadcast(ray.origin),
            direction: MVector3::broadcast(ray.direction),
            active: Mf32::zero(),
        }
    }

    /// Builds an mray by applying the function to the numbers 0..7.
    ///
    /// Note: this is essentially a transpose, avoid in hot code.
// Note that `f` is invoked twice for every index: once to obtain the origin
// and once to obtain the direction.
pub fn generate<F>(mut f: F) -> MRay
    where F: FnMut(usize) -> SRay
{
    MRay {
        origin: MVector3::generate(|i| f(i).origin),
        direction: MVector3::generate(|i| f(i).direction),
        active: Mf32::zero(),
    }
}
}

impl MIntersection {
    /// Constructs an empty intersection with the specified distance and zeroes
    /// in all other fields. The material is set to the sky material.
    pub fn with_max_distance(max_dist: f32) -> MIntersection {
        MIntersection {
            position: MVector3::zero(),
            normal: MVector3::zero(),
            distance: Mf32::broadcast(max_dist),
            material: MMaterial::sky(),
            tex_coords: (Mf32::zero(), Mf32::zero()),
        }
    }

    /// Combines two intersections field by field, using the same mask for
    /// every field. Which lanes are taken from `self` and which from `other`
    /// is decided by the `pick` implementation of the field types.
    pub fn pick(&self, other: &MIntersection, mask: Mask) -> MIntersection {
        let u = self.tex_coords.0.pick(other.tex_coords.0, mask);
        let v = self.tex_coords.1.pick(other.tex_coords.1, mask);
        MIntersection {
            position: self.position.pick(other.position, mask),
            normal: self.normal.pick(other.normal, mask),
            distance: self.distance.pick(other.distance, mask),
            material: self.material.pick(other.material, mask),
            tex_coords: (u, v),
        }
    }
}

impl Neg for MRay {
    type Output = MRay;

    /// Returns a ray with the same origin and active mask, but with the
    /// direction reversed.
    fn neg(self) -> MRay {
        MRay {
            origin: self.origin,
            direction: MVector3::zero() - self.direction,
            active: self.active,
        }
    }
}

--------------------------------------------------------------------------------
/src/renderer.rs:
--------------------------------------------------------------------------------
// Convector -- An interactive CPU path tracer
// Copyright 2016 Ruud van Asseldonk

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 3. A copy
// of the License is available in the root of the repository.

use material::{continue_path, sky_intensity};
use random::Rng;
use scene::Scene;
use simd::{Mf32, Mi32};
use std::cell::UnsafeCell;
use util::{cache_line_aligned_vec, generate_slice8};
use vector3::{MVector3, SVector3};

/// Renders a scene into a viewport of a fixed size.
pub struct Renderer {
    scene: Scene,
    width: u32,
    height: u32,
    enable_debug_view: bool,

    /// A value that increases at a rate of 1 per second.
    time: f32,

    /// The amount that time increases per frame.
    time_delta: f32,
}

/// The buffer that an image is rendered into.
pub struct RenderBuffer {
    // Every `Mi32` holds 8 RGBA pixels. The `UnsafeCell` allows handing out
    // mutable slices through a shared reference, see `get_mut_slice()`.
    buffer: UnsafeCell<Vec<Mi32>>,
}

/// The render result for 8 pixels at once: the color, plus the data that ends
/// up in the gbuffer (texture index, texture coordinates, Fresnel factor).
struct MPixelData {
    color: MVector3,
    tex_index: Mi32,
    tex_coords: (Mf32, Mf32),
    fresnel: Mf32,
}

impl RenderBuffer {
    /// Allocates a new buffer to render into, memory uninitialized.
    ///
    /// The width and height must be a multiple of 16.
    pub fn new(width: u32, height: u32) -> RenderBuffer {
        assert_eq!(width & 15, 0); // Width must be a multiple of 16.
        assert_eq!(height & 15, 0); // Height must be a multiple of 16.

        // There are 8 RGBA pixels in one mi32.
        let num_elems = (width as usize) * (height as usize) / 8;

        // The elements are deliberately left uninitialized; callers are
        // expected to overwrite the buffer (or call `fill_black()`) before
        // reading from it.
        let mut vec = cache_line_aligned_vec(num_elems);
        unsafe { vec.set_len(num_elems); }

        RenderBuffer {
            buffer: UnsafeCell::new(vec),
        }
    }

    /// Zeroes the buffer.
    pub fn fill_black(&mut self) {
        // This is actually safe because self is borrowed mutably.
        for pixels in unsafe { self.get_mut_slice() } {
            *pixels = Mi32::zero();
        }
    }

    /// Returns a mutable view into the buffer.
    ///
    /// This is unsafe because it allows creating multiple mutable borrows of
    /// the buffer, which could result in races. Threads should ensure that
    /// they write to disjoint parts of the buffer.
73 | pub unsafe fn get_mut_slice(&self) -> &mut [Mi32] { 74 | (*self.buffer.get()).as_mut_slice() 75 | } 76 | 77 | /// Returns an RGBA bitmap suitable for display. 78 | #[cfg(not(windows))] 79 | pub fn into_bitmap(self) -> Vec { 80 | use util::transmute_vec; 81 | 82 | // This is actually safe because self is moved into the method. 83 | let buffer = unsafe { self.buffer.into_inner() }; 84 | unsafe { transmute_vec(buffer) } 85 | } 86 | 87 | /// Returns an RGBA bitmap suitable for display. 88 | #[cfg(windows)] 89 | pub fn into_bitmap(self) -> Vec { 90 | use std::mem; 91 | use util::drop_cache_line_aligned_vec; 92 | 93 | // This is actually safe because self is moved into the method. 94 | let buffer = unsafe { self.buffer.into_inner() }; 95 | 96 | // On Windows we must make an extra copy; we cannot just transmute the 97 | // buffer into a buffer of bytes, because the allocator then uses the 98 | // alignment of a byte to free the buffer, but it asserts that the 99 | // alignment for deallocation matches the alignment that the buffer was 100 | // allocated with. I raised this point in the allocator RFC discussion: 101 | // https://github.com/rust-lang/rfcs/pull/1398#issuecomment-198584430. 102 | // The extra copy is unfortunate, but the allocator API needs to change 103 | // before it can be avoided. 104 | let byte_buffer = buffer.iter() 105 | .flat_map(|mi32| { 106 | let bytes: &[u8; 32] = unsafe { mem::transmute(mi32) }; 107 | bytes 108 | }) 109 | .cloned() 110 | .collect(); 111 | 112 | drop_cache_line_aligned_vec(buffer); 113 | byte_buffer 114 | } 115 | } 116 | 117 | // The render buffer must be shared among threads, but UnsafeCell is not Sync. 
unsafe impl Sync for RenderBuffer {}

impl Renderer {
    /// Creates a renderer for the scene with the given viewport size. The
    /// debug view starts disabled and the time starts at zero.
    pub fn new(scene: Scene, width: u32, height: u32) -> Renderer {
        Renderer {
            scene: scene,
            width: width,
            height: height,
            enable_debug_view: false,
            time: 0.0,
            time_delta: 0.0,
        }
    }

    /// Sets the current time and the amount that the time is expected to change
    /// per frame.
    pub fn set_time(&mut self, time: f32, delta: f32) {
        self.time = time;
        self.time_delta = delta;
    }

    /// For an interactive scene, updates the scene for the new frame.
    /// TODO: This method does not really belong here.
    pub fn update_scene(&mut self) {
        // The camera moves along an ellipse in the xz-plane, parametrized by
        // the angle alpha, which changes linearly with time.
        let alpha = self.time * -0.02 + 0.1;
        let alpha_delta = self.time_delta * -0.02;
        let cam_position = SVector3::new(-3.8 * alpha.sin(), 1.6, 3.0 * alpha.cos());
        // The derivative of the position with respect to alpha, multiplied by
        // the change in alpha per frame.
        let cam_pos_delta = SVector3::new(-3.8 * alpha.cos(), 0.0, -3.0 * alpha.sin()) * alpha_delta;
        self.scene.camera.set_position(cam_position, cam_pos_delta);
        self.scene.camera.set_rotation(alpha, alpha_delta);
    }

    /// Switches between the regular view and the debug view.
    pub fn toggle_debug_view(&mut self) {
        self.enable_debug_view = !self.enable_debug_view;
    }

    /// Returns the screen coordinates of the block of 16x4 pixels where (x, y)
    /// is the bottom-left coordinate. The order is as follows:
    ///
    ///     0c 0d 0e 0f  1c 1d 1e 1f  2c 2d 2e 2f  3c 3d 3e 3f
    ///     08 09 0a 0b  18 19 1a 1b  28 29 2a 2b  38 39 3a 3b
    ///     04 05 06 07  14 15 16 17  24 25 26 27  34 35 36 37
    ///     00 01 02 03  10 11 12 13  20 21 22 23  30 31 32 33
    ///
    /// Or, in terms of the mf32s:
    ///
    ///     1 1 1 1  3 3 3 3  5 5 5 5  7 7 7 7
    ///     1 1 1 1  3 3 3 3  5 5 5 5  7 7 7 7
    ///     0 0 0 0  2 2 2 2  4 4 4 4  6 6 6 6
    ///     0 0 0 0  2 2 2 2  4 4 4 4  6 6 6 6
    ///
    /// Where inside every mf32 the pixels are ordered from left to right,
    /// bottom to top.
fn get_pixel_coords_16x4(&self, x: u32, y: u32, rng: &mut Rng) -> ([Mf32; 8], [Mf32; 8]) {
    // Both axes are scaled by 2 / width, so the scale is uniform and the
    // x-coordinates of the viewport span the range (-1, 1).
    let scale = Mf32::broadcast(2.0 / self.width as f32);
    // Precomputed offsets of the 4x2 sub-blocks, in screen units. Only the
    // first four lanes are used, they are read individually below.
    let scale_mul = Mf32(2.0, 4.0, 8.0, 12.0, 0.0, 0.0, 0.0, 0.0) * scale;

    // Pixel offsets inside one mf32: 4 pixels wide, 2 pixels high, ordered
    // left to right, bottom to top.
    let off_x = Mf32(0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0);
    let off_y = Mf32(0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0);

    // Coordinates of the bottom-left 4x2 sub-block, relative to the center of
    // the viewport.
    let base_x = scale * (off_x + Mf32::broadcast(x as f32 - self.width as f32 * 0.5));
    let base_y = scale * (off_y + Mf32::broadcast(y as f32 - self.height as f32 * 0.5));

    // Two consecutive mf32s share a column; every next pair lies 4 pixels
    // further to the right.
    let xs = [
        base_x,
        base_x,
        base_x + Mf32::broadcast(scale_mul.1), // 4.0 * scale
        base_x + Mf32::broadcast(scale_mul.1), // 4.0 * scale
        base_x + Mf32::broadcast(scale_mul.2), // 8.0 * scale
        base_x + Mf32::broadcast(scale_mul.2), // 8.0 * scale
        base_x + Mf32::broadcast(scale_mul.3), // 12.0 * scale
        base_x + Mf32::broadcast(scale_mul.3)  // 12.0 * scale
    ];

    // Even mf32s cover the bottom two rows of the block, odd mf32s cover the
    // top two rows.
    let ys = [
        base_y, base_y + Mf32::broadcast(scale_mul.0), // 2.0 * scale
        base_y, base_y + Mf32::broadcast(scale_mul.0), // 2.0 * scale
        base_y, base_y + Mf32::broadcast(scale_mul.0), // 2.0 * scale
        base_y, base_y + Mf32::broadcast(scale_mul.0)  // 2.0 * scale
    ];

    // Add a random offset of at most one pixel, to sample with anti-alias.
    // TODO: If I ever do multiple samples per pixel in one frame, I could
    // do stratified sampling here.
    let xs_aa = generate_slice8(|i| rng.sample_unit().mul_add(scale, xs[i]));
    let ys_aa = generate_slice8(|i| rng.sample_unit().mul_add(scale, ys[i]));

    (xs_aa, ys_aa)
}

/// Shuffles bytes around to store 16x4 rendered pixels in the correct
/// location in a bitmap.
fn store_mi32_16x4(&self, target: &mut [Mi32], x: u32, y: u32, data: &[Mi32; 8]) {
    // Helper functions to shuffle around the pixels from the order as
    // described in `get_pixel_coords_16x4` into four rows of 16 pixels.
    // `mk_line0` joins the lower rows (lanes 0..3) of two neighbouring mi32s,
    // `mk_line1` joins their upper rows (lanes 4..7).
    let mk_line0 = |left: Mi32, right: Mi32|
        Mi32(left.0, left.1, left.2, left.3, right.0, right.1, right.2, right.3);
    let mk_line1 = |left: Mi32, right: Mi32|
        Mi32(left.4, left.5, left.6, left.7, right.4, right.5, right.6, right.7);

    // Store the pixels in the bitmap. If the bitmap is aligned to the cache
    // line size, this stores exactly four cache lines, so there is no need
    // to fetch those lines because all bytes are overwritten. This saves a
    // trip to memory, which makes this store fast.
    // The indices are in units of mi32s (8 pixels), hence the division by 8.
    let idx_line0 = ((y * self.width + 0 * self.width + x) / 8) as usize;
    let idx_line1 = ((y * self.width + 1 * self.width + x) / 8) as usize;
    let idx_line2 = ((y * self.width + 2 * self.width + x) / 8) as usize;
    let idx_line3 = ((y * self.width + 3 * self.width + x) / 8) as usize;

    // Every row of 16 pixels consists of two mi32s. Rows 0 and 1 come from
    // the even mi32s, rows 2 and 3 from the odd ones.
    target[idx_line0 + 0] = mk_line0(data[0], data[2]);
    target[idx_line0 + 1] = mk_line0(data[4], data[6]);
    target[idx_line1 + 0] = mk_line1(data[0], data[2]);
    target[idx_line1 + 1] = mk_line1(data[4], data[6]);
    target[idx_line2 + 0] = mk_line0(data[1], data[3]);
    target[idx_line2 + 1] = mk_line0(data[5], data[7]);
    target[idx_line3 + 0] = mk_line1(data[1], data[3]);
    target[idx_line3 + 1] = mk_line1(data[5], data[7]);
}

/// Converts floating-point color values to 32-bit RGBA and stores the
/// values in the bitmap.
///
/// Red ends up in the lowest byte, green in the second byte, and blue in the
/// third byte of every pixel. The highest byte is left zero.
fn store_pixels_color_16x4(&self,
                           bitmap: &mut [Mi32],
                           x: u32,
                           y: u32,
                           data: &[MPixelData; 8]) {
    // Convert f32 colors to i32 colors in the range 0-255.
    let range = Mf32::broadcast(255.0);
    let rgbas = generate_slice8(|i| {
        // Multiply color by 2.0 to brighten up the scene a bit.
        let rgb_255 = (data[i].color * Mf32::broadcast(2.0)).clamp_one() * range;
        let r = rgb_255.x.into_mi32();
        let g = rgb_255.y.into_mi32().map(|x| x << 8);
        let b = rgb_255.z.into_mi32().map(|x| x << 16);
        (r | g) | b
    });

    self.store_mi32_16x4(bitmap, x, y, &rgbas);
}

/// Converts floating-point texture coordinates to integers and stores the
/// values in the bitmap.
///
/// The texture coordinates go into the two lowest bytes, the Fresnel factor
/// into the third byte, and the texture index into the highest byte.
fn store_pixels_gbuffer_16x4(&self,
                             gbuffer: &mut [Mi32],
                             x: u32,
                             y: u32,
                             data: &[MPixelData; 8]) {
    // Generate the pixels for texture coordinates and the Fresnel factor.
    let range = Mf32::broadcast(255.0);
    let uvs = generate_slice8(|i| {
        let tex_index = data[i].tex_index;
        let tex_x = data[i].tex_coords.0 * range;
        let tex_y = data[i].tex_coords.1 * range;
        let fresnel = data[i].fresnel * range;

        // Do not clamp the texture coordinates, make them wrap instead.
        let wrap = Mi32::broadcast(0xff);
        let r = tex_x.into_mi32() & wrap;
        let g = (tex_y.into_mi32() & wrap).map(|x| x << 8);
        // NOTE(review): the Fresnel factor is neither clamped nor masked, so
        // this presumably relies on it lying in the range 0-1 -- confirm.
        let b = fresnel.into_mi32().map(|x| x << 16);

        // Store the texture index in the alpha channel.
        let a = tex_index.map(|x| x << 24);

        (r | g) | (b | a)
    });

    self.store_mi32_16x4(gbuffer, x, y, &uvs);
}

/// Renders a block of 16x4 pixels, where (x, y) is the coordinate of the
/// bottom-left pixel. Returns the pixel data for the block as 8 groups of 8
/// pixels, which includes the texture indices for every pixel.
fn render_block_16x4(&self, x: u32, y: u32, rng: &mut Rng) -> [MPixelData; 8] {
    let (xs, ys) = self.get_pixel_coords_16x4(x, y, rng);

    // The debug view does not need random numbers, the regular path tracer
    // does.
    if self.enable_debug_view {
        generate_slice8(|i| self.render_pixels_debug(xs[i], ys[i]))
    } else {
        generate_slice8(|i| self.render_pixels(xs[i], ys[i], rng))
    }
}

/// Renders a square part of a frame.
///
/// The (x, y) coordinate is the coordinate of the bottom-left pixel of the
/// patch. The patch width must be a multiple of 16.
///
/// Colors are written to `bitmap` and texture data to `gbuffer`. The random
/// number generator is seeded with the patch coordinates and the frame
/// number.
pub fn render_patch_u8(&self,
                       bitmap: &mut [Mi32],
                       gbuffer: &mut [Mi32],
                       patch_width: u32,
                       x: u32,
                       y: u32,
                       frame_number: u32) {
    assert_eq!(patch_width & 15, 0); // Patch width must be a multiple of 16.
    // The patch is rendered in blocks of 16x4 pixels, so there are four times
    // as many blocks vertically as there are horizontally.
    let w = patch_width / 16;
    let h = patch_width / 4;
    let mut rng = Rng::with_seed(x, y, frame_number);

    for i in 0..w {
        for j in 0..h {
            let xb = x + i * 16;
            let yb = y + j * 4;
            let data = self.render_block_16x4(xb, yb, &mut rng);
            self.store_pixels_color_16x4(bitmap, xb, yb, &data);
            self.store_pixels_gbuffer_16x4(gbuffer, xb, yb, &data);
        }
    }
}

/// Renders a square part of a frame, adds the contribution to the buffer.
///
/// The (x, y) coordinate is the coordinate of the bottom-left pixel of the
/// patch. The patch width must be a multiple of 16. The memory layout of
/// the HDR buffer is as a bitmap of 16x4 blocks.
///
/// This also fills the gbuffer. This is not done accumulatively, it is
/// filled for the current frame. (Though the gbuffer should be fairly
/// constant anyway, and there is no way to blend it, apart from averaging
/// texture coordinates.)
pub fn accumulate_patch_f32(&self,
                            hdr_buffer: &mut [[MVector3; 8]],
                            gbuffer: &mut [Mi32],
                            patch_width: u32,
                            x: u32,
                            y: u32,
                            frame_number: u32) {
    assert_eq!(patch_width & 15, 0); // Patch width must be a multiple of 16.
    let w = patch_width / 16;
    let h = patch_width / 4;
    let mut rng = Rng::with_seed(x, y, frame_number);

    for i in 0..w {
        for j in 0..h {
            let xb = x + i * 16;
            let yb = y + j * 4;
            let data = self.render_block_16x4(xb, yb, &mut rng);
            // The HDR buffer has one element per 16x4 block, stored row by
            // row, with `self.width / 16` blocks per row.
            let index = ((y / 4 + j) * (self.width / 16) + (x / 16 + i)) as usize;
            let current = hdr_buffer[index];
            hdr_buffer[index] = generate_slice8(|k| current[k] + data[k].color);
            self.store_pixels_gbuffer_16x4(gbuffer, xb, yb, &data);
        }
    }
}

/// Creates a new float buffer, the size of the viewport, that can be
/// rendered to with `accumulate_patch_f32()`.
///
/// The buffer has one element per block of 16x4 pixels, all zero initially.
pub fn new_buffer_f32(&self) -> Vec<[MVector3; 8]> {
    let w = self.width / 16;
    let h = self.height / 4;
    let mut buffer = Vec::with_capacity((w * h) as usize);
    for _ in 0..(w * h) {
        buffer.push(generate_slice8(|_| MVector3::zero()));
    }
    buffer
}

/// Converts a buffer of floating point values used for accumulative
/// rendering into a 32 bit per pixel RGBA bitmap.
///
/// The accumulated colors are divided by `num_samples` to obtain the
/// average. Panics if the viewport width is not a multiple of 16 or the
/// height is not a multiple of 4.
pub fn buffer_f32_into_render_buffer(&self,
                                     hdr_buffer: &[[MVector3; 8]],
                                     render_buffer: &mut RenderBuffer,
                                     num_samples: u32) {
    let w = self.width / 16;
    let h = self.height / 4;
    assert_eq!(w * 16, self.width);
    assert_eq!(h * 4, self.height);
    // NOTE(review): `num_samples` is not checked for zero, in which case the
    // factor is infinite -- presumably callers always pass at least 1.
    let factor = Mf32::broadcast(1.0 / (num_samples as f32));

    {
        // This is safe here because there is only one mutable borrow.
        let bitmap = unsafe { render_buffer.get_mut_slice() };

        for j in 0..h {
            for i in 0..w {
                let rgbs = hdr_buffer[(j * w + i) as usize];
                let rgbs = generate_slice8(|k| rgbs[k] * factor);
                let data = generate_slice8(|k| {
                    MPixelData {
                        color: rgbs[k],
                        // These values are unused, only the color is stored
                        // in this function.
                        tex_index: Mi32::zero(),
                        tex_coords: (Mf32::zero(), Mf32::zero()),
                        fresnel: Mf32::zero(),
                    }
                });
                self.store_pixels_color_16x4(bitmap, i * 16, j * 4, &data);
            }
        }
    }
}

/// Returns colors for the pixels, as well as the texture indices.
///
/// Traces a path of at most 5 bounces for every pixel. The texture index,
/// texture coordinates, and Fresnel factor are taken from the first
/// intersection.
fn render_pixels(&self, x: Mf32, y: Mf32, rng: &mut Rng) -> MPixelData {
    // Sample a random time inside the frame for every ray.
    let t = rng.sample_unit();
    let mut ray = self.scene.camera.get_ray(x, y, t);
    let mut color = MVector3::new(Mf32::one(), Mf32::one(), Mf32::one());
    let mut hit_emissive = Mf32::zero();
    let mut texture_index = Mi32::zero();
    let mut texture_coords = (Mf32::zero(), Mf32::zero());
    let mut fresnel = Mf32::zero();

    let max_bounces = 5;
    for i in 0..max_bounces {
        let isect = self.scene.intersect_nearest(&ray);
        // Remember the material of the most recent intersection, it is used
        // as a mask after the loop.
        hit_emissive = isect.material;

        // Do not allow NaNs to creep in.
        debug_assert!(ray.direction.all_finite(), "infinite ray direction at iteration {}", i);
        debug_assert!(isect.position.all_finite(), "infinite intersection at iteration {}", i);
        debug_assert!(isect.distance.all_finite(), "infinite distance at iteration {}", i);

        // Stop when every ray hit a light source.
        if isect.material.all_sign_bits_negative() {
            break;
        }

        // Get a new ray and the color modulation. For the first bounce, the
        // Fresnel term should not contribute to the color modulation
        // because that is handled on the GPU.
        let (new_ray, color_mod, fr) =
            continue_path(isect.material, &self.scene, &ray, &isect, rng, i == 0);
        ray = new_ray;
        color = color.mul_coords(color_mod);

        if i == 0 {
            texture_index = isect.material.get_texture();
            texture_coords = isect.tex_coords;
            fresnel = fr;
        }
    }

    // Compute light contribution.
    let emission = sky_intensity(ray.direction);
    color = color.mul_coords(emission);

    // If the last thing that a ray hit was an emissive material, it has
    // found a light source and the computed color is correct. If the ray
    // did not find a light source but the loop was terminated, the computed
    // color is invalid; it should be black.
    let color = MVector3::zero().pick(color, hit_emissive);

    MPixelData {
        color: color,
        tex_index: texture_index,
        tex_coords: texture_coords,
        fresnel: fresnel,
    }
}

/// Returns a color that visualizes how much work was required to intersect
/// the ray with the scene: green for the number of bounding boxes that were
/// intersected, blue for the number of triangles, both on a logarithmic
/// scale. The gbuffer data is zero.
fn render_pixels_debug(&self, x: Mf32, y: Mf32) -> MPixelData {
    let t = Mf32::zero();
    let ray = self.scene.camera.get_ray(x, y, t);
    let (numi_aabb, numi_tri) = self.scene.intersect_debug(&ray);

    // The counts are shared by all pixels of the mray, so the entire group of
    // 8 pixels gets the same color.
    let g = Mf32::broadcast((numi_aabb as f32).log2() * 0.1);
    let b = Mf32::broadcast((numi_tri as f32).log2() * 0.1);

    let color = MVector3::new(Mf32::zero(), g, b);

    MPixelData {
        color: color,
        tex_index: Mi32::zero(),
        tex_coords: (Mf32::zero(), Mf32::zero()),
        fresnel: Mf32::zero(),
    }
}
}

/// Converts a render buffer into a bitmap twice; once the bitmap is dropped
/// explicitly and once it is dropped at the end of the scope.
#[test]
fn render_buffer_into_bitmap() {
    let render_buffer = RenderBuffer::new(1280, 736);
    let bitmap = render_buffer.into_bitmap();
    drop(bitmap);
    let render_buffer = RenderBuffer::new(1280, 736);
    let _bitmap = render_buffer.into_bitmap();
    // The render buffer was transmuted or copied into a vector of pixels, and
    // dropping the vector at this point should not result in a crash.
}

--------------------------------------------------------------------------------
/src/scene.rs:
--------------------------------------------------------------------------------
// Convector -- An interactive CPU path tracer
// Copyright 2016 Ruud van Asseldonk

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 3. A copy
// of the License is available in the root of the repository.

use bvh::Bvh;
use material::{MDirectSample, MMaterial};
use quaternion::{MQuaternion, SQuaternion, rotate};
use random::Rng;
use ray::{MIntersection, MRay};
use simd::Mf32;
use std::f32::consts::PI;
use triangle::Triangle;
use util::generate_slice8;
use vector3::{MVector3, SVector3};
use wavefront::Mesh;

/// A camera with a position and orientation that can change linearly during
/// a frame.
pub struct Camera {
    position: SVector3,
    position_delta: SVector3,

    orientation: SQuaternion,
    orientation_delta: SQuaternion,

    /// Distance such that a vector at `(1, 0, screen_distance)` makes an angle
    /// of the desired field of view with `(-1, 0, screen_distance)`.
    screen_distance: f32,
}

impl Camera {
    /// Creates a camera at the origin with 60 degrees field of view.
    pub fn new() -> Camera {
        Camera {
            position: SVector3::zero(),
            position_delta: SVector3::zero(),
            orientation: SQuaternion::new(1.0, 0.0, 0.0, 0.0),
            orientation_delta: SQuaternion::new(0.0, 0.0, 0.0, 0.0),
            // NOTE(review): 1 / sin(pi / 5) is roughly 1.70. By the definition
            // of `screen_distance` that is a field of view of
            // 2 * atan(1 / 1.70), roughly 60.9 degrees.
            screen_distance: 1.0 / (PI / 5.0).sin(),
        }
    }

    /// Sets the position of the camera at the beginning of the frame, and the
    /// offset such that position + delta is the position at the end of the
    /// frame.
47 | pub fn set_position(&mut self, position: SVector3, delta: SVector3) { 48 | self.position = position; 49 | self.position_delta = delta; 50 | } 51 | 52 | /// Sets the orientation of the camera at the beginning of the frame, and 53 | /// the delta such that orientation + delta normalized is the orientation at 54 | /// the end of the frame. 55 | pub fn set_orientation(&mut self, orientation: SQuaternion, delta: SQuaternion) { 56 | self.orientation = orientation; 57 | self.orientation_delta = delta; 58 | } 59 | 60 | /// Sets the desired horizontal field of view in radians. 61 | pub fn set_fov(&mut self, fov: f32) { 62 | self.screen_distance = 1.0 / (fov / 2.0).sin(); 63 | } 64 | 65 | /// Sets the rotation of the camera in the xz-plane. 66 | pub fn set_rotation(&mut self, radians: f32, delta: f32) { 67 | let x = (radians * 0.5).cos(); 68 | let y = (radians * 0.5).sin(); 69 | self.orientation = SQuaternion::new(x, 0.0, -y, 0.0); 70 | 71 | let x_delta = 0.5 * -(radians * 0.5).sin() * delta; 72 | let y_delta = 0.5 * (radians * 0.5).cos() * delta; 73 | self.orientation_delta = SQuaternion::new(x_delta, 0.0, -y_delta, 0.0); 74 | } 75 | 76 | /// Returns a camera ray for the given screen coordinates. 77 | /// 78 | /// Values for x are in the range (-1, 1), the scale is uniform in both 79 | /// directions. The time ranges from 0.0 at the beginning of the frame to 80 | /// 1.0 at the end of the frame. 
pub fn get_ray(&self, x: Mf32, y: Mf32, t: Mf32) -> MRay {
    // Move the origin linearly from the position at the beginning of the
    // frame to the position at the end of the frame.
    let origin = MVector3::broadcast(self.position);
    let origin_delta = MVector3::broadcast(self.position_delta);
    let origin = origin_delta.mul_add(t, origin);

    let orientation = MQuaternion::broadcast(self.orientation);
    let orientation_delta = MQuaternion::broadcast(self.orientation_delta);
    let orientation = orientation.interpolate(&orientation_delta, t);

    // Before rotation the camera looks along the negative z-axis.
    let dist = Mf32::broadcast(-self.screen_distance);
    let dir_src = MVector3::new(x, y, dist).normalized();
    let dir = rotate(&dir_src, &orientation);

    MRay {
        origin: origin,
        direction: dir,
        active: Mf32::zero(),
    }
}
}

/// A camera together with the geometry that it looks at.
pub struct Scene {
    pub camera: Camera,

    /// Bounding volume hierarchy of all triangles in the scene.
    bvh: Bvh,

    /// Indices into the BVH's triangle list, of triangles that have a material
    /// eligible for direct sampling.
    direct_sample: Vec<u32>,
}

impl Scene {
    /// Builds a scene from the meshes, with a default camera. Also collects
    /// the indices of the triangles that are eligible for direct sampling.
    pub fn from_meshes(meshes: &[Mesh]) -> Scene {
        let bvh = Bvh::from_meshes(meshes);

        let mut direct_sample = Vec::new();
        for (i, triangle) in bvh.triangles.iter().enumerate() {
            if triangle.material.is_direct_sample() {
                direct_sample.push(i as u32);
            }
        }

        Scene {
            camera: Camera::new(),
            bvh: bvh,
            direct_sample: direct_sample,
        }
    }

    /// Prints statistics about the BVH and about direct sampling to stdout.
    pub fn print_stats(&self) {
        self.bvh.print_stats();

        println!("scene statistics:");
        println!(" triangles eligible for direct sampling: {} / {} ({:0.1}%)",
                 self.direct_sample.len(),
                 self.bvh.triangles.len(),
                 100.0 * self.direct_sample.len() as f32 / self.bvh.triangles.len() as f32);
    }

    /// Returns 8 random points on 8 random triangles eligible for direct
    /// sampling.
143 | pub fn get_direct_sample(&self, rng: &mut Rng) -> MDirectSample { 144 | // The number of triangles eligible for direct sampling must be greater 145 | // than 0, bute for good random number, I assume below that there are 8. 146 | // This is the case for my hard-coded scene. 147 | debug_assert!(self.direct_sample.len() == 8); 148 | 149 | let random_bits = rng.sample_u32(); 150 | 151 | // Pick a random direct sampling triangle for every coordinate. This has 152 | // to be done serially, unfortunately. The low order bits of the random 153 | // number are not really random modulo 8, but the high order bits are. 154 | // This has to do with how Rng works. In short, the sequence x*p^n is 155 | // not random modulo 8, because p^n can take at most 4 values mod 8. And 156 | // if you are unlucky, x = 0 mod 8, and then all indices are the same. 157 | // Therefore take the high order bits, which are sufficiently random. 158 | // TODO: Are the bounds checks a bottleneck here? 159 | let indices = generate_slice8(|i| (random_bits[i] >> 29) as u32); 160 | let tri_indices = generate_slice8(|i| self.direct_sample[indices[i] as usize]); 161 | let tris = generate_slice8(|i| &self.bvh.triangles[tri_indices[i] as usize]); 162 | 163 | // Gather the vertices of the triangles into SIMD vectors, so from now 164 | // on we are not serial any more. 165 | let v0 = MVector3::generate(|i| tris[i].v0); 166 | let v1 = MVector3::generate(|i| tris[i].v1); 167 | let v2 = MVector3::generate(|i| tris[i].v2); 168 | 169 | let e1 = v0 - v2; 170 | let e2 = v1 - v0; 171 | let normal_denorm = e1.cross(e2); 172 | let cross_norm_recip = normal_denorm.norm_squared().rsqrt(); 173 | let normal = normal_denorm * cross_norm_recip; 174 | let area = Mf32::broadcast(0.5) * cross_norm_recip.recip_fast(); 175 | 176 | let u = rng.sample_unit(); 177 | let v = rng.sample_unit(); 178 | // If u + v > 1, the point lies outside of the triangle, and s will have 179 | // negative sign. 
If the point is inside the triangle, s will have 180 | // positive sign. 181 | let s = (Mf32::one() - u) - v; 182 | // If the point lies outside the triangle, it lies in the other half of 183 | // the parallellogram, so transform the coordinates to get them into the 184 | // correct triangle again. 185 | let u = u.pick(Mf32::one() - u, s); 186 | let v = v.pick(Mf32::one() - v, s); 187 | 188 | let p = e2.mul_add(v, e1.neg_mul_add(u, v0)); 189 | 190 | let ds = MDirectSample { 191 | position: p, 192 | normal: normal, 193 | area: area, 194 | }; 195 | 196 | // Prevent NaNs from creeping in, and ensure that the sample is valid. 197 | debug_assert!(normal.all_finite()); 198 | debug_assert!(area.all_finite()); 199 | debug_assert!(area.all_sign_bits_positive(), "area must be positive"); 200 | 201 | ds 202 | } 203 | 204 | /// Returns the number of triangles eligible for direct sampling. 205 | pub fn direct_sample_num(&self) -> usize { 206 | self.direct_sample.len() 207 | } 208 | 209 | pub fn foreach_direct_sample(&self, mut f: F) { 210 | for i in &self.direct_sample { 211 | // TODO: Remove the bounds check? 212 | let triangle = &self.bvh.triangles[*i as usize]; 213 | f(triangle); 214 | } 215 | } 216 | 217 | /// Returns the interections with the shortest distance along the ray. 218 | /// 219 | /// Intersects the sky if no other geometry was intersected. 220 | pub fn intersect_nearest(&self, ray: &MRay) -> MIntersection { 221 | let huge_distance = Mf32::broadcast(1.0e5); 222 | let far_away = MIntersection { 223 | position: ray.direction.mul_add(huge_distance, ray.origin), 224 | normal: ray.direction, 225 | distance: huge_distance, 226 | material: MMaterial::sky(), 227 | tex_coords: (Mf32::zero(), Mf32::zero()), 228 | }; 229 | self.bvh.intersect_nearest(ray, far_away) 230 | } 231 | 232 | /// Returns the number of AABBs and triangles intersected to find the 233 | /// nearest intersection. 
pub fn intersect_debug(&self, ray: &MRay) -> (u32, u32) {
    // Start out with an intersection with the sky, far away along the ray,
    // like `intersect_nearest()` does.
    let huge_distance = Mf32::broadcast(1.0e5);
    let far_away = MIntersection {
        position: ray.direction.mul_add(huge_distance, ray.origin),
        normal: ray.direction,
        distance: huge_distance,
        material: MMaterial::sky(),
        tex_coords: (Mf32::zero(), Mf32::zero()),
    };
    self.bvh.intersect_debug(ray, far_away)
}
}

--------------------------------------------------------------------------------
/src/stats.rs:
--------------------------------------------------------------------------------
// Convector -- An interactive CPU path tracer
// Copyright 2016 Ruud van Asseldonk

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 3. A copy
// of the License is available in the root of the repository.

//! A simple way to keep track of statistics.

use time::Duration;

/// Keeps track of the min, median, and max of a variable.
///
/// The number of values stored is bounded.
pub struct Stats {
    // The values are kept sorted in ascending order.
    values: Vec<u32>,
}

impl Stats {
    /// Creates an empty collection with room for at least 128 values.
    pub fn new() -> Stats {
        Stats { values: Vec::with_capacity(128) }
    }

    /// Inserts a value, keeping the stored values sorted. When there is no
    /// room left, two pairs of values near the extremes are merged first, so
    /// the vector never grows beyond its initial capacity.
    pub fn insert(&mut self, value: u32) {
        // Make room if there is none. Removing one extreme value below the
        // median and one above does not affect the median, so we can discard
        // values without affecting the median. However, when the median
        // shifts, these values could have been important, and the result is
        // incorrect. For a stable value, the median will not shift by much, so
        // it is best to remove the most extreme values. On the other hand, the
        // min and max are interesting to know, so merge the values after the
        // min and before the max.
        if self.values.len() == self.values.capacity() {
            debug_assert!(self.values.len() >= 4);
            let len = self.values.len();
            // Merge the two values after the min and the two values before the
            // max. Compute the averages in 64 bits, because the sum of two
            // large values does not fit in 32 bits.
            let high_sum = self.values[len - 3] as u64 + self.values[len - 2] as u64;
            let low_sum = self.values[1] as u64 + self.values[2] as u64;
            let avg_high = (high_sum / 2) as u32;
            let avg_low = (low_sum / 2) as u32;
            self.values[len - 3] = avg_high;
            self.values[2] = avg_low;
            // Remove the high value first, so the low index remains valid.
            self.values.remove(len - 2);
            self.values.remove(1);
        }

        // Whether the value is present already or not, the index returned by
        // the binary search is a position that keeps the vector sorted.
        let idx = match self.values.binary_search(&value) {
            Ok(i) => i,
            Err(i) => i,
        };

        self.values.insert(idx, value);
    }

    /// Inserts the duration rounded to microseconds.
    ///
    /// Durations that do not fit in 32 bits are clamped: negative durations
    /// are stored as 0, and durations of more than `u32::MAX` microseconds
    /// are stored as `u32::MAX`. Panics if the duration in nanoseconds does
    /// not fit in an `i64`.
    pub fn insert_time_us(&mut self, duration: Duration) {
        use std::cmp;

        let ns = duration.num_nanoseconds().unwrap();
        let us = (ns + 500) / 1000;
        // Clamp rather than truncate, so an out of range value does not wrap
        // around to an arbitrary number.
        let us = cmp::max(0, cmp::min(us, u32::max_value() as i64));
        self.insert(us as u32);
    }

    /// Returns the median of the stored values.
    ///
    /// Panics if no values are present.
    pub fn median(&self) -> u32 {
        // This is not correct for an even number of values, but as the number
        // of values grows bigger this difference becomes smaller.
        self.values[self.values.len() / 2]
    }

    /// Returns the minimum of the stored values.
    ///
    /// Panics if no values are present.
    pub fn min(&self) -> u32 {
        self.values[0]
    }
}

/// A collection of global stats that the app keeps track of.
pub struct GlobalStats {
    /// Texture upload time in microseconds.
    pub tex_upload_us: Stats,
    /// Draw and wait for vsync time in microseconds.
    pub draw_vsync_us: Stats,
    /// Total time of rendering and drawing a frame.
    pub frame_us: Stats,
}

impl GlobalStats {
    /// Creates a collection in which all stats are empty.
    pub fn new() -> GlobalStats {
        GlobalStats {
            tex_upload_us: Stats::new(),
            draw_vsync_us: Stats::new(),
            frame_us: Stats::new(),
        }
    }

    /// Prints the median and minimum of every stat to stdout, as well as the
    /// frame rate that corresponds to the median frame time.
    ///
    /// Panics if one of the stats has no values.
    pub fn print(&self) {
        println!("");
        println!("texture upload: median {} us, min {} us",
                 self.tex_upload_us.median(),
                 self.tex_upload_us.min());
        println!("draw and vsync: median {} us, min {} us",
                 self.draw_vsync_us.median(),
                 self.draw_vsync_us.min());
        println!("frame time: median {} us, min {} us -> {:0.1} fps",
                 self.frame_us.median(),
                 self.frame_us.min(),
                 1.0 / (self.frame_us.median() as f32 * 1e-6));
    }
}

--------------------------------------------------------------------------------
/src/trace.rs:
--------------------------------------------------------------------------------
// Convector -- An interactive CPU path tracer
// Copyright 2016 Ruud van Asseldonk

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 3. A copy
// of the License is available in the root of the repository.

//! This mod writes trace logs that can be inspected with chrome://tracing.
//! It is intended as a debugging tool, so I can see what all the cores are
//! doing; how work is scheduled among CPUs and what is blocking.
//!
//! Note: this mod is not related to ray tracing, sorry for the name.

// TODO: Integrate this with stats.
15 | 16 | use std::collections::VecDeque; 17 | use std::fs::File; 18 | use std::io; 19 | use std::path::Path; 20 | use std::sync::{Arc, Mutex}; 21 | use thread_id; 22 | use time::{Duration, PreciseTime}; 23 | 24 | struct TraceEvent { 25 | start: PreciseTime, 26 | end: PreciseTime, 27 | description: &'static str, 28 | frame: u32, 29 | id: u32, 30 | tid: u64, 31 | } 32 | 33 | pub struct ScopedTraceEvent { 34 | start: PreciseTime, 35 | description: &'static str, 36 | frame: u32, 37 | id: u32, 38 | log: Arc>, 39 | handled: bool, 40 | } 41 | 42 | struct TraceLogImpl { 43 | events: VecDeque, 44 | limit: usize, 45 | } 46 | 47 | pub struct TraceLog { 48 | log: Arc>, 49 | epoch: PreciseTime, 50 | frame_number: u32, 51 | } 52 | 53 | impl ScopedTraceEvent { 54 | /// Records the event in the trace log and returns its duration. 55 | pub fn take_duration(mut self) -> Duration { 56 | let end = PreciseTime::now(); 57 | self.add_to_trace(end); 58 | self.start.to(end) 59 | } 60 | 61 | fn add_to_trace(&mut self, now: PreciseTime) { 62 | let event = TraceEvent { 63 | start: self.start, 64 | end: now, 65 | description: self.description, 66 | frame: self.frame, 67 | id: self.id, 68 | tid: thread_id::get() as u64, 69 | }; 70 | let mut trace_log_impl = self.log.lock().unwrap(); 71 | if trace_log_impl.events.len() == trace_log_impl.limit { 72 | trace_log_impl.events.pop_front(); 73 | } 74 | trace_log_impl.events.push_back(event); 75 | self.handled = true; 76 | } 77 | } 78 | 79 | impl Drop for ScopedTraceEvent { 80 | fn drop(&mut self) { 81 | if !self.handled { 82 | let end = PreciseTime::now(); 83 | self.add_to_trace(end); 84 | } 85 | } 86 | } 87 | 88 | impl TraceLog { 89 | pub fn with_limit(limit: usize) -> TraceLog { 90 | let trace_log_impl = TraceLogImpl { 91 | events: VecDeque::with_capacity(limit), 92 | limit: limit, 93 | }; 94 | TraceLog { 95 | log: Arc::new(Mutex::new(trace_log_impl)), 96 | epoch: PreciseTime::now(), 97 | frame_number: 0, 98 | } 99 | } 100 | 101 | /// Increments the 
frame number and returns the current frame number. 102 | pub fn inc_frame_number(&mut self) -> u32 { 103 | self.frame_number += 1; 104 | self.frame_number 105 | } 106 | 107 | /// Starts a new trace event. When the returned value goes out of scope, it 108 | /// is added to the log with the correct end time. 109 | pub fn scoped(&self, description: &'static str, id: u32) -> ScopedTraceEvent { 110 | ScopedTraceEvent { 111 | start: PreciseTime::now(), 112 | description: description, 113 | frame: self.frame_number, 114 | id: id, 115 | log: self.log.clone(), 116 | handled: false, 117 | } 118 | } 119 | 120 | /// Writes the trace as a json string in the trace log format that can be 121 | /// read by Chrome’s trace viewer (chrome://tracing). 122 | pub fn export(&self, output: &mut W) -> io::Result<()> { 123 | try!(write!(output, "{{\"traceEvents\":[")); 124 | let mut is_first = true; 125 | for event in self.log.lock().unwrap().events.iter() { 126 | if !is_first { 127 | try!(write!(output, ",")); 128 | } 129 | let ts = self.epoch.to(event.start).num_microseconds().unwrap(); 130 | let dur = event.start.to(event.end).num_microseconds().unwrap(); 131 | try!(write!(output, "{{\"name\":\"{0}\",\ 132 | \"cat\":\"\",\ 133 | \"ph\":\"X\",\ 134 | \"ts\":{1},\ 135 | \"dur\":{2},\ 136 | \"pid\":0,\ 137 | \"tid\":{3},\ 138 | \"args\":{{\ 139 | \"frame\":{4},\ 140 | \"id\":{5}}}}}", 141 | event.description, ts, dur, event.tid, 142 | event.frame, event.id)); 143 | is_first = false; 144 | } 145 | write!(output, "],\"displayTimeUnit\":\"ms\"}}") 146 | } 147 | 148 | /// Writes the trace to a json file. 
149 | pub fn export_to_file>(&self, path: P) -> io::Result<()> { 150 | let mut file = try!(File::create(path)); 151 | self.export(&mut file) 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/triangle.rs: -------------------------------------------------------------------------------- 1 | // Convector -- An interactive CPU path tracer 2 | // Copyright 2016 Ruud van Asseldonk 3 | 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License version 3. A copy 6 | // of the License is available in the root of the repository. 7 | 8 | //! This module implements the triangle primitive and related geometry functions. 9 | //! 10 | //! The only primitive is the triangle, there are no spheres or other shapes. 11 | //! This avoids having to dispatch on the primitive type to intersect an object. 12 | //! It avoids a virtual method call, which in turn enables the triangle 13 | //! intersection code to be inlined. 14 | 15 | use material::{SMaterial, MMaterial}; 16 | use ray::{MIntersection, MRay}; 17 | use simd::Mf32; 18 | use vector3::{MVector3, SVector3}; 19 | 20 | #[cfg(test)] 21 | use {bench, test}; 22 | 23 | #[derive(Clone, Debug)] 24 | pub struct Triangle { 25 | pub v0: SVector3, 26 | pub v1: SVector3, 27 | pub v2: SVector3, 28 | pub uv0: (f32, f32), 29 | pub uv1: (f32, f32), 30 | pub uv2: (f32, f32), 31 | pub material: SMaterial, 32 | } 33 | 34 | /// The result of intersecting a triangle to compute a probability density.
35 | pub struct MDirectIntersection { 36 | pub normal: MVector3, 37 | pub area: Mf32, 38 | pub distance: Mf32, 39 | pub mask: Mf32, 40 | } 41 | 42 | impl Triangle { 43 | pub fn new(v0: SVector3, v1: SVector3, v2: SVector3, mat: SMaterial) -> Triangle { 44 | Triangle { 45 | v0: v0, 46 | v1: v1, 47 | v2: v2, 48 | uv0: (0.0, 0.0), 49 | uv1: (0.0, 0.0), 50 | uv2: (0.0, 0.0), 51 | material: mat, 52 | } 53 | } 54 | 55 | pub fn barycenter(&self) -> SVector3 { 56 | (self.v0 + self.v1 + self.v2) * 3.0f32.recip() 57 | } 58 | 59 | pub fn intersect(&self, ray: &MRay, isect: MIntersection) -> MIntersection { 60 | // One would expect that if the triangle were represented as 61 | // (v0, e1, e2) instead of (v0, v1, v2), that would be faster because we 62 | // could avoid the subtractions here. My measurements show that the 63 | // converse is true. 64 | // TODO: Add a proper benchmark. 65 | let v0 = MVector3::broadcast(self.v0); 66 | 67 | // Note: broadcasting before doing the subtract, although it seems 68 | // silly, improves performance by ~5 ns per intersection (25%). 69 | let e1 = MVector3::broadcast(self.v0) - MVector3::broadcast(self.v2); 70 | let e2 = MVector3::broadcast(self.v1) - MVector3::broadcast(self.v0); 71 | 72 | // All points P on the plane in which the triangle lies satisfy the 73 | // equation (P . normal) = c for a unique constant c determined by the 74 | // plane. (The dot denotes the dot product here.) To intersect the ray 75 | // with the plane, solve the equation (O + tD) . normal = c, where O 76 | // is the origin of the ray and D the direction. Note: if the ray 77 | // direction D is normalized, then t is the distance from the ray origin 78 | // to the plane. There is no need to normalize the triangle normal at 79 | // this point, because it appears both in the numerator and denominator.
80 | let normal_denorm = e1.cross(e2); 81 | let from_ray = v0 - ray.origin; 82 | 83 | // Use a true division (_mm256_div_ps), not the reciprocal approximation 84 | // (_mm256_rcp_ps) because the approximation is too inaccurate and 85 | // causes visual artifacts. The alternative is to use the approximation 86 | // with one Newton iteration, but that is slightly slower than just 87 | // doing the division. (Even though the microbenchmarks show that 88 | // `recip_precise` is faster than the division, when used in this 89 | // method, the division is faster.) 90 | let denom = Mf32::one() / ray.direction.dot(normal_denorm); 91 | let t = from_ray.dot(normal_denorm) * denom; 92 | 93 | // If the potential intersection is further away than the current 94 | // intersection for all of the rays, it is possible to early out. This 95 | // cranks up the number of branches from 209M/s to 256M/s and the 96 | // misprediction rate from 0.66% to 1.11%. Surprisingly, there is no 97 | // significant effect on the framerate. It appears that the early out 98 | // wins almost exactly cancel the mispredict penalty on my Skylake i7. 99 | // I opt for not poisoning the branch prediction cache here. 100 | 101 | // if (t - isect.distance).all_sign_bits_positive() { 102 | // return isect 103 | // } 104 | 105 | // Express the location of the intersection in terms of the basis for 106 | // the plane given by (-e1, e2). The computation of u and v is based on 107 | // the method in this paper (there they are called alpha and beta): 108 | // https://www.cs.utah.edu/~aek/research/triangle.pdf 109 | let cross = ray.direction.cross(from_ray); 110 | let u = cross.dot(e2) * denom; 111 | let v = cross.dot(e1) * denom; 112 | let w = (Mf32::one() - u) - v; 113 | 114 | // In this coordinate system, the triangle is the set of points such 115 | // { (u, v) in plane | u >= 0 and v >= 0 and u + v <= 1 } 116 | 117 | // We need t to be positive, because we should not intersect backwards.
118 | // Also, u and v need to be positive. We can abuse the vblendvps 119 | // instruction, which considers only the sign bit, so if t, u, v, and w 120 | // all have sign bit set to 0 (positive), then their bitwise or will 121 | // have so too. If w is positive then u + v < 1.0. 122 | let mask_positive = (t | u) | (v | w); 123 | 124 | // The intersection also needs to be closer than any previous 125 | // intersection. (Again, do the reverse comparison because sign bit 1 126 | // means discard intersection.) 127 | let mask_closer = t.geq(isect.distance); 128 | 129 | // Interpolate the texture coordinates. 130 | let (tx0x, tx0y) = (Mf32::broadcast(self.uv0.0), Mf32::broadcast(self.uv0.1)); 131 | let (tx1x, tx1y) = (Mf32::broadcast(self.uv1.0), Mf32::broadcast(self.uv1.1)); 132 | let (tx2x, tx2y) = (Mf32::broadcast(self.uv2.0), Mf32::broadcast(self.uv2.1)); 133 | let tex_x = tx0x.mul_add(w, tx1x.mul_add(v, tx2x * u)); 134 | let tex_y = tx0y.mul_add(w, tx1y.mul_add(v, tx2y * u)); 135 | 136 | let new_isect = MIntersection { 137 | position: ray.direction.mul_add(t, ray.origin), 138 | normal: normal_denorm.normalized(), 139 | distance: t, 140 | material: MMaterial::broadcast_material(self.material), 141 | tex_coords: (tex_x, tex_y), 142 | }; 143 | 144 | // Per ray, pick the new intersection if it is closer and if it was 145 | // indeed an intersection of the triangle, or pick the previous 146 | // intersection otherwise. 147 | new_isect.pick(&isect, mask_positive | (ray.active | mask_closer)) 148 | } 149 | 150 | /// Intersects the triangle to determine the probability density for the 151 | /// given ray. 152 | pub fn intersect_direct(&self, ray: &MRay) -> MDirectIntersection { 153 | // See `intersect()` for commented version. 
154 | let v0 = MVector3::broadcast(self.v0); 155 | let e1 = MVector3::broadcast(self.v0) - MVector3::broadcast(self.v2); 156 | let e2 = MVector3::broadcast(self.v1) - MVector3::broadcast(self.v0); 157 | 158 | let normal_denorm = e1.cross(e2); 159 | let norm_sqr = normal_denorm.norm_squared(); 160 | let rnorm = norm_sqr.rsqrt(); 161 | let area = Mf32::broadcast(0.5) * rnorm.recip_fast(); 162 | let from_ray = v0 - ray.origin; 163 | 164 | // This version does not need to be as accurate as the regular intersect 165 | // because it is only used to estimate probability densities. Hence the 166 | // fast reciprocal approximation is fine here. 167 | let denom = ray.direction.dot(normal_denorm).recip_fast(); 168 | let t = from_ray.dot(normal_denorm) * denom; 169 | 170 | let cross = ray.direction.cross(from_ray); 171 | let u = cross.dot(e2) * denom; 172 | let v = cross.dot(e1) * denom; 173 | 174 | // If the sign bit of mask is 0 (positive), the triangle was 175 | // intersected. 176 | let mask_uv = (u + v).geq(Mf32::one()); 177 | let mask = (u | v) | (t | mask_uv); 178 | 179 | MDirectIntersection { 180 | normal: normal_denorm * rnorm, 181 | distance: t, 182 | area: area, 183 | mask: mask, 184 | } 185 | } 186 | } 187 | 188 | #[test] 189 | fn intersect_triangle() { 190 | use ray::SRay; 191 | 192 | let triangle = Triangle::new( 193 | SVector3::new(0.0, 1.0, 1.0), 194 | SVector3::new(-1.0, -1.0, 1.0), 195 | SVector3::new(1.0, -1.0, 1.0), 196 | SMaterial::white(), 197 | ); 198 | 199 | let r1 = SRay { 200 | origin: SVector3::zero(), 201 | direction: SVector3::new(0.0, 0.0, 1.0), 202 | }; 203 | 204 | let r2 = SRay { 205 | origin: SVector3::new(-1.0, 0.0, 0.0), 206 | direction: SVector3::new(0.0, 0.0, 1.0), 207 | }; 208 | 209 | let ray = MRay::generate(|i| if i % 2 == 0 { r1.clone() } else { r2.clone() }); 210 | 211 | let isect_far = MIntersection::with_max_distance(1e5); 212 | let isect = triangle.intersect(&ray, isect_far); 213 | 214 | println!("distance is {}", isect.distance.0); 
215 | assert!(isect.distance.0 < 1.01); 216 | assert!(isect.distance.0 > 0.99); 217 | assert_eq!(isect.distance.1, 1e5); 218 | 219 | let up = MVector3::new(Mf32::zero(), Mf32::zero(), Mf32::one()); 220 | let should_be_origin = isect.position - up; 221 | let should_be_zero = should_be_origin.norm_squared(); 222 | assert!(should_be_zero.0 < 0.01); 223 | } 224 | 225 | #[test] 226 | fn intersect_triangle_direct() { 227 | use ray::SRay; 228 | 229 | let triangle = Triangle::new( 230 | SVector3::new(0.0, 1.0, 1.0), 231 | SVector3::new(-1.0, -1.0, 1.0), 232 | SVector3::new(1.0, -1.0, 1.0), 233 | SMaterial::white(), 234 | ); 235 | 236 | let r1 = SRay { 237 | origin: SVector3::zero(), 238 | direction: SVector3::new(0.0, 0.0, 1.0), 239 | }; 240 | 241 | let r2 = SRay { 242 | origin: SVector3::new(-1.0, 0.0, 0.0), 243 | direction: SVector3::new(0.0, 0.0, 1.0), 244 | }; 245 | 246 | let ray = MRay::generate(|i| if i % 2 == 0 { r1.clone() } else { r2.clone() }); 247 | 248 | let isect_direct = triangle.intersect_direct(&ray); 249 | assert!(isect_direct.distance.0 < 1.01); 250 | assert!(isect_direct.distance.0 > 0.99); 251 | assert!(isect_direct.distance.1 < 1.01); 252 | assert!(isect_direct.distance.1 > 0.99); 253 | 254 | let normal_norm = isect_direct.normal.norm_squared(); 255 | assert!(normal_norm.0 < 1.01); 256 | assert!(normal_norm.0 > 0.99); 257 | assert!(normal_norm.1 < 1.01); 258 | assert!(normal_norm.1 > 0.99); 259 | } 260 | 261 | #[bench] 262 | fn bench_intersect_8_mrays_per_tri(b: &mut test::Bencher) { 263 | let rays = bench::mrays_inward(4096 / 8); 264 | let tris = bench::triangles(4096); 265 | let mut rays_it = rays.iter().cycle(); 266 | let mut tris_it = tris.iter().cycle(); 267 | b.iter(|| { 268 | let triangle = tris_it.next().unwrap(); 269 | for _ in 0..8 { 270 | let ray = rays_it.next().unwrap(); 271 | let isect = MIntersection::with_max_distance(1e5); 272 | test::black_box(triangle.intersect(&ray, isect)); 273 | } 274 | }); 275 | } 276 | 277 | #[bench] 278 | fn 
bench_intersect_8_tris_per_mray(b: &mut test::Bencher) { 279 | let rays = bench::mrays_inward(4096 / 8); 280 | let tris = bench::triangles(4096); 281 | let mut rays_it = rays.iter().cycle(); 282 | let mut tris_it = tris.iter().cycle(); 283 | b.iter(|| { 284 | let ray = rays_it.next().unwrap(); 285 | let mut isect = MIntersection::with_max_distance(1e5); 286 | for _ in 0..8 { 287 | let triangle = tris_it.next().unwrap(); 288 | isect = triangle.intersect(&ray, isect); 289 | } 290 | test::black_box(isect); 291 | }); 292 | } 293 | 294 | #[bench] 295 | fn bench_intersect_direct_8_tris_per_mray(b: &mut test::Bencher) { 296 | let rays = bench::mrays_inward(4096 / 8); 297 | let tris = bench::triangles(8); 298 | let mut rays_it = rays.iter().cycle(); 299 | b.iter(|| { 300 | let ray = rays_it.next().unwrap(); 301 | for triangle in &tris { 302 | test::black_box(triangle.intersect_direct(&ray)); 303 | } 304 | }); 305 | } 306 | -------------------------------------------------------------------------------- /src/ui.rs: -------------------------------------------------------------------------------- 1 | // Convector -- An interactive CPU path tracer 2 | // Copyright 2016 Ruud van Asseldonk 3 | 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License version 3. A copy 6 | // of the License is available in the root of the repository. 7 | 8 | //! This module handles user input and getting pixels onto the screen. It uses 9 | //! the Glium library, a safe wrapper around OpenGL. 
10 | 11 | use filebuffer::FileBuffer; 12 | use glium::{DisplayBuild, Program, Surface, VertexBuffer}; 13 | use glium::backend::Facade; 14 | use glium::backend::glutin_backend::GlutinFacade; 15 | use glium::glutin::{Event, WindowBuilder}; 16 | use glium::index::{NoIndices, PrimitiveType}; 17 | use glium::texture::{MipmapsOption, RawImage2d, SrgbTexture2d, Texture2d}; 18 | use stats::GlobalStats; 19 | use std::str; 20 | use time::PreciseTime; 21 | 22 | /// Vertex for the full-screen quad. 23 | #[derive(Copy, Clone)] 24 | struct Vertex { 25 | position: [f32; 2], 26 | tex_coords: [f32; 2], 27 | } 28 | 29 | implement_vertex!(Vertex, position, tex_coords); 30 | 31 | /// A full-screen quad that can be rendered by OpenGL. 32 | struct FullScreenQuad { 33 | vertex_buffer: VertexBuffer, 34 | indices: NoIndices, 35 | program_blend: Program, 36 | program_gbuffer: Program, 37 | program_id: Program, 38 | program_median: Program, 39 | } 40 | 41 | impl FullScreenQuad { 42 | /// Sets up the vertex buffer and shader for a full-screen quad. 
43 | pub fn new(facade: &F) -> FullScreenQuad { 44 | let vertex1 = Vertex { position: [-1.0, -1.0], tex_coords: [0.0, 0.0] }; 45 | let vertex2 = Vertex { position: [ 1.0, -1.0], tex_coords: [1.0, 0.0] }; 46 | let vertex3 = Vertex { position: [-1.0, 1.0], tex_coords: [0.0, 1.0] }; 47 | let vertex4 = Vertex { position: [ 1.0, 1.0], tex_coords: [1.0, 1.0] }; 48 | let quad = vec![vertex1, vertex2, vertex3, vertex4]; 49 | let vertex_buffer = VertexBuffer::new(facade, &quad).unwrap(); 50 | let indices = NoIndices(PrimitiveType::TriangleStrip); 51 | 52 | let vertex_shader = FileBuffer::open("src/gpu/vertex.glsl") 53 | .expect("failed to load vertex shader source"); 54 | 55 | let program_blend = { 56 | let fragment_shader = FileBuffer::open("src/gpu/blend.glsl") 57 | .expect("failed to load fragment shader source"); 58 | 59 | Program::from_source(facade, 60 | str::from_utf8(&vertex_shader[..]).unwrap(), 61 | str::from_utf8(&fragment_shader[..]).unwrap(), 62 | None) 63 | .unwrap() 64 | }; 65 | 66 | let program_gbuffer = { 67 | let fragment_shader = FileBuffer::open("src/gpu/gbuffer.glsl") 68 | .expect("failed to load fragment shader source"); 69 | 70 | Program::from_source(facade, 71 | str::from_utf8(&vertex_shader[..]).unwrap(), 72 | str::from_utf8(&fragment_shader[..]).unwrap(), 73 | None) 74 | .unwrap() 75 | }; 76 | 77 | let program_id = { 78 | let fragment_shader = FileBuffer::open("src/gpu/id.glsl") 79 | .expect("failed to load fragment shader source"); 80 | 81 | Program::from_source(facade, 82 | str::from_utf8(&vertex_shader[..]).unwrap(), 83 | str::from_utf8(&fragment_shader[..]).unwrap(), 84 | None) 85 | .unwrap() 86 | }; 87 | 88 | let program_median = { 89 | let fragment_shader = FileBuffer::open("src/gpu/median.glsl") 90 | .expect("failed to load fragment shader source"); 91 | 92 | Program::from_source(facade, 93 | str::from_utf8(&vertex_shader[..]).unwrap(), 94 | str::from_utf8(&fragment_shader[..]).unwrap(), 95 | None) 96 | .unwrap() 97 | }; 98 | 99 | 
FullScreenQuad { 100 | vertex_buffer: vertex_buffer, 101 | indices: indices, 102 | program_blend: program_blend, 103 | program_gbuffer: program_gbuffer, 104 | program_id: program_id, 105 | program_median: program_median, 106 | } 107 | } 108 | 109 | /// Renders the frames blended to the target surface. 110 | pub fn draw_blended(&self, target: &mut S, frames: &[Texture2d]) { 111 | let uniforms = uniform! { 112 | frame0: &frames[0], 113 | frame1: &frames[1], 114 | frame2: &frames[2], 115 | frame3: &frames[3], 116 | frame4: &frames[4], 117 | frame5: &frames[5], 118 | frame6: &frames[6], 119 | frame7: &frames[7], 120 | }; 121 | target.draw(&self.vertex_buffer, 122 | &self.indices, 123 | &self.program_blend, 124 | &uniforms, 125 | &Default::default()) 126 | .expect("failed to draw quad"); 127 | } 128 | 129 | /// Renders a single frame to the target surface. 130 | pub fn draw_single(&self, target: &mut S, frame: &Texture2d) { 131 | // Draw blended as well, but blend between the same frame. 132 | let uniforms = uniform! { 133 | frame0: frame, 134 | frame1: frame, 135 | frame2: frame, 136 | frame3: frame, 137 | frame4: frame, 138 | frame5: frame, 139 | frame6: frame, 140 | frame7: frame, 141 | }; 142 | target.draw(&self.vertex_buffer, 143 | &self.indices, 144 | &self.program_blend, 145 | &uniforms, 146 | &Default::default()) 147 | .expect("failed to draw quad"); 148 | } 149 | 150 | /// Draws the source onto the target. 151 | /// 152 | /// This does not have the same effect as using `source.fill()`, because 153 | /// that does not apply the linear RGB -> sRGB conversion when the target is 154 | /// the framebuffer, whereas this method does (this is handled automatically 155 | /// by OpenGL). 156 | pub fn draw_id(&self, target: &mut S, source: &Texture2d) { 157 | let uniforms = uniform! 
{ 158 | frame: source, 159 | }; 160 | target.draw(&self.vertex_buffer, 161 | &self.indices, 162 | &self.program_id, 163 | &uniforms, 164 | &Default::default()) 165 | .expect("failed to draw quad"); 166 | } 167 | 168 | /// Applies the gbuffer shader for texture filtering. 169 | pub fn draw_gbuffer(&self, 170 | target: &mut S, 171 | frame: &Texture2d, 172 | gbuffer: &Texture2d, 173 | textures: &[SrgbTexture2d]) { 174 | let uniforms = uniform! { 175 | frame: frame, 176 | gbuffer: gbuffer, 177 | texture1: &textures[0], 178 | texture2: &textures[1], 179 | }; 180 | target.draw(&self.vertex_buffer, 181 | &self.indices, 182 | &self.program_gbuffer, 183 | &uniforms, 184 | &Default::default()) 185 | .expect("failed to draw quad"); 186 | } 187 | 188 | /// Applies a median filter to the source and draws that to the target. 189 | pub fn draw_median(&self, 190 | target: &mut S, 191 | source: &Texture2d, 192 | width: u32, 193 | height: u32) { 194 | let uniforms = uniform! { 195 | frame: source, 196 | pixel_size: [1.0 / width as f32, 1.0 / height as f32], 197 | }; 198 | target.draw(&self.vertex_buffer, 199 | &self.indices, 200 | &self.program_median, 201 | &uniforms, 202 | &Default::default()) 203 | .expect("failed to draw quad"); 204 | } 205 | } 206 | 207 | pub struct Window { 208 | display: GlutinFacade, 209 | quad: FullScreenQuad, 210 | frames: [Texture2d; 8], 211 | scratch: Texture2d, 212 | gbuffer_texture: Texture2d, 213 | textures: Vec, 214 | frame_index: u32, 215 | enable_blend: bool, 216 | enable_median: bool, 217 | width: u32, 218 | height: u32, 219 | } 220 | 221 | pub enum Action { 222 | DumpTrace, 223 | None, 224 | PrintStats, 225 | Quit, 226 | ToggleDebugView, 227 | ToggleRealtime, 228 | } 229 | 230 | fn black_bitmap(width: u32, height: u32) -> Vec { 231 | let size = width * height * 4; 232 | let mut bitmap = Vec::with_capacity(size as usize); 233 | for _ in 0..size { 234 | bitmap.push(0); 235 | } 236 | bitmap 237 | } 238 | 239 | impl Window { 240 | /// Opens a new 
window using Glutin. 241 | pub fn new(width: u32, height: u32, title: &str) -> Window { 242 | use std::mem; 243 | 244 | // TODO: Proper HiDPI support. 245 | let display = WindowBuilder::new() 246 | .with_dimensions(width, height) 247 | .with_title(String::from(title)) 248 | .with_srgb(Some(true)) // Automatically convert RGB -> sRGB. 249 | .with_vsync() 250 | .build_glium() 251 | .expect("failed to create gl window"); 252 | 253 | let quad = FullScreenQuad::new(&display); 254 | 255 | let scratch = Texture2d::empty(&display, width, height) 256 | .expect("failed to create scratch texture"); 257 | 258 | let gbuffer_tex = Texture2d::empty(&display, width, height) 259 | .expect("failed to create scratch texture"); 260 | 261 | let mut window = Window { 262 | display: display, 263 | quad: quad, 264 | frames: unsafe { mem::uninitialized() }, 265 | scratch: scratch, 266 | gbuffer_texture: gbuffer_tex, 267 | textures: Vec::new(), 268 | frame_index: 0, 269 | enable_blend: true, 270 | enable_median: true, 271 | width: width, 272 | height: height, 273 | }; 274 | 275 | let f0 = window.upload_frame(black_bitmap(width, height)); 276 | let f1 = window.upload_frame(black_bitmap(width, height)); 277 | let f2 = window.upload_frame(black_bitmap(width, height)); 278 | let f3 = window.upload_frame(black_bitmap(width, height)); 279 | let f4 = window.upload_frame(black_bitmap(width, height)); 280 | let f5 = window.upload_frame(black_bitmap(width, height)); 281 | let f6 = window.upload_frame(black_bitmap(width, height)); 282 | let f7 = window.upload_frame(black_bitmap(width, height)); 283 | let frames = [f0, f1, f2, f3, f4, f5, f6, f7]; 284 | 285 | // Put the frames in place and avoid deallocating uninitialized memory. 
286 | mem::forget(mem::replace(&mut window.frames, frames)); 287 | 288 | window 289 | } 290 | 291 | fn upload_frame(&mut self, bitmap: Vec) -> Texture2d { 292 | let dimensions = (self.width, self.height); 293 | let texture_data = RawImage2d::from_raw_rgba(bitmap, dimensions); 294 | let texture = Texture2d::with_mipmaps(&self.display, texture_data, MipmapsOption::NoMipmap) 295 | .expect("failed to create texture"); 296 | texture 297 | } 298 | 299 | /// Uploads a texture to the GPU. This is intended for the textures that are 300 | /// used for the scene, not the full-screen rendered frames. Texture 301 | /// dimensions must be 1024 x 1024. 302 | pub fn upload_texture(&mut self, bitmap: Vec) { 303 | assert_eq!(bitmap.len(), 1024 * 1024 * 3); 304 | 305 | let texture_data = RawImage2d::from_raw_rgb(bitmap, (1024, 1024)); 306 | let texture = 307 | SrgbTexture2d::with_mipmaps(&self.display, texture_data, MipmapsOption::NoMipmap) 308 | .expect("failed to create texture"); 309 | 310 | self.textures.push(texture); 311 | } 312 | 313 | pub fn display_buffer(&mut self, 314 | rgba_buffer: Vec, 315 | gbuffer: Vec, 316 | stats: &mut GlobalStats) { 317 | assert_eq!(rgba_buffer.len(), 318 | self.width as usize * self.height as usize * 4); 319 | 320 | let begin_texture = PreciseTime::now(); 321 | 322 | // Upload the render result to the GPU. It is not yet correct, it needs 323 | // a gbuffer pass to add the textures. 324 | self.scratch = self.upload_frame(rgba_buffer); 325 | self.gbuffer_texture = self.upload_frame(gbuffer); 326 | 327 | // TODO: Fix timers and trace here. 328 | 329 | // Apply the gbuffer pass and render into one of the eight frames that 330 | // are kept on the GPU. 
331 | self.frame_index = (self.frame_index + 1) % 8; 332 | let frame_index = self.frame_index as usize; 333 | let mut target = self.frames[frame_index].as_surface(); 334 | self.quad.draw_gbuffer(&mut target, 335 | &self.scratch, 336 | &self.gbuffer_texture, 337 | &self.textures[..]); 338 | 339 | let begin_draw = PreciseTime::now(); 340 | 341 | // Blend the past eight frames together into the scratch texture. (Or 342 | // not, if blending is disabled.) 343 | let mut target = self.scratch.as_surface(); 344 | if self.enable_blend { 345 | self.quad.draw_blended(&mut target, &self.frames[..]); 346 | } else { 347 | self.quad.draw_single(&mut target, &self.frames[frame_index]); 348 | } 349 | 350 | // Apply a median filter to the scratch texture (or not if disabled) and 351 | // display that. Finishing drawing will swap the buffers and wait for a 352 | // vsync. 353 | let mut target = self.display.draw(); 354 | if self.enable_median { 355 | self.quad.draw_median(&mut target, &self.scratch, self.width, self.height); 356 | } else { 357 | self.quad.draw_id(&mut target, &self.scratch); 358 | } 359 | target.finish().expect("failed to swap buffers"); 360 | 361 | let end_draw = PreciseTime::now(); 362 | stats.tex_upload_us.insert_time_us(begin_texture.to(begin_draw)); 363 | stats.draw_vsync_us.insert_time_us(begin_draw.to(end_draw)); 364 | } 365 | 366 | /// Handles all window events and returns an action to be performed. 367 | pub fn handle_events(&mut self) -> Action { 368 | for ev in self.display.poll_events() { 369 | match ev { 370 | // Window was closed by the user. 371 | Event::Closed => return Action::Quit, 372 | // The user pressed 'b' to toggle blending. 373 | Event::ReceivedCharacter('b') => self.enable_blend = !self.enable_blend, 374 | // The user pressed 'd' to toggle debug view. 375 | Event::ReceivedCharacter('d') => return Action::ToggleDebugView, 376 | // The user pressed 'm' to toggle the median filter. 
377 | Event::ReceivedCharacter('m') => self.enable_median = !self.enable_median, 378 | // The user pressed 'q' for quit. 379 | Event::ReceivedCharacter('q') => return Action::Quit, 380 | // The user pressed 'r' to toggle the render mode. 381 | Event::ReceivedCharacter('r') => return Action::ToggleRealtime, 382 | // The user pressed 's' for stats. 383 | Event::ReceivedCharacter('s') => return Action::PrintStats, 384 | // The user pressed 't' for trace. 385 | Event::ReceivedCharacter('t') => return Action::DumpTrace, 386 | // Something else. 387 | _ => (), 388 | } 389 | } 390 | Action::None 391 | } 392 | } 393 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | // Convector -- An interactive CPU path tracer 2 | // Copyright 2016 Ruud van Asseldonk 3 | 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License version 3. A copy 6 | // of the License is available in the root of the repository. 7 | 8 | //! A mod with utility functions. 9 | 10 | use alloc::heap; 11 | use std::mem; 12 | 13 | /// Allocates a buffer for the specified number of elements, aligned to a cache 14 | /// line. 15 | pub fn cache_line_aligned_vec(len: usize) -> Vec { 16 | unsafe { 17 | let num_bytes = mem::size_of::() * len; 18 | let cache_line_len = 64; 19 | let buffer = heap::allocate(num_bytes, cache_line_len); 20 | let ptr: *mut T = mem::transmute(buffer); 21 | Vec::from_raw_parts(ptr, 0, len) 22 | } 23 | } 24 | 25 | /// Drops a vector that was constructed with `cache_line_aligned_vec` without 26 | /// crashing on Windows. 27 | pub fn drop_cache_line_aligned_vec(mut v: Vec) { 28 | unsafe { 29 | let ptr: *mut u8 = mem::transmute(v.as_mut_ptr()); 30 | let num_bytes = v.capacity() * mem::size_of::(); 31 | 32 | // Prevent the destructor from freeing anything. 
33 | mem::forget(v); 34 | 35 | // Then free manually. 36 | let cache_line_len = 64; 37 | heap::deallocate(ptr, num_bytes, cache_line_len); 38 | } 39 | } 40 | 41 | /// Transmutes the buffer of a vector into a buffer of elements with a different 42 | /// type. The sizes of the types must be multiples of each other. 43 | pub unsafe fn transmute_vec<T, U>(mut v: Vec<T>) -> Vec<U> { 44 | let cap_bytes = mem::size_of::<T>() * v.capacity(); 45 | let len_bytes = mem::size_of::<T>() * v.len(); 46 | 47 | let new_cap = cap_bytes / mem::size_of::<U>(); 48 | let new_len = len_bytes / mem::size_of::<U>(); 49 | 50 | assert_eq!(cap_bytes, new_cap * mem::size_of::<U>()); 51 | assert_eq!(len_bytes, new_len * mem::size_of::<U>()); 52 | 53 | let ptr: *mut U = mem::transmute(v.as_mut_ptr()); 54 | 55 | // Prevent running the destructor of v, we are going to reuse its internals. 56 | mem::forget(v); 57 | 58 | Vec::from_raw_parts(ptr, new_len, new_cap) 59 | } 60 | 61 | /// Transmutes an immutable slice into a mutable slice. 62 | #[allow(mutable_transmutes)] 63 | pub unsafe fn make_mutable<T>(x: &[T]) -> &mut [T] { 64 | // UnsafeCell is a real pain to deal with; after 15 minutes I still did not 65 | // manage to write something that compiles. Just transmute the mutability 66 | // in. 67 | mem::transmute(x) 68 | } 69 | 70 | /// Builds a fixed-size array by calling f for every index. 71 | pub fn generate_slice8<T, F>(mut f: F) -> [T; 8] 72 | where F: FnMut(usize) -> T 73 | { 74 | [f(0), f(1), f(2), f(3), f(4), f(5), f(6), f(7)] 75 | } 76 | -------------------------------------------------------------------------------- /src/vector3.rs: -------------------------------------------------------------------------------- 1 | // Convector -- An interactive CPU path tracer 2 | // Copyright 2016 Ruud van Asseldonk 3 | 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License version 3. A copy 6 | // of the License is available in the root of the repository.
7 | 8 | //! Implements vectors in R3. 9 | 10 | use simd::{Mask, Mf32}; 11 | use std::f32; 12 | use std::fmt; 13 | use std::ops::{Add, Sub, Neg, Mul}; 14 | 15 | #[cfg(test)] 16 | use {bench, test}; 17 | 18 | #[derive(Copy, Clone, Debug, PartialEq)] 19 | pub struct SVector3 { 20 | pub x: f32, 21 | pub y: f32, 22 | pub z: f32, 23 | } 24 | 25 | #[derive(Copy, Clone, Debug, PartialEq)] 26 | pub struct MVector3 { 27 | pub x: Mf32, 28 | pub y: Mf32, 29 | pub z: Mf32, 30 | } 31 | 32 | #[derive(Copy, Clone, Debug)] 33 | pub enum Axis { 34 | X, 35 | Y, 36 | Z, 37 | } 38 | 39 | impl SVector3 { 40 | pub fn new(x: f32, y: f32, z: f32) -> SVector3 { 41 | SVector3 { x: x, y: y, z: z } 42 | } 43 | 44 | pub fn zero() -> SVector3 { 45 | SVector3::new(0.0, 0.0, 0.0) 46 | } 47 | 48 | pub fn one() -> SVector3 { 49 | SVector3::new(1.0, 1.0, 1.0) 50 | } 51 | 52 | #[inline(always)] 53 | pub fn cross_naive(self: SVector3, other: SVector3) -> SVector3 { 54 | let (a, b) = (self, other); 55 | SVector3 { 56 | x: a.y * b.z - a.z * b.y, 57 | y: a.z * b.x - a.x * b.z, 58 | z: a.x * b.y - a.y * b.x, 59 | } 60 | } 61 | 62 | #[inline(always)] 63 | pub fn cross_fma(self: SVector3, other: SVector3) -> SVector3 { 64 | let (a, b) = (self, other); 65 | SVector3 { 66 | x: a.y.mul_add(b.z, -a.z * b.y), 67 | y: a.z.mul_add(b.x, -a.x * b.z), 68 | z: a.x.mul_add(b.y, -a.y * b.x), 69 | } 70 | } 71 | 72 | pub fn cross(self, other: SVector3) -> SVector3 { 73 | // Benchmarks show that the FMA version is faster than the 74 | // naive version (1.9 ns vs 2.1 ns on my Skylake i7). **However** 75 | // the "fma" codegen feature must be enabled, otherwise the naive 76 | // version is faster. 
77 | self.cross_fma(other) 78 | } 79 | 80 | #[inline(always)] 81 | pub fn dot_naive(self, other: SVector3) -> f32 { 82 | let (a, b) = (self, other); 83 | a.x * b.x + a.y * b.y + a.z * b.z 84 | } 85 | 86 | #[inline(always)] 87 | pub fn dot_fma(self, other: SVector3) -> f32 { 88 | let (a, b) = (self, other); 89 | a.x.mul_add(b.x, a.y.mul_add(b.y, a.z * b.z)) 90 | } 91 | 92 | pub fn dot(self, other: SVector3) -> f32 { 93 | // Benchmarks show that the naive version is faster than the FMA version 94 | // when the "fma" codegen feature is not enabled, but when it is the 95 | // performance is similar. The FMA version appears to be slightly more 96 | // stable. 97 | self.dot_fma(other) 98 | } 99 | 100 | pub fn norm_squared(self) -> f32 { 101 | self.dot(self) 102 | } 103 | 104 | pub fn normalized(self) -> SVector3 { 105 | let norm_squared = self.norm_squared(); 106 | if norm_squared == 0.0 { 107 | self 108 | } else { 109 | let rnorm = norm_squared.sqrt().recip(); 110 | SVector3 { 111 | x: self.x * rnorm, 112 | y: self.y * rnorm, 113 | z: self.z * rnorm, 114 | } 115 | } 116 | } 117 | 118 | pub fn get_coord(self, axis: Axis) -> f32 { 119 | match axis { 120 | Axis::X => self.x, 121 | Axis::Y => self.y, 122 | Axis::Z => self.z, 123 | } 124 | } 125 | 126 | /// Returns the coordinatewise minimum of the two vectors. 127 | pub fn min(self, other: SVector3) -> SVector3 { 128 | SVector3 { 129 | x: f32::min(self.x, other.x), 130 | y: f32::min(self.y, other.y), 131 | z: f32::min(self.z, other.z), 132 | } 133 | } 134 | 135 | /// Returns the coordinatewise maximum of the two vectors. 
136 | pub fn max(self, other: SVector3) -> SVector3 { 137 | SVector3 { 138 | x: f32::max(self.x, other.x), 139 | y: f32::max(self.y, other.y), 140 | z: f32::max(self.z, other.z), 141 | } 142 | } 143 | } 144 | 145 | impl MVector3 { 146 | pub fn new(x: Mf32, y: Mf32, z: Mf32) -> MVector3 { 147 | MVector3 { x: x, y: y, z: z } 148 | } 149 | 150 | pub fn zero() -> MVector3 { 151 | MVector3::new(Mf32::zero(), Mf32::zero(), Mf32::zero()) 152 | } 153 | 154 | pub fn broadcast(a: SVector3) -> MVector3 { 155 | MVector3 { 156 | x: Mf32::broadcast(a.x), 157 | y: Mf32::broadcast(a.y), 158 | z: Mf32::broadcast(a.z), 159 | } 160 | } 161 | 162 | /// Builds an mvector by applying the function to the numbers 0..7. 163 | /// 164 | /// Note: this is essentially a transpose, avoid in hot code. 165 | pub fn generate<F>(mut f: F) -> MVector3 166 | where F: FnMut(usize) -> SVector3 167 | { 168 | MVector3 { 169 | x: Mf32::generate(|i| f(i).x), 170 | y: Mf32::generate(|i| f(i).y), 171 | z: Mf32::generate(|i| f(i).z), 172 | } 173 | } 174 | 175 | #[inline(always)] 176 | pub fn cross_naive(self, other: MVector3) -> MVector3 { 177 | let (a, b) = (self, other); 178 | MVector3 { 179 | x: a.y * b.z - a.z * b.y, 180 | y: a.z * b.x - a.x * b.z, 181 | z: a.x * b.y - a.y * b.x, 182 | } 183 | } 184 | 185 | #[inline(always)] 186 | pub fn cross_fma(self, other: MVector3) -> MVector3 { 187 | let (a, b) = (self, other); 188 | MVector3 { 189 | x: a.y.mul_sub(b.z, a.z * b.y), 190 | y: a.z.mul_sub(b.x, a.x * b.z), 191 | z: a.x.mul_sub(b.y, a.y * b.x), 192 | } 193 | } 194 | 195 | pub fn cross(self, other: MVector3) -> MVector3 { 196 | // Benchmarks show that the FMA version is faster than the 197 | // naive version (2.1 ns vs 2.4 ns on my Skylake i7).
198 | self.cross_fma(other) 199 | } 200 | 201 | #[inline(always)] 202 | pub fn dot_naive(self, other: MVector3) -> Mf32 { 203 | let (a, b) = (self, other); 204 | a.x * b.x + a.y * b.y + a.z * b.z 205 | } 206 | 207 | #[inline(always)] 208 | pub fn dot_fma(self, other: MVector3) -> Mf32 { 209 | let (a, b) = (self, other); 210 | a.x.mul_add(b.x, a.y.mul_add(b.y, a.z * b.z)) 211 | } 212 | 213 | pub fn dot(self, other: MVector3) -> Mf32 { 214 | // Benchmarks show no performance difference between the naive version 215 | // and the FMA version. Use the naive one because it is more portable. 216 | self.dot_naive(other) 217 | } 218 | 219 | /// Given a vector in the hemisphere with pole at the positive z-axis, 220 | /// rotates the vector into the hemisphere with pole given by the normal. 221 | pub fn rotate_hemisphere(self, normal: MVector3) -> MVector3 { 222 | // If the z-component of the normal is near -1, we might divide by 0. To 223 | // avoid this, if the z-component is negative, flip the normal. Then we 224 | // end up in the wrong hemisphere, so at the end, flip the computed 225 | // vector again. 226 | let n = normal.pick(-normal, normal.z); 227 | 228 | // One option here would be to take the cross product of the normal and 229 | // an up vector, and the cross product of the normal with that vector, 230 | // to get a new orthonormal basis. Then use the old coordinates in this 231 | // new basis. The method below -- however not as simple -- requires less 232 | // arithmetic operations. 233 | // Based on https://math.stackexchange.com/a/61550/6873. 234 | let v = self; 235 | 236 | // Using the fast reciprocal instead of the precise one does hurt 237 | // precision, but this is used only after the first bounce, so it is 238 | // less of an issue, and the performance difference is significant. The 239 | // inaccurate version is about 15% faster. 
240 | let rz = (Mf32::one() + n.z).recip_fast(); 241 | 242 | let c = n.x * n.y * rz; 243 | let x = v.x.mul_sub(n.y.mul_add(n.y * rz, n.z), v.y.mul_sub(c, v.z * n.x)); 244 | let y = v.x.neg_mul_add(c, v.y.mul_add(n.x.mul_add(n.x * rz, n.z), v.z * n.y)); 245 | let z = v.x.neg_mul_add(n.x, v.y.neg_mul_add(n.y, v.z * n.z)); 246 | 247 | let result = MVector3::new(x, y, z); 248 | 249 | // If we flipped the normal, flip the result too. 250 | result.pick(-result, normal.z) 251 | } 252 | 253 | /// Scalar multiplication and vector add using fused multiply-add. 254 | pub fn mul_add(self, factor: Mf32, other: MVector3) -> MVector3 { 255 | MVector3 { 256 | x: self.x.mul_add(factor, other.x), 257 | y: self.y.mul_add(factor, other.y), 258 | z: self.z.mul_add(factor, other.z), 259 | } 260 | } 261 | 262 | /// Scalar multiplication with -factor and vector add using fused multiply-add. 263 | pub fn neg_mul_add(self, factor: Mf32, other: MVector3) -> MVector3 { 264 | MVector3 { 265 | x: self.x.neg_mul_add(factor, other.x), 266 | y: self.y.neg_mul_add(factor, other.y), 267 | z: self.z.neg_mul_add(factor, other.z), 268 | } 269 | } 270 | 271 | /// Scalar multiplication and vector subtract using fused multiply-subtract. 272 | pub fn mul_sub(self, factor: Mf32, other: MVector3) -> MVector3 { 273 | MVector3 { 274 | x: self.x.mul_sub(factor, other.x), 275 | y: self.y.mul_sub(factor, other.y), 276 | z: self.z.mul_sub(factor, other.z), 277 | } 278 | } 279 | 280 | /// Multiplies two vectors coordinatewise. 281 | pub fn mul_coords(self, factors: MVector3) -> MVector3 { 282 | MVector3 { 283 | x: self.x * factors.x, 284 | y: self.y * factors.y, 285 | z: self.z * factors.z, 286 | } 287 | } 288 | 289 | /// Returns ||self|| * ||self||. 290 | pub fn norm_squared(self) -> Mf32 { 291 | self.dot(self) 292 | } 293 | 294 | /// Returns 1 / ||self||. 
295 | pub fn rnorm(self) -> Mf32 { 296 | self.norm_squared().rsqrt() 297 | } 298 | 299 | pub fn normalized(self) -> MVector3 { 300 | let rnorm = self.rnorm(); 301 | MVector3 { 302 | x: self.x * rnorm, 303 | y: self.y * rnorm, 304 | z: self.z * rnorm, 305 | } 306 | } 307 | 308 | /// Clamps every coordinate to 1.0 if it exceeds 1.0. 309 | pub fn clamp_one(self) -> MVector3 { 310 | MVector3 { 311 | x: Mf32::one().min(self.x), 312 | y: Mf32::one().min(self.y), 313 | z: Mf32::one().min(self.z), 314 | } 315 | } 316 | 317 | /// Picks self if the sign bit of mask is 0, or picks other if it is 1. 318 | pub fn pick(self, other: MVector3, mask: Mask) -> MVector3 { 319 | MVector3 { 320 | x: self.x.pick(other.x, mask), 321 | y: self.y.pick(other.y, mask), 322 | z: self.z.pick(other.z, mask), 323 | } 324 | } 325 | 326 | /// Returns whether all components are finite. 327 | /// 328 | /// This is slow, use only for diagnostic purposes. 329 | pub fn all_finite(self) -> bool { 330 | self.x.all_finite() && self.y.all_finite() && self.z.all_finite() 331 | } 332 | } 333 | 334 | impl Add for SVector3 { 335 | type Output = SVector3; 336 | 337 | fn add(self, other: SVector3) -> SVector3 { 338 | SVector3 { 339 | x: self.x + other.x, 340 | y: self.y + other.y, 341 | z: self.z + other.z, 342 | } 343 | } 344 | } 345 | 346 | impl Add for MVector3 { 347 | type Output = MVector3; 348 | 349 | fn add(self, other: MVector3) -> MVector3 { 350 | MVector3 { 351 | x: self.x + other.x, 352 | y: self.y + other.y, 353 | z: self.z + other.z, 354 | } 355 | } 356 | } 357 | 358 | impl Sub for SVector3 { 359 | type Output = SVector3; 360 | 361 | fn sub(self, other: SVector3) -> SVector3 { 362 | SVector3 { 363 | x: self.x - other.x, 364 | y: self.y - other.y, 365 | z: self.z - other.z, 366 | } 367 | } 368 | } 369 | 370 | impl Sub for MVector3 { 371 | type Output = MVector3; 372 | 373 | fn sub(self, other: MVector3) -> MVector3 { 374 | MVector3 { 375 | x: self.x - other.x, 376 | y: self.y - other.y, 377 | z: 
self.z - other.z, 378 | } 379 | } 380 | } 381 | 382 | impl Neg for SVector3 { 383 | type Output = SVector3; 384 | 385 | fn neg(self) -> SVector3 { 386 | SVector3 { 387 | x: -self.x, 388 | y: -self.y, 389 | z: -self.z, 390 | } 391 | } 392 | } 393 | 394 | impl Neg for MVector3 { 395 | type Output = MVector3; 396 | 397 | fn neg(self) -> MVector3 { 398 | MVector3 { 399 | x: -self.x, 400 | y: -self.y, 401 | z: -self.z, 402 | } 403 | } 404 | } 405 | 406 | impl Mul<f32> for SVector3 { 407 | type Output = SVector3; 408 | 409 | fn mul(self, a: f32) -> SVector3 { 410 | SVector3 { 411 | x: self.x * a, 412 | y: self.y * a, 413 | z: self.z * a, 414 | } 415 | } 416 | } 417 | 418 | impl Mul<Mf32> for MVector3 { 419 | type Output = MVector3; 420 | 421 | fn mul(self, a: Mf32) -> MVector3 { 422 | MVector3 { 423 | x: self.x * a, 424 | y: self.y * a, 425 | z: self.z * a, 426 | } 427 | } 428 | } 429 | 430 | impl fmt::Display for SVector3 { 431 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 432 | write!(f, "({}, {}, {})", self.x, self.y, self.z) 433 | } 434 | } 435 | 436 | #[cfg(test)] 437 | fn assert_mvectors_equal(expected: MVector3, computed: MVector3, margin: f32) { 438 | // Test that the vectors are equal, to within floating point inaccuracy 439 | // margins. 440 | let error = (computed - expected).norm_squared(); 441 | assert!((Mf32::broadcast(margin * margin) - error).all_sign_bits_positive(), 442 | "expected: {:?}, computed: {:?}", expected, computed); 443 | } 444 | 445 | #[test] 446 | fn verify_rotate_hemisphere() { 447 | let x = MVector3::new(Mf32::one(), Mf32::zero(), Mf32::zero()); 448 | let y = MVector3::new(Mf32::zero(), Mf32::one(), Mf32::zero()); 449 | let z = MVector3::new(Mf32::zero(), Mf32::zero(), Mf32::one()); 450 | 451 | // If we rotate z -> y, then a vector along x does not change. 452 | assert_mvectors_equal(x, x.rotate_hemisphere(y), 1e-3); 453 | 454 | // Same for z -> x, then y does not change.
455 | assert_mvectors_equal(y, y.rotate_hemisphere(x), 1e-3); 456 | 457 | // If we rotate z -> y about the x-axis, then y rotates to -z. 458 | assert_mvectors_equal(-z, y.rotate_hemisphere(y), 1e-3); 459 | 460 | // If we rotate z -> x about the y-axis, then x rotates to -z. 461 | assert_mvectors_equal(-z, x.rotate_hemisphere(x), 1e-3); 462 | 463 | // A starting normal of positive z is assumed, so picking that should not 464 | // change anything. 465 | assert_mvectors_equal(x, x.rotate_hemisphere(z), 1e-3); 466 | assert_mvectors_equal(y, y.rotate_hemisphere(z), 1e-3); 467 | } 468 | 469 | #[test] 470 | fn rotate_hemisphere_is_orthogonal() { 471 | let ns = bench::mvectors_on_unit_sphere(4096); 472 | let xs = bench::mvectors_on_unit_sphere(4096); 473 | let ys = bench::mvectors_on_unit_sphere(4096); 474 | 475 | for (&n, (&x, &y)) in ns.iter().zip(xs.iter().zip(ys.iter())) { 476 | let sum_before = x + y; 477 | let sum_mapped = sum_before.rotate_hemisphere(n); 478 | let n2_before = sum_before.norm_squared(); 479 | let n2_after = sum_mapped.norm_squared(); 480 | 481 | // An orthogonal map does not change the length of vectors. 482 | let error = (n2_before - n2_after) * (n2_before - n2_after); 483 | assert!((Mf32::broadcast(1e-5) - error).all_sign_bits_positive(), 484 | "expected equal norm, got {:?} and {:?}", n2_before, n2_after); 485 | 486 | // Also, an orthogonal map is linear. 487 | let sum_after = x.rotate_hemisphere(n) + y.rotate_hemisphere(n); 488 | assert_mvectors_equal(sum_mapped, sum_after, 1e-3); 489 | } 490 | } 491 | 492 | #[test] 493 | fn rotate_hemisphere_extrema() { 494 | let x = MVector3::new(Mf32::one(), Mf32::zero(), Mf32::zero()); 495 | let y = MVector3::new(Mf32::zero(), Mf32::one(), Mf32::zero()); 496 | let z = MVector3::new(Mf32::zero(), Mf32::zero(), Mf32::one()); 497 | 498 | // The rotation code breaks down at a normal vector -z due to division by 499 | // zero. This edge case should be handled correctly. 
500 | 501 | // If we rotate z -> -z, then a vector along x flips sign. 502 | assert_eq!(x.rotate_hemisphere(-z), -x); 503 | 504 | // A vector along y should change sign too, because the hemisphere is 505 | // flipped if the z-component of the normal is negative. 506 | assert_eq!(y.rotate_hemisphere(-z), -y); 507 | 508 | // The z-axis itself should just rotate along. 509 | assert_eq!(z.rotate_hemisphere(-z), -z); 510 | } 511 | 512 | #[test] 513 | fn rotate_hemisphere_random() { 514 | use random::Rng; 515 | let x = MVector3::new(Mf32::one(), Mf32::zero(), Mf32::zero()); 516 | let epsilon = Mf32::broadcast(0.0001); 517 | let mut rng = Rng::with_seed(1, 2, 3); 518 | let mut had_negative_y = false; 519 | let mut had_positive_y = false; 520 | let mut had_negative_z = false; 521 | let mut had_positive_z = false; 522 | for _ in 0..4096 { 523 | let v = rng.sample_hemisphere_vector(); 524 | let w = v.rotate_hemisphere(x); 525 | assert!((w.x + epsilon).all_sign_bits_positive(), 526 | "when rotating {:?} to the positive x-axis, no x-coordinate should be negative, \ 527 | but the result is {:?}", v, w); 528 | 529 | // After rotation, (y, z) should lie on a circle, and every sign should 530 | // occur for these two coordinates. 531 | had_negative_y = had_negative_y || !(epsilon + w.y).all_sign_bits_positive(); 532 | had_positive_y = had_positive_y || !(epsilon - w.y).all_sign_bits_positive(); 533 | had_negative_z = had_negative_z || !(epsilon + w.z).all_sign_bits_positive(); 534 | had_positive_z = had_positive_z || !(epsilon - w.z).all_sign_bits_positive(); 535 | } 536 | assert!(had_negative_y); 537 | assert!(had_positive_y); 538 | assert!(had_negative_z); 539 | assert!(had_positive_z); 540 | } 541 | 542 | macro_rules! 
unroll_10 { 543 | { $x: block } => { 544 | $x $x $x $x $x $x $x $x $x $x 545 | } 546 | } 547 | 548 | #[bench] 549 | fn bench_scross_naive_1000(bencher: &mut test::Bencher) { 550 | let vectors = bench::svector3_pairs(4096); 551 | let mut vectors_it = vectors.iter().cycle(); 552 | bencher.iter(|| { 553 | let &(a, b) = vectors_it.next().unwrap(); 554 | for _ in 0..100 { 555 | unroll_10! {{ 556 | test::black_box(test::black_box(a).cross_naive(test::black_box(b))); 557 | }}; 558 | } 559 | }); 560 | } 561 | 562 | #[bench] 563 | fn bench_scross_fma_1000(bencher: &mut test::Bencher) { 564 | let vectors = bench::svector3_pairs(4096); 565 | let mut vectors_it = vectors.iter().cycle(); 566 | bencher.iter(|| { 567 | let &(a, b) = vectors_it.next().unwrap(); 568 | for _ in 0..100 { 569 | unroll_10! {{ 570 | test::black_box(test::black_box(a).cross_fma(test::black_box(b))); 571 | }}; 572 | } 573 | }); 574 | } 575 | 576 | #[bench] 577 | fn bench_mcross_naive_1000(bencher: &mut test::Bencher) { 578 | let vectors = bench::mvector3_pairs(4096 / 8); 579 | let mut vectors_it = vectors.iter().cycle(); 580 | bencher.iter(|| { 581 | let &(a, b) = vectors_it.next().unwrap(); 582 | for _ in 0..100 { 583 | unroll_10! {{ 584 | test::black_box(test::black_box(a).cross_naive(test::black_box(b))); 585 | }}; 586 | } 587 | }); 588 | } 589 | 590 | #[bench] 591 | fn bench_mcross_fma_1000(bencher: &mut test::Bencher) { 592 | let vectors = bench::mvector3_pairs(4096 / 8); 593 | let mut vectors_it = vectors.iter().cycle(); 594 | bencher.iter(|| { 595 | let &(a, b) = vectors_it.next().unwrap(); 596 | for _ in 0..100 { 597 | unroll_10! 
{{ 598 | test::black_box(test::black_box(a).cross_fma(test::black_box(b))); 599 | }}; 600 | } 601 | }); 602 | } 603 | 604 | #[bench] 605 | fn bench_sdot_naive_1000(bencher: &mut test::Bencher) { 606 | let vectors = bench::svector3_pairs(4096); 607 | let mut vectors_it = vectors.iter().cycle(); 608 | bencher.iter(|| { 609 | let &(a, b) = vectors_it.next().unwrap(); 610 | for _ in 0..100 { 611 | unroll_10! {{ 612 | test::black_box(test::black_box(a).dot_naive(test::black_box(b))); 613 | }}; 614 | } 615 | }); 616 | } 617 | 618 | #[bench] 619 | fn bench_sdot_fma_1000(bencher: &mut test::Bencher) { 620 | let vectors = bench::svector3_pairs(4096); 621 | let mut vectors_it = vectors.iter().cycle(); 622 | bencher.iter(|| { 623 | let &(a, b) = vectors_it.next().unwrap(); 624 | for _ in 0..100 { 625 | unroll_10! {{ 626 | test::black_box(test::black_box(a).dot_fma(test::black_box(b))); 627 | }}; 628 | } 629 | }); 630 | } 631 | 632 | #[bench] 633 | fn bench_mdot_naive_1000(bencher: &mut test::Bencher) { 634 | let vectors = bench::mvector3_pairs(4096 / 8); 635 | let mut vectors_it = vectors.iter().cycle(); 636 | bencher.iter(|| { 637 | let &(a, b) = vectors_it.next().unwrap(); 638 | for _ in 0..100 { 639 | unroll_10! {{ 640 | test::black_box(test::black_box(a).dot_naive(test::black_box(b))); 641 | }}; 642 | } 643 | }); 644 | } 645 | 646 | #[bench] 647 | fn bench_mdot_fma_1000(bencher: &mut test::Bencher) { 648 | let vectors = bench::mvector3_pairs(4096 / 8); 649 | let mut vectors_it = vectors.iter().cycle(); 650 | bencher.iter(|| { 651 | let &(a, b) = vectors_it.next().unwrap(); 652 | for _ in 0..100 { 653 | unroll_10! 
{{ 654 | test::black_box(test::black_box(a).dot_fma(test::black_box(b))); 655 | }}; 656 | } 657 | }); 658 | } 659 | 660 | #[bench] 661 | fn bench_rotate_hemisphere_1000(bencher: &mut test::Bencher) { 662 | let vectors = bench::mvector3_pairs(4096 / 8); 663 | let mut vectors_it = vectors.iter().cycle(); 664 | bencher.iter(|| { 665 | let &(v, n) = vectors_it.next().unwrap(); 666 | for _ in 0..100 { 667 | unroll_10! {{ 668 | test::black_box(test::black_box(v).rotate_hemisphere(test::black_box(n))); 669 | }}; 670 | } 671 | }); 672 | } 673 | -------------------------------------------------------------------------------- /src/wavefront.rs: -------------------------------------------------------------------------------- 1 | // Convector -- An interactive CPU path tracer 2 | // Copyright 2016 Ruud van Asseldonk 3 | 4 | // This program is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU General Public License version 3. A copy 6 | // of the License is available in the root of the repository. 7 | 8 | //! This module reads Wavefront OBJ files. There are crates for that, but 9 | //! reinventing the wheel is much more fun. 10 | 11 | use filebuffer::FileBuffer; 12 | use material::SMaterial; 13 | use std::collections::HashMap; 14 | use std::path::Path; 15 | use std::str::{FromStr, from_utf8}; 16 | use vector3::SVector3; 17 | 18 | pub struct Triangle { 19 | pub vertices: (u32, u32, u32), 20 | pub tex_coords: Option<(u32, u32, u32)>, 21 | pub material: SMaterial, 22 | } 23 | 24 | pub struct Mesh { 25 | pub vertices: Vec<SVector3>, 26 | pub tex_coords: Vec<(f32, f32)>, 27 | pub triangles: Vec<Triangle>, 28 | } 29 | 30 | fn assert_nondegenerate(vertices: &[SVector3], line: u32, i0: u32, i1: u32, i2: u32) { 31 | let v0 = vertices[i0 as usize]; 32 | let v1 = vertices[i1 as usize]; 33 | let v2 = vertices[i2 as usize]; 34 | 35 | // The cross product of two edges must not be zero. If it is, the three 36 | // vertices are collinear.
37 | let e1 = v0 - v2; 38 | let e2 = v1 - v0; 39 | if e1.cross(e2).norm_squared() == 0.0 { 40 | println!("encountered degenerate triangle while loading mesh"); 41 | println!(" line: {}", line); 42 | println!(" vertices: {}, {}, {}", v0, v1, v2); 43 | println!(" indices: {}, {}, {}", i0 + 1, i1 + 1, i2 + 1); 44 | panic!("go clean your geometry"); 45 | } 46 | } 47 | 48 | /// Returns the vertex index, and the texture coordinate index if there is one. 49 | fn parse_vertex_index(index: &str) -> (u32, Option<u32>) { 50 | let mut parts = index.split('/').map(|i| u32::from_str(i).unwrap()); 51 | let vidx = parts.next().expect("missing vertex index"); 52 | let tidx = parts.next(); 53 | // Indices in the obj file are 1-based, but Rust is 0-based. 54 | (vidx - 1, tidx.map(|i| i - 1)) 55 | } 56 | 57 | pub fn push_triangle(vertices: &[SVector3], 58 | triangles: &mut Vec<Triangle>, 59 | i0: (u32, Option<u32>), 60 | i1: (u32, Option<u32>), 61 | i2: (u32, Option<u32>), 62 | material: SMaterial, 63 | line_nr: u32) { 64 | assert_nondegenerate(&vertices, line_nr, i0.0, i1.0, i2.0); 65 | let vidxs = (i0.0, i1.0, i2.0); 66 | let tidxs = match (i0.1, i1.1, i2.1) { 67 | (Some(t0), Some(t1), Some(t2)) => Some((t0, t1, t2)), 68 | _ => None, 69 | }; 70 | let triangle = Triangle { 71 | vertices: vidxs, 72 | tex_coords: tidxs, 73 | material: material, 74 | }; 75 | triangles.push(triangle); 76 | } 77 | 78 | impl Mesh { 79 | pub fn load<P: AsRef<Path>>(path: P) -> Mesh { 80 | Mesh::load_with_materials(path, &HashMap::new()) 81 | } 82 | 83 | pub fn load_with_materials<P: AsRef<Path>>(path: P, 84 | materials: &HashMap<&str, SMaterial>) 85 | -> Mesh { 86 | let fbuffer = FileBuffer::open(path).expect("failed to open file"); 87 | let input = from_utf8(&fbuffer[..]).expect("obj must be valid utf-8"); 88 | 89 | let mut vertices = Vec::new(); 90 | let mut tex_coords = Vec::new(); 91 | let mut triangles = Vec::new(); 92 | let mut material = SMaterial::white(); // The default material. 93 | 94 | for (line, line_nr) in input.lines().zip(1u32..)
{ 95 | if line.is_empty() { 96 | continue; 97 | } 98 | 99 | let mut pieces = line.split_whitespace(); 100 | match pieces.next() { 101 | Some("v") => { 102 | let mut coords = pieces.map(|v| f32::from_str(v).unwrap()); 103 | let vertex = SVector3 { 104 | x: coords.next().expect("missing x coordinate"), 105 | y: coords.next().expect("missing y coordinate"), 106 | z: coords.next().expect("missing z coordinate"), 107 | }; 108 | vertices.push(vertex); 109 | } 110 | Some("vt") => { 111 | let mut coords = pieces.map(|v| f32::from_str(v).unwrap()); 112 | let u = coords.next().expect("missing u coordinate"); 113 | let v = coords.next().expect("missing v coordinate"); 114 | tex_coords.push((u, v)); 115 | } 116 | Some("usemtl") => { 117 | let material_name = pieces.next().expect("missing material name"); 118 | if let Some(&new_mat) = materials.get(material_name) { 119 | material = new_mat; 120 | } else { 121 | panic!("material '{}' not present in material dictionary", 122 | material_name); 123 | } 124 | } 125 | Some("f") => { 126 | // Indices stored are 1-based, convert to 0-based. 127 | let mut indices = pieces.map(parse_vertex_index); 128 | let i0 = indices.next().expect("missing triangle index"); 129 | let i1 = indices.next().expect("missing triangle index"); 130 | let mut i2 = indices.next().expect("missing triangle index"); 131 | 132 | push_triangle(&vertices, &mut triangles, i0, i1, i2, material, line_nr); 133 | 134 | // There might be a quad or n-gon. Assuming it is convex, we 135 | // can triangulate it at import time. 136 | while let Some(i3) = indices.next() { 137 | push_triangle(&vertices, &mut triangles, i0, i2, i3, material, line_nr); 138 | i2 = i3; 139 | } 140 | } 141 | _ => { 142 | // Anything else is not supported. 143 | } 144 | } 145 | } 146 | 147 | Mesh { 148 | vertices: vertices, 149 | triangles: triangles, 150 | tex_coords: tex_coords, 151 | } 152 | } 153 | } 154 | 155 | // The loader should be able to load all of these files without crashing. 
The 156 | // files are known to be well-formed and without degenerate faces. 157 | 158 | #[test] 159 | fn read_indoor() { 160 | let mut materials = HashMap::new(); 161 | materials.insert("wall", SMaterial::white()); 162 | materials.insert("glass", SMaterial::sky()); 163 | Mesh::load_with_materials("models/box_walls.obj", &materials); 164 | } 165 | 166 | #[test] 167 | fn read_stanford_bunny() { 168 | Mesh::load("models/stanford_bunny.obj"); 169 | } 170 | 171 | #[test] 172 | fn read_stanford_dragon() { 173 | Mesh::load("models/stanford_dragon.obj"); 174 | } 175 | 176 | #[test] 177 | fn read_suzanne() { 178 | Mesh::load("models/suzanne.obj"); 179 | } 180 | -------------------------------------------------------------------------------- /textures/floor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruuda/convector/2f5f2428fa6c54002bd2ee8ce3d0f2188aab49f8/textures/floor.jpg -------------------------------------------------------------------------------- /textures/license.md: -------------------------------------------------------------------------------- 1 | The textures here have been taken from [textures.com](http://www.textures.com) 2 | and are bundled here with my default scene. They can be downloaded for free from 3 | textures.com but redistribution in the form of texture packs is not allowed. See 4 | [their terms of use](http://textures.com/terms_of_use.html) for details. 
5 | -------------------------------------------------------------------------------- /textures/wood_light.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruuda/convector/2f5f2428fa6c54002bd2ee8ce3d0f2188aab49f8/textures/wood_light.jpg -------------------------------------------------------------------------------- /tools/approx_acos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Convector -- An interactive CPU path tracer 4 | # Copyright 2016 Ruud van Asseldonk 5 | 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License version 3. A copy 8 | # of the License is available in the root of the repository. 9 | 10 | # The goal is to approximate acos(x) with a rational function f such that the 11 | # worst absolute error is minimal. That is, pick the function that performs best 12 | # in the worst case. Furthermore, I impose the following restrictions: 13 | # 14 | # * f(0) = pi/2. This implies that the constant term is pi/2. 15 | # * f(1) = 0 and f(-1) = pi. This implies that (a + b) / (1 + c + d) = -pi/2. 
16 | 17 | from mpmath import mp, fabs, acos 18 | from scipy.optimize import minimize 19 | 20 | mp.prec = 64 21 | 22 | def d(a, b, c): 23 | return -1 - 2 * (a + b) / mp.pi - c 24 | 25 | def f(x, a, b, c): 26 | return mp.pi/2 + (a * x + b * x**3) / (1 + c * x**2 + d(a, b, c) * x**4) 27 | 28 | def error(coefs, progress=True): 29 | (a, b, c) = coefs 30 | xs = (x / mp.mpf(4096) for x in range(-4096, 4097)) 31 | err = max(fabs(acos(x) - f(x, a, b, c)) for x in xs) 32 | if progress: 33 | print('(a, b, c, d): ({}, {}, {}, {})'.format(a, b, c, d(a, b, c))) 34 | print('evaluated error: ', err) 35 | print() 36 | return float(err) 37 | 38 | initial_guess = (-0.9823, 0.9421, -1.1851) 39 | coefs = minimize(error, initial_guess).x 40 | print('a:', coefs[0]) 41 | print('b:', coefs[1]) 42 | print('c:', coefs[2]) 43 | print('d:', d(*coefs)) 44 | print('max error:', error(coefs, progress=False)) 45 | 46 | # Output: 47 | # 48 | # a: -0.939115566365855, 49 | # b: 0.9217841528914573, 50 | # c: -1.2845906244690837, 51 | # d: 0.295624144969963174 52 | # max error: 0.0167244179117447796 53 | -------------------------------------------------------------------------------- /tools/approx_cos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Convector -- An interactive CPU path tracer 4 | # Copyright 2016 Ruud van Asseldonk 5 | 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License version 3. A copy 8 | # of the License is available in the root of the repository. 9 | 10 | # The goal is to approximate cos(x) with a polynomial f on the domain (-pi, pi), 11 | # such that the worst absolute error is minimal. That is, pick the function that 12 | # performs best in the worst case. Furthermore, I impose the following 13 | # restrictions: 14 | # 15 | # * f(0) = 1. This implies that the constant term is 1. 16 | # * f(pi) = -1 and f(-pi) = -1. 
# This implies that
# c = -(2 + a*pi^2 + b*pi^4) / pi^6.

import functools

from mpmath import mp, cos, fabs
from scipy.optimize import minimize

# Working precision, in bits, for all mpmath arithmetic in this script.
mp.prec = 64


def c(a, b):
    """Return the x**6 coefficient implied by a and b.

    The constraint f(pi) = -1 (and f(-pi) = -1, since f is even) requires
    1 + a*pi^2 + b*pi^4 + c*pi^6 = -1; this is that equation solved for c.
    """
    return - (2.0 + a * mp.pi**2 + b * mp.pi**4) / mp.pi**6


def f(x, a, b):
    """Evaluate the even degree-6 polynomial approximation of cos at x.

    f(x) = 1 + a*x**2 + b*x**4 + c*x**6, where c is not a free parameter but
    is derived from the others by c(a, b).
    """
    return 1.0 + a * x**2 + b * x**4 + c(a, b) * x**6


@functools.lru_cache(maxsize=None)
def _reference_samples():
    """Return (x, cos(x)) pairs for the 8193 grid points k*pi/4096 in [-pi, pi].

    The grid and the reference values do not depend on the coefficients being
    optimized, so they are computed once (on first use, at the precision set
    above) instead of on every evaluation of the objective.
    """
    grid = (k * mp.pi / mp.mpf(4096) for k in range(-4096, 4097))
    return tuple((x, cos(x)) for x in grid)


def error(coefs, progress=True):
    """Return the worst absolute error of f against cos on the sample grid.

    coefs    -- sequence (a, b) of the free coefficients.
    progress -- when true, print the coefficients (including the derived c)
                and the evaluated error; used to follow the optimizer.

    The result is converted to a plain float so that scipy can consume it.
    """
    (a, b) = coefs
    err = max(fabs(ref - f(x, a, b)) for x, ref in _reference_samples())
    if progress:
        print('(a, b, c): ({}, {}, {})'.format(a, b, c(a, b)))
        print('evaluated error: ', err)
        print()
    return float(err)


initial_guess = (-0.4960, 0.03926)


def main():
    """Minimize the worst-case error and print the resulting coefficients."""
    coefs = minimize(error, initial_guess).x
    print('a:', coefs[0])
    print('b:', coefs[1])
    print('c:', c(*coefs))
    print('max error:', error(coefs, progress=False))


# Only run the (slow) optimization when executed as a script, so that the
# helper functions above can be imported without side effects.
if __name__ == '__main__':
    main()

# Output:
#
# a: -0.496000299455
# b: 0.0392596924214
# c: -0.000966231179636657107
# max error: 0.0020164493561441203

# --------------------------------------------------------------------------------
# /tools/approx_sin.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3

# Convector -- An interactive CPU path tracer
# Copyright 2016 Ruud van Asseldonk

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3. A copy
# of the License is available in the root of the repository.

# The goal is to approximate sin(x) with a polynomial f on the domain (-pi, pi),
# such that the worst absolute error is minimal. That is, pick the function that
# performs best in the worst case. Furthermore, I impose the following
# restrictions:
#
# * f(0) = 0. This implies that the constant term is 0.
# * f(pi) = 0 and f(-pi) = 0.
# This implies that c = -(a*pi + b*pi^3) / pi^5.

import functools

from mpmath import mp, fabs, sin
from scipy.optimize import minimize

# Working precision, in bits, for all mpmath arithmetic in this script.
mp.prec = 64


def c(a, b):
    """Return the x**5 coefficient implied by a and b.

    The constraint f(pi) = 0 (and f(-pi) = 0, since f is odd) requires
    a*pi + b*pi^3 + c*pi^5 = 0; this is that equation solved for c.
    """
    return - (a * mp.pi + b * mp.pi**3) / mp.pi**5


def f(x, a, b):
    """Evaluate the odd degree-5 polynomial approximation of sin at x.

    f(x) = a*x + b*x**3 + c*x**5, where c is not a free parameter but is
    derived from the others by c(a, b).
    """
    return a * x + b * x**3 + c(a, b) * x**5


@functools.lru_cache(maxsize=None)
def _reference_samples():
    """Return (x, sin(x)) pairs for the 8193 grid points k*pi/4096 in [-pi, pi].

    The grid and the reference values do not depend on the coefficients being
    optimized, so they are computed once (on first use, at the precision set
    above) instead of on every evaluation of the objective.
    """
    grid = (k * mp.pi / mp.mpf(4096) for k in range(-4096, 4097))
    return tuple((x, sin(x)) for x in grid)


def error(coefs, progress=True):
    """Return the worst absolute error of f against sin on the sample grid.

    coefs    -- sequence (a, b) of the free coefficients.
    progress -- when true, print the coefficients (including the derived c)
                and the evaluated error; used to follow the optimizer.

    The result is converted to a plain float so that scipy can consume it.
    """
    (a, b) = coefs
    err = max(fabs(ref - f(x, a, b)) for x, ref in _reference_samples())
    if progress:
        print('(a, b, c): ({}, {}, {})'.format(a, b, c(a, b)))
        print('evaluated error: ', err)
        print()
    return float(err)


initial_guess = (0.9820, -0.1522)


def main():
    """Minimize the worst-case error and print the resulting coefficients."""
    coefs = minimize(error, initial_guess).x
    print('a:', coefs[0])
    print('b:', coefs[1])
    print('c:', c(*coefs))
    print('max error:', error(coefs, progress=False))


# Only run the (slow) optimization when executed as a script, so that the
# helper functions above can be imported without side effects.
if __name__ == '__main__':
    main()

# Output:
#
# a: 0.982012145975
# b: -0.152178468117
# c: 0.00533758325004438232
# max error: 0.008109495819698682

# --------------------------------------------------------------------------------