├── .appveyor.yml
├── .cargo
    └── config
├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── license
├── models
    ├── box_walls.obj
    ├── box_windows.obj
    ├── indoor.obj
    ├── plane.obj
    ├── stanford_bunny.obj
    ├── stanford_dragon.obj
    ├── suzanne.obj
    └── suzannes_in_box.obj
├── readme.md
├── rust-toolchain
├── screenshots
    ├── accumulative.png
    └── interactive.png
├── src
    ├── aabb.rs
    ├── bench.rs
    ├── bvh.rs
    ├── gpu
    │   ├── blend.glsl
    │   ├── gbuffer.glsl
    │   ├── id.glsl
    │   ├── median.glsl
    │   └── vertex.glsl
    ├── main.rs
    ├── material.rs
    ├── quaternion.rs
    ├── random.rs
    ├── ray.rs
    ├── renderer.rs
    ├── scene.rs
    ├── simd.rs
    ├── stats.rs
    ├── trace.rs
    ├── triangle.rs
    ├── ui.rs
    ├── util.rs
    ├── vector3.rs
    └── wavefront.rs
├── textures
    ├── floor.jpg
    ├── license.md
    └── wood_light.jpg
└── tools
    ├── approx_acos.py
    ├── approx_cos.py
    └── approx_sin.py


/.appveyor.yml:
--------------------------------------------------------------------------------
 1 | environment:
 2 |   matrix:
 3 |     - target: nightly-x86_64-pc-windows-msvc  # read as ${env:target} by the install steps
 4 | 
 5 | install:
 6 |   # Download the Rust and Cargo installer (.msi) for the selected target.
 7 |   - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-${env:target}.msi"
 8 | 
 9 |   # Install Rust and Cargo quietly; piping to Write-Output makes the step wait for msiexec to finish.
10 |   - ps: msiexec /package "rust-${env:target}.msi" /quiet /norestart | Write-Output
11 | 
12 |   # Reload Path from the machine environment to pick up the changes made by the installer.
13 |   - ps: $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine")
14 | 
15 |   # Print the rustc and cargo versions for future reference.
16 |   - rustc --version
17 |   - cargo --version
18 | 
19 | build_script:
20 |   - cargo build
21 | 
22 | test_script:
23 |   - cargo test
24 | 


--------------------------------------------------------------------------------
/.cargo/config:
--------------------------------------------------------------------------------
1 | [build]
2 | rustflags = ["-C", "target-feature=+avx,+fma"]  # Passed to rustc: enables the AVX and FMA target features.
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Cargo build output directory
 2 | /target
 3 | 
 4 | # Editor swap files (*.swp and *.swo are created by Vim)
 5 | *.swp
 6 | *.swo
 7 | 
 8 | # Written trace output (pattern is anchored to the repository root)
 9 | /trace.json
10 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name    = "convector"
 3 | version = "0.2.0"
 4 | authors = ["Ruud van Asseldonk <dev@veniogames.com>"]
 5 | 
 6 | [dependencies]  # Bare versions are caret requirements: "0.1" accepts any 0.1.x, "4.0" any 4.x.
 7 | filebuffer        = "0.1"
 8 | glium             = "0.16"
 9 | imagefmt          = "4.0"
10 | num_cpus          = "1.0"
11 | rand              = "0.3"
12 | rayon             = "0.6"
13 | scoped_threadpool = "0.1"
14 | thread-id         = "3.0"
15 | time              = "0.1"
16 | 


--------------------------------------------------------------------------------
/models/box_walls.obj:
--------------------------------------------------------------------------------
  1 | # A box with a few holes in it, like an interior scene.
  2 | 
  3 | v 8.000000 0.000000 -8.000000
  4 | v 8.000000 0.000000 8.000000
  5 | v -8.000001 0.000000 7.999998
  6 | v -7.999997 0.000000 -8.000003
  7 | v 8.000004 7.000000 -7.999996
  8 | v 7.999995 7.000000 8.000005
  9 | v -8.000003 7.000000 7.999997
 10 | v -7.999999 7.000000 -8.000000
 11 | v 8.000000 0.000000 -4.000000
 12 | v 8.000000 0.000000 0.000000
 13 | v 8.000000 0.000000 4.000000
 14 | v 4.000001 0.000000 -8.000001
 15 | v 0.000001 0.000000 -8.000002
 16 | v -3.999998 0.000000 -8.000002
 17 | v 8.000001 1.750000 -7.999999
 18 | v 8.000002 3.500000 -7.999998
 19 | v 8.000003 5.250000 -7.999997
 20 | v 4.000000 0.000000 8.000000
 21 | v -0.000000 0.000000 7.999999
 22 | v -4.000000 0.000000 7.999999
 23 | v 7.999999 1.750000 8.000001
 24 | v 7.999998 3.500000 8.000002
 25 | v 7.999996 5.250000 8.000004
 26 | v -8.000000 0.000000 3.999998
 27 | v -7.999999 0.000000 -0.000002
 28 | v -7.999998 0.000000 -4.000002
 29 | v -8.000002 1.750000 7.999998
 30 | v -8.000002 3.500000 7.999998
 31 | v -8.000002 5.250000 7.999997
 32 | v -7.999997 1.750000 -8.000002
 33 | v -7.999998 3.500000 -8.000001
 34 | v -7.999998 5.250000 -8.000000
 35 | v 8.000002 7.000000 -3.999996
 36 | v 8.000000 7.000000 0.000004
 37 | v 7.999997 7.000000 4.000004
 38 | v 4.000003 7.000000 -7.999997
 39 | v 0.000002 7.000000 -7.999998
 40 | v -3.999999 7.000000 -7.999999
 41 | v 3.999996 7.000000 8.000003
 42 | v -0.000004 7.000000 8.000001
 43 | v -4.000003 7.000000 7.999999
 44 | v -8.000002 7.000000 3.999998
 45 | v -8.000001 7.000000 -0.000002
 46 | v -8.000000 7.000000 -4.000001
 47 | v 4.000003 5.250000 -7.999998
 48 | v 0.000002 5.250000 -7.999999
 49 | v -3.999998 5.250000 -7.999999
 50 | v 4.000002 3.500000 -7.999999
 51 | v 0.000002 3.500000 -8.000000
 52 | v -3.999998 3.500000 -8.000000
 53 | v 4.000001 1.750000 -8.000000
 54 | v 0.000002 1.750000 -8.000001
 55 | v -3.999998 1.750000 -8.000002
 56 | v -8.000001 1.750000 3.999998
 57 | v -8.000000 1.750000 -0.000002
 58 | v -7.999998 1.750000 -4.000002
 59 | v -8.000001 3.500000 3.999998
 60 | v -8.000000 3.500000 -0.000002
 61 | v -7.999999 3.500000 -4.000001
 62 | v -8.000001 5.250000 3.999998
 63 | v -8.000000 5.250000 -0.000002
 64 | v -7.999999 5.250000 -4.000001
 65 | v 3.999999 1.750000 8.000000
 66 | v -0.000001 1.750000 8.000000
 67 | v -4.000001 1.750000 7.999999
 68 | v 3.999998 3.500000 8.000001
 69 | v -0.000002 3.500000 8.000000
 70 | v -4.000002 3.500000 7.999999
 71 | v 3.999997 5.250000 8.000002
 72 | v -0.000003 5.250000 8.000000
 73 | v -4.000002 5.250000 7.999999
 74 | v 8.000000 1.750000 -3.999999
 75 | v 8.000000 1.750000 0.000001
 76 | v 7.999999 1.750000 4.000001
 77 | v 8.000001 3.500000 -3.999998
 78 | v 8.000000 3.500000 0.000002
 79 | v 7.999999 3.500000 4.000002
 80 | v 8.000001 5.250000 -3.999997
 81 | v 8.000000 5.250000 0.000003
 82 | v 7.999998 5.250000 4.000003
 83 | v 4.000001 7.000000 -3.999997
 84 | v 3.999999 7.000000 0.000003
 85 | v 3.999998 7.000000 4.000003
 86 | v 0.000000 7.000000 -3.999998
 87 | v -0.000001 7.000000 0.000001
 88 | v -0.000002 7.000000 4.000001
 89 | v -4.000000 7.000000 -4.000000
 90 | v -4.000001 7.000000 0.000000
 91 | v -4.000002 7.000000 4.000000
 92 | v 4.000000 0.000000 -4.000000
 93 | v 0.000001 0.000000 -4.000001
 94 | v -3.999999 0.000000 -4.000002
 95 | v 4.000000 0.000000 -0.000001
 96 | v 0.000000 0.000000 -0.000001
 97 | v -3.999999 0.000000 -0.000002
 98 | v 4.000000 0.000000 4.000000
 99 | v 0.000000 0.000000 3.999999
100 | v -4.000000 0.000000 3.999999
101 | v 6.000000 0.000000 -8.000000
102 | v 6.000000 0.000000 8.000000
103 | v 6.000003 7.000000 -7.999997
104 | v 5.999995 7.000000 8.000004
105 | v 6.000003 5.250000 -7.999998
106 | v 6.000002 3.500000 -7.999999
107 | v 6.000001 1.750000 -8.000000
108 | v 5.999998 3.500000 8.000002
109 | v 6.000000 0.000000 -4.000000
110 | v 6.000000 0.000000 -0.000000
111 | v 6.000000 0.000000 4.000000
112 | f 98 24 3 20
113 | f 89 41 7 42
114 | f 76 77 80 35 34 79
115 | f 20 3 27 28 68
116 | f 100 18 63 66 106 22 21 2
117 | f 38 8 32 31 50 47
118 | f 26 4 30 31 59 56
119 | f 37 38 47 50 49
120 | f 49 50 53 14 13 52
121 | f 36 37 46 49 48 45
122 | f 30 4 14 53 50 31
123 | f 104 105 15 16 17 103
124 | f 25 26 56 59 58 55
125 | f 28 57 60 42 7 29
126 | f 3 24 54 57 28 27
127 | f 24 25 55 58 57 54
128 | f 58 59 62 44 43 61
129 | f 32 8 44 62 59 31
130 | f 60 61 43 42
131 | f 105 51 12 99 1 15
132 | f 18 19 64 67 66 63
133 | f 66 67 70 40 39 69
134 | f 67 68 71 41 40 70
135 | f 19 20 65 68 67 64
136 | f 29 7 41 71 68 28
137 | f 1 9 72 75 16
138 | f 23 6 35 80 77 22
139 | f 9 10 73 76 75 72
140 | f 10 11 74 77 76 73
141 | f 11 2 21 22 77 74
142 | f 48 49 52 13 12 51
143 | f 16 75 78 33 5
144 | f 75 76 79 34 33 78
145 | f 99 12 90 107 9 1
146 | f 83 39 40 86
147 | f 86 40 41 89
148 | f 35 6 102 39 83
149 | f 36 81 84 37
150 | f 81 82 85 84
151 | f 82 83 86 85
152 | f 37 84 87 38
153 | f 84 85 88 87
154 | f 85 86 89 88
155 | f 38 87 44 8
156 | f 87 88 43 44
157 | f 88 89 42 43
158 | f 14 4 26 92
159 | f 92 26 25 95
160 | f 95 25 24 98
161 | f 107 90 93 108 10 9
162 | f 12 13 91 90
163 | f 13 14 92 91
164 | f 108 93 96 109 11 10
165 | f 90 91 94 93
166 | f 91 92 95 94
167 | f 109 96 18 100 2 11
168 | f 93 94 97 96
169 | f 94 95 98 97
170 | f 96 97 19 18
171 | f 97 98 20 19
172 | f 35 83 82 34
173 | f 101 36 45 103 17 5
174 | f 34 82 81 33
175 | f 33 81 36 101 5
176 | f 106 66 69 39 102 6 23 22
177 | 


--------------------------------------------------------------------------------
/models/box_windows.obj:
--------------------------------------------------------------------------------
  1 | # A box with a few holes in it, like an interior scene.
  2 | # The holes are filled with glass windows.
  3 | 
  4 | v 8.000000 0.000000 -8.000000
  5 | v 8.000000 0.000000 8.000000
  6 | v -8.000001 0.000000 7.999998
  7 | v -7.999997 0.000000 -8.000003
  8 | v 8.000004 7.000000 -7.999996
  9 | v 7.999995 7.000000 8.000005
 10 | v -8.000003 7.000000 7.999997
 11 | v -7.999999 7.000000 -8.000000
 12 | v 8.000000 0.000000 -4.000000
 13 | v 8.000000 0.000000 0.000000
 14 | v 8.000000 0.000000 4.000000
 15 | v 4.000001 0.000000 -8.000001
 16 | v 0.000001 0.000000 -8.000002
 17 | v -3.999998 0.000000 -8.000002
 18 | v 8.000001 1.750000 -7.999999
 19 | v 8.000002 3.500000 -7.999998
 20 | v 8.000003 5.250000 -7.999997
 21 | v 4.000000 0.000000 8.000000
 22 | v -0.000000 0.000000 7.999999
 23 | v -4.000000 0.000000 7.999999
 24 | v 7.999999 1.750000 8.000001
 25 | v 7.999998 3.500000 8.000002
 26 | v 7.999996 5.250000 8.000004
 27 | v -8.000000 0.000000 3.999998
 28 | v -7.999999 0.000000 -0.000002
 29 | v -7.999998 0.000000 -4.000002
 30 | v -8.000002 1.750000 7.999998
 31 | v -8.000002 3.500000 7.999998
 32 | v -8.000002 5.250000 7.999997
 33 | v -7.999997 1.750000 -8.000002
 34 | v -7.999998 3.500000 -8.000001
 35 | v -7.999998 5.250000 -8.000000
 36 | v 8.000002 7.000000 -3.999996
 37 | v 8.000000 7.000000 0.000004
 38 | v 7.999997 7.000000 4.000004
 39 | v 4.000003 7.000000 -7.999997
 40 | v 0.000002 7.000000 -7.999998
 41 | v -3.999999 7.000000 -7.999999
 42 | v 3.999996 7.000000 8.000003
 43 | v -0.000004 7.000000 8.000001
 44 | v -4.000003 7.000000 7.999999
 45 | v -8.000002 7.000000 3.999998
 46 | v -8.000001 7.000000 -0.000002
 47 | v -8.000000 7.000000 -4.000001
 48 | v 4.000003 5.250000 -7.999998
 49 | v 0.000002 5.250000 -7.999999
 50 | v -3.999998 5.250000 -7.999999
 51 | v 4.000002 3.500000 -7.999999
 52 | v 0.000002 3.500000 -8.000000
 53 | v -3.999998 3.500000 -8.000000
 54 | v 4.000001 1.750000 -8.000000
 55 | v 0.000002 1.750000 -8.000001
 56 | v -3.999998 1.750000 -8.000002
 57 | v -8.000001 1.750000 3.999998
 58 | v -8.000000 1.750000 -0.000002
 59 | v -7.999998 1.750000 -4.000002
 60 | v -8.000001 3.500000 3.999998
 61 | v -8.000000 3.500000 -0.000002
 62 | v -7.999999 3.500000 -4.000001
 63 | v -8.000001 5.250000 3.999998
 64 | v -8.000000 5.250000 -0.000002
 65 | v -7.999999 5.250000 -4.000001
 66 | v 3.999999 1.750000 8.000000
 67 | v -0.000001 1.750000 8.000000
 68 | v -4.000001 1.750000 7.999999
 69 | v 3.999998 3.500000 8.000001
 70 | v -0.000002 3.500000 8.000000
 71 | v -4.000002 3.500000 7.999999
 72 | v 3.999997 5.250000 8.000002
 73 | v -0.000003 5.250000 8.000000
 74 | v -4.000002 5.250000 7.999999
 75 | v 8.000000 1.750000 -3.999999
 76 | v 8.000000 1.750000 0.000001
 77 | v 7.999999 1.750000 4.000001
 78 | v 8.000001 3.500000 -3.999998
 79 | v 8.000000 3.500000 0.000002
 80 | v 7.999999 3.500000 4.000002
 81 | v 8.000001 5.250000 -3.999997
 82 | v 8.000000 5.250000 0.000003
 83 | v 7.999998 5.250000 4.000003
 84 | v 4.000001 7.000000 -3.999997
 85 | v 3.999999 7.000000 0.000003
 86 | v 3.999998 7.000000 4.000003
 87 | v 0.000000 7.000000 -3.999998
 88 | v -0.000001 7.000000 0.000001
 89 | v -0.000002 7.000000 4.000001
 90 | v -4.000000 7.000000 -4.000000
 91 | v -4.000001 7.000000 0.000000
 92 | v -4.000002 7.000000 4.000000
 93 | v 4.000000 0.000000 -4.000000
 94 | v 0.000001 0.000000 -4.000001
 95 | v -3.999999 0.000000 -4.000002
 96 | v 4.000000 0.000000 -0.000001
 97 | v 0.000000 0.000000 -0.000001
 98 | v -3.999999 0.000000 -0.000002
 99 | v 4.000000 0.000000 4.000000
100 | v 0.000000 0.000000 3.999999
101 | v -4.000000 0.000000 3.999999
102 | v 6.000000 0.000000 -8.000000
103 | v 6.000000 0.000000 8.000000
104 | v 6.000003 7.000000 -7.999997
105 | v 5.999995 7.000000 8.000004
106 | v 6.000003 5.250000 -7.999998
107 | v 6.000002 3.500000 -7.999999
108 | v 6.000001 1.750000 -8.000000
109 | v 5.999998 3.500000 8.000002
110 | v 6.000000 0.000000 -4.000000
111 | v 6.000000 0.000000 -0.000000
112 | v 6.000000 0.000000 4.000000
113 | 
114 | # Window vertices
115 | v 4.000003 5.250000 -7.999998
116 | v 4.000002 3.500000 -7.999999
117 | v 4.000001 1.750000 -8.000000
118 | v -8.000001 3.500000 3.999998
119 | v -8.000000 3.500000 -0.000002
120 | v -8.000001 5.250000 3.999998
121 | v -8.000000 5.250000 -0.000002
122 | v 6.000003 5.250000 -7.999998
123 | v 6.000002 3.500000 -7.999999
124 | 
125 | usemtl wall
126 | f 98 24 3 20
127 | f 89 41 7 42
128 | f 76 77 80 35 34 79
129 | f 20 3 27 28 68
130 | f 100 18 63 66 106 22 21 2
131 | f 38 8 32 31 50 47
132 | f 26 4 30 31 59 56
133 | f 37 38 47 50 49
134 | f 49 50 53 14 13 52
135 | f 36 37 46 49 48 45
136 | f 30 4 14 53 50 31
137 | f 104 105 15 16 17 103
138 | f 25 26 56 59 58 55
139 | f 28 57 60 42 7 29
140 | f 3 24 54 57 28 27
141 | f 24 25 55 58 57 54
142 | f 58 59 62 44 43 61
143 | f 32 8 44 62 59 31
144 | f 60 61 43 42
145 | f 105 51 12 99 1 15
146 | f 18 19 64 67 66 63
147 | f 66 67 70 40 39 69
148 | f 67 68 71 41 40 70
149 | f 19 20 65 68 67 64
150 | f 29 7 41 71 68 28
151 | f 1 9 72 75 16
152 | f 23 6 35 80 77 22
153 | f 9 10 73 76 75 72
154 | f 10 11 74 77 76 73
155 | f 11 2 21 22 77 74
156 | f 48 49 52 13 12 51
157 | f 16 75 78 33 5
158 | f 75 76 79 34 33 78
159 | f 99 12 90 107 9 1
160 | f 83 39 40 86
161 | f 86 40 41 89
162 | f 35 6 102 39 83
163 | f 36 81 84 37
164 | f 81 82 85 84
165 | f 82 83 86 85
166 | f 37 84 87 38
167 | f 84 85 88 87
168 | f 85 86 89 88
169 | f 38 87 44 8
170 | f 87 88 43 44
171 | f 88 89 42 43
172 | f 14 4 26 92
173 | f 92 26 25 95
174 | f 95 25 24 98
175 | f 107 90 93 108 10 9
176 | f 12 13 91 90
177 | f 13 14 92 91
178 | f 108 93 96 109 11 10
179 | f 90 91 94 93
180 | f 91 92 95 94
181 | f 109 96 18 100 2 11
182 | f 93 94 97 96
183 | f 94 95 98 97
184 | f 96 97 19 18
185 | f 97 98 20 19
186 | f 35 83 82 34
187 | f 101 36 45 103 17 5
188 | f 34 82 81 33
189 | f 33 81 36 101 5
190 | f 106 66 69 39 102 6 23 22
191 | 
192 | usemtl glass
193 | f 117 110 111 112 118
194 | f 113 114 116 115
195 | 


--------------------------------------------------------------------------------
/models/indoor.obj:
--------------------------------------------------------------------------------
  1 | # An indoor scene, modelled manually so the geometry is optimized for my path
  2 | # tracer. For instance, many objects do not have back sides because they would
  3 | # not be visible anyway. The number of primitives has been kept low: 142
  4 | # vertices, 100 triangles. That is fewer than the low-poly Suzanne mesh has.
  5 | 
  6 | v 4.000000 0.100000 4.015000
  7 | v -4.000000 0.100000 4.015000
  8 | v 4.000000 -0.100000 3.985000
  9 | v 4.000000 0.100000 3.985000
 10 | v -4.000000 -0.100000 3.985000
 11 | v -4.000000 0.100000 3.985000
 12 | usemtl baseboard
 13 | f 4 5 6
 14 | f 6 1 4
 15 | f 4 3 5
 16 | f 6 2 1
 17 | v -4.000000 -0.200000 4.000000
 18 | v 4.000000 -0.200000 4.000000
 19 | v -4.000000 3.300000 4.000000
 20 | v 4.000000 3.300000 4.000000
 21 | usemtl wall
 22 | f 9 8 7
 23 | f 9 10 8
 24 | v -3.166173 0.494651 1.363312
 25 | v -3.327058 0.973895 1.430704
 26 | v -3.229106 0.097355 1.389674
 27 | v -3.500403 0.905491 1.503315
 28 | v -3.513893 0.494651 0.533197
 29 | v -3.674779 0.973895 0.600590
 30 | v -3.576827 0.097355 0.559559
 31 | v -3.848124 0.905491 0.673201
 32 | v -3.338880 0.979871 1.435656
 33 | v -3.494891 0.918308 1.501006
 34 | v -3.842611 0.918308 0.670892
 35 | v -3.686601 0.979871 0.605542
 36 | v -3.843320 0.908911 0.660347
 37 | v -3.364755 0.501423 1.446495
 38 | v -3.687310 0.970474 0.594997
 39 | v -3.526424 0.491231 0.527604
 40 | v -3.170976 0.491231 1.376166
 41 | v -3.331862 0.970474 1.443558
 42 | v -3.487873 0.908911 1.508909
 43 | v -3.707672 0.504843 0.603526
 44 | v -3.712475 0.501423 0.616380
 45 | v -3.352224 0.504843 1.452088
 46 | usemtl fauteuil
 47 | f 31 13 24
 48 | f 16 11 15
 49 | f 14 29 20
 50 | f 22 20 19
 51 | f 12 22 19
 52 | f 14 21 18
 53 | f 23 18 21
 54 | f 25 30 23
 55 | f 22 16 25
 56 | f 31 23 30
 57 | f 16 26 25
 58 | f 28 32 27
 59 | f 11 28 27
 60 | f 21 25 23
 61 | f 20 28 19
 62 | f 19 28 12
 63 | f 14 32 29
 64 | f 24 14 31
 65 | f 31 17 13
 66 | f 14 18 31
 67 | f 16 12 11
 68 | f 22 21 20
 69 | f 12 16 22
 70 | f 14 20 21
 71 | f 25 26 30
 72 | f 31 18 23
 73 | f 16 15 26
 74 | f 28 29 32
 75 | f 11 12 28
 76 | f 21 22 25
 77 | f 20 29 28
 78 | f 14 24 32
 79 | v -2.883356 -0.020000 0.269076
 80 | v -2.892579 0.740000 0.272940
 81 | v -3.814929 0.740000 0.659296
 82 | v -2.960627 -0.020000 0.084607
 83 | v -2.969851 0.740000 0.088470
 84 | v -3.892200 0.740000 0.474826
 85 | v -3.896064 -0.020000 0.465603
 86 | v -2.973714 -0.020000 0.079247
 87 | v -2.888716 -0.020000 0.282164
 88 | v -3.811065 -0.020000 0.668520
 89 | v -2.883356 0.730000 0.269076
 90 | v -2.960627 0.730000 0.084607
 91 | v -3.896064 0.730000 0.465603
 92 | v -2.973714 0.730000 0.079247
 93 | v -2.888716 0.730000 0.282164
 94 | v -3.811065 0.730000 0.668520
 95 | usemtl fauteuil
 96 | f 36 46 44
 97 | f 43 36 44
 98 | f 38 34 37
 99 | f 46 39 45
100 | f 46 37 44
101 | f 48 41 47
102 | f 34 47 43
103 | f 43 41 33
104 | f 37 43 44
105 | f 37 45 38
106 | f 35 47 34
107 | f 42 45 39
108 | f 38 48 35
109 | f 36 40 46
110 | f 43 33 36
111 | f 38 35 34
112 | f 46 40 39
113 | f 48 42 41
114 | f 43 47 41
115 | f 37 34 43
116 | f 37 46 45
117 | f 35 48 47
118 | f 42 48 45
119 | f 38 45 48
120 | v -2.458364 -0.020000 1.283661
121 | v -2.467588 0.740000 1.287525
122 | v -3.389937 0.740000 1.673881
123 | v -2.535635 -0.020000 1.099191
124 | v -2.544859 0.740000 1.103055
125 | v -3.467209 0.740000 1.489411
126 | v -3.471072 -0.020000 1.480187
127 | v -2.548723 -0.020000 1.093831
128 | v -2.463724 -0.020000 1.296748
129 | v -3.386074 -0.020000 1.683104
130 | v -2.458364 0.730000 1.283661
131 | v -2.535635 0.730000 1.099191
132 | v -3.471072 0.730000 1.480187
133 | v -2.548723 0.730000 1.093831
134 | v -2.463724 0.730000 1.296748
135 | v -3.386074 0.730000 1.683104
136 | usemtl fauteuil
137 | f 52 62 60
138 | f 59 52 60
139 | f 54 50 53
140 | f 62 55 61
141 | f 62 53 60
142 | f 64 57 63
143 | f 50 63 59
144 | f 59 57 49
145 | f 53 59 60
146 | f 53 61 54
147 | f 50 64 63
148 | f 58 61 55
149 | f 51 61 64
150 | f 52 56 62
151 | f 59 49 52
152 | f 54 51 50
153 | f 62 56 55
154 | f 64 58 57
155 | f 59 63 57
156 | f 53 50 59
157 | f 53 62 61
158 | f 50 51 64
159 | f 58 64 61
160 | f 51 54 61
161 | v -2.563306 0.100000 1.110782
162 | v -2.544859 0.500000 1.103055
163 | v -3.227398 0.100000 1.388958
164 | v -2.911026 0.100000 0.280667
165 | v -2.892579 0.500000 0.272940
166 | v -3.575118 0.100000 0.558843
167 | v -2.544859 0.380000 1.103055
168 | v -2.892579 0.380000 0.272940
169 | v -2.911026 0.376955 0.280667
170 | v -2.563306 0.376955 1.110782
171 | v -2.554082 0.510000 1.106918
172 | v -3.181280 0.510000 1.369640
173 | v -2.901803 0.510000 0.276804
174 | v -3.529001 0.510000 0.539526
175 | usemtl fauteuil
176 | f 74 68 73
177 | f 67 68 65
178 | f 69 75 66
179 | f 66 72 69
180 | f 72 74 73
181 | f 78 75 77
182 | f 74 65 68
183 | f 67 70 68
184 | f 69 77 75
185 | f 66 71 72
186 | f 72 71 74
187 | f 78 76 75
188 | v -3.384160 0.494651 -0.905824
189 | v -3.523827 0.973895 -1.010318
190 | v -3.438793 0.097355 -0.946699
191 | v -3.674310 0.905491 -1.122905
192 | v -2.845004 0.494651 -1.626455
193 | v -2.984670 0.973895 -1.730950
194 | v -2.899637 0.097355 -1.667330
195 | v -3.135153 0.905491 -1.843537
196 | v -3.534090 0.979871 -1.017997
197 | v -3.669524 0.918308 -1.119325
198 | v -3.130367 0.918308 -1.839957
199 | v -2.994933 0.979871 -1.738628
200 | v -3.121638 0.908911 -1.845915
201 | v -3.556551 0.501423 -1.034802
202 | v -2.986204 0.970474 -1.744586
203 | v -2.846537 0.491231 -1.640092
204 | v -3.397675 0.491231 -0.903446
205 | v -3.537342 0.970474 -1.007941
206 | v -3.672776 0.908911 -1.109269
207 | v -3.003880 0.504843 -1.757811
208 | v -3.017395 0.501423 -1.755434
209 | v -3.555018 0.504843 -1.021166
210 | usemtl fauteuil
211 | f 99 81 92
212 | f 80 83 84
213 | f 82 97 88
214 | f 89 87 90
215 | f 84 87 80
216 | f 86 88 89
217 | f 91 86 89
218 | f 93 98 91
219 | f 90 84 93
220 | f 86 98 99
221 | f 83 93 84
222 | f 96 100 95
223 | f 79 96 95
224 | f 89 93 91
225 | f 88 96 87
226 | f 87 96 80
227 | f 82 100 97
228 | f 92 82 99
229 | f 99 85 81
230 | f 82 86 99
231 | f 80 79 83
232 | f 89 88 87
233 | f 84 90 87
234 | f 86 82 88
235 | f 93 94 98
236 | f 86 91 98
237 | f 83 94 93
238 | f 96 97 100
239 | f 79 80 96
240 | f 89 90 93
241 | f 88 97 96
242 | f 82 92 100
243 | v -2.297627 -0.020000 -1.216924
244 | v -2.305634 0.740000 -1.222914
245 | v -3.106336 0.740000 -1.821977
246 | v -2.177815 -0.020000 -1.377064
247 | v -2.185822 0.740000 -1.383055
248 | v -2.986524 0.740000 -1.982118
249 | v -2.980533 -0.020000 -1.990125
250 | v -2.179831 -0.020000 -1.391062
251 | v -2.311625 -0.020000 -1.214907
252 | v -3.112327 -0.020000 -1.813970
253 | v -2.297627 0.730000 -1.216924
254 | v -2.177815 0.730000 -1.377064
255 | v -2.980533 0.730000 -1.990125
256 | v -2.179831 0.730000 -1.391062
257 | v -2.311625 0.730000 -1.214907
258 | v -3.112327 0.730000 -1.813970
259 | usemtl fauteuil
260 | f 104 114 112
261 | f 111 104 112
262 | f 106 102 105
263 | f 114 107 113
264 | f 114 105 112
265 | f 116 109 115
266 | f 102 115 111
267 | f 111 109 101
268 | f 102 112 105
269 | f 105 113 106
270 | f 102 116 115
271 | f 110 113 107
272 | f 106 116 103
273 | f 104 108 114
274 | f 111 101 104
275 | f 106 103 102
276 | f 114 108 107
277 | f 116 110 109
278 | f 111 115 109
279 | f 102 111 112
280 | f 105 114 113
281 | f 102 103 116
282 | f 110 116 113
283 | f 106 113 116
284 | v -2.956596 -0.020000 -0.336152
285 | v -2.964603 0.740000 -0.342142
286 | v -3.765305 0.740000 -0.941205
287 | v -2.836784 -0.020000 -0.496292
288 | v -2.844791 0.740000 -0.502283
289 | v -3.645493 0.740000 -1.101346
290 | v -3.639502 -0.020000 -1.109353
291 | v -2.838800 -0.020000 -0.510290
292 | v -2.970594 -0.020000 -0.334135
293 | v -3.771296 -0.020000 -0.933198
294 | v -2.956596 0.730000 -0.336152
295 | v -2.836784 0.730000 -0.496292
296 | v -3.639502 0.730000 -1.109353
297 | v -2.838800 0.730000 -0.510290
298 | v -2.970594 0.730000 -0.334135
299 | v -3.771296 0.730000 -0.933198
300 | usemtl fauteuil
301 | f 120 130 128
302 | f 127 120 128
303 | f 121 119 118
304 | f 130 123 129
305 | f 130 121 128
306 | f 132 125 131
307 | f 118 131 127
308 | f 127 125 117
309 | f 121 127 128
310 | f 121 129 122
311 | f 119 131 118
312 | f 126 129 123
313 | f 122 132 119
314 | f 120 124 130
315 | f 127 117 120
316 | f 121 122 119
317 | f 130 124 123
318 | f 132 126 125
319 | f 127 131 125
320 | f 121 118 127
321 | f 121 130 129
322 | f 119 132 131
323 | f 126 132 129
324 | f 122 129 132
325 | v -2.860805 0.100000 -0.514264
326 | v -2.844791 0.500000 -0.502283
327 | v -3.437310 0.100000 -0.945589
328 | v -2.321648 0.100000 -1.234896
329 | v -2.305634 0.500000 -1.222914
330 | v -2.898154 0.100000 -1.666221
331 | v -2.844791 0.380000 -0.502283
332 | v -2.305634 0.380000 -1.222914
333 | v -2.321648 0.376955 -1.234896
334 | v -2.860805 0.376955 -0.514264
335 | v -2.852798 0.510000 -0.508273
336 | v -3.397275 0.510000 -0.915636
337 | v -2.313641 0.510000 -1.228905
338 | v -2.858119 0.510000 -1.636268
339 | usemtl fauteuil
340 | f 142 136 141
341 | f 135 136 133
342 | f 134 145 143
343 | f 134 140 137
344 | f 140 142 141
345 | f 146 143 145
346 | f 142 133 136
347 | f 135 138 136
348 | f 134 137 145
349 | f 134 139 140
350 | f 140 139 142
351 | f 146 144 143
352 | v -3.850000 3.126285 -3.106950
353 | v 3.850000 3.126285 -3.106950
354 | v -3.850000 0.176997 -4.180403
355 | v 3.850000 0.176997 -4.180403
356 | v -1.850000 3.126285 -3.106950
357 | v 0.150000 3.126285 -3.106950
358 | v 1.850000 3.126285 -3.106950
359 | v 1.850000 0.176997 -4.180403
360 | v -0.150000 0.176997 -4.180403
361 | v -1.850000 0.176997 -4.180403
362 | v 2.150000 3.126285 -3.106950
363 | v 2.150000 0.176997 -4.180403
364 | v -0.150000 3.126285 -3.106950
365 | v 0.150000 0.176997 -4.180403
366 | v -2.150000 3.126285 -3.106950
367 | v -2.150000 0.176997 -4.180403
368 | usemtl glass
369 | f 158 148 157
370 | f 149 161 147
371 | f 156 159 151
372 | f 160 153 152
373 | f 158 150 148
374 | f 149 162 161
375 | f 156 155 159
376 | f 160 154 153
377 | v -4.000000 3.100000 -3.140000
378 | v -4.000000 3.100000 -2.700000
379 | v -4.000000 3.500000 -2.700000
380 | v 4.000000 3.100000 -3.140000
381 | v 4.000000 3.100000 -2.700000
382 | v 4.000000 3.500000 -2.700000
383 | vt 0.305169 0.000000
384 | vt 0.456596 2.000000
385 | vt 0.305169 2.000000
386 | vt 0.855107 -0.000000
387 | vt 0.705756 2.000000
388 | vt 0.705756 0.000000
389 | vt 0.456596 0.000000
390 | vt 0.855107 2.000000
391 | usemtl wood_light
392 | f 164/1 166/2 167/3
393 | f 168/4 164/5 167/6
394 | f 164/1 163/7 166/2
395 | f 168/4 165/8 164/5
396 | v -4.000000 3.300000 4.000000
397 | v 4.000000 3.300000 4.000000
398 | v -4.000000 3.300000 -2.710000
399 | v 4.000000 3.300000 -2.710000
400 | usemtl ceiling
401 | f 171 170 169
402 | f 171 172 170
403 | v 4.120000 1.902521 -3.518132
404 | v 4.120000 3.509489 -1.603021
405 | v 4.120000 1.718670 -3.363863
406 | v 4.120000 3.325639 -1.448752
407 | v 3.880000 1.902521 -3.518132
408 | v 3.880000 3.509489 -1.603021
409 | v 3.880000 1.718670 -3.363863
410 | v 3.880000 3.325639 -1.448752
411 | vt 0.619141 1.000000
412 | vt 0.542969 0.000000
413 | vt 0.542969 1.000000
414 | vt 0.464844 1.000000
415 | vt 0.541992 0.000000
416 | vt 0.541992 1.000000
417 | vt 0.100586 1.000000
418 | vt 0.177734 0.000000
419 | vt 0.177734 1.000000
420 | vt 0.619141 0.000000
421 | vt 0.464844 0.000000
422 | vt 0.100586 0.000000
423 | usemtl wood_light
424 | f 180/9 175/10 176/11
425 | f 178/12 179/13 180/14
426 | f 174/15 177/16 178/17
427 | f 180/9 179/18 175/10
428 | f 178/12 177/19 179/13
429 | f 174/15 173/20 177/16
430 | v 4.150000 -0.239241 -4.027450
431 | v 4.150000 3.707468 -2.590966
432 | v 3.850000 -0.136635 -4.309358
433 | v 3.850000 3.810074 -2.872873
434 | v 3.850000 -0.239241 -4.027450
435 | v 3.850000 3.707468 -2.590966
436 | vt 0.619141 1.000000
437 | vt 0.542969 0.000000
438 | vt 0.542969 1.000000
439 | vt 0.464844 1.000000
440 | vt 0.541992 0.000000
441 | vt 0.541992 1.000000
442 | vt 0.619141 0.000000
443 | vt 0.464844 0.000000
444 | usemtl wood_light
445 | f 186/21 181/22 182/23
446 | f 184/24 185/25 186/26
447 | f 186/21 185/27 181/22
448 | f 184/24 183/28 185/25
449 | v 1.850000 -0.239241 -4.027450
450 | v 1.850000 3.707468 -2.590966
451 | v 1.850000 -0.136635 -4.309358
452 | v 1.850000 3.810074 -2.872873
453 | v 2.150000 -0.239241 -4.027450
454 | v 2.150000 3.707468 -2.590966
455 | v 2.150000 -0.136635 -4.309358
456 | v 2.150000 3.810074 -2.872873
457 | vt 0.702637 1.000000
458 | vt 0.623047 0.000000
459 | vt 0.701660 0.000000
460 | vt 0.464844 1.000000
461 | vt 0.541992 0.000000
462 | vt 0.541992 1.000000
463 | vt 0.100586 1.000000
464 | vt 0.177734 0.000000
465 | vt 0.177734 1.000000
466 | vt 0.623047 1.000000
467 | vt 0.464844 0.000000
468 | vt 0.100586 0.000000
469 | usemtl wood_light
470 | f 188/29 189/30 187/31
471 | f 192/32 193/33 194/34
472 | f 188/35 191/36 192/37
473 | f 188/29 190/38 189/30
474 | f 192/32 191/39 193/33
475 | f 188/35 187/40 191/36
476 | v 2.120000 1.718670 -3.363863
477 | v 2.120001 3.325639 -1.448752
478 | v 1.880000 1.718670 -3.363863
479 | v 1.880001 3.325639 -1.448752
480 | v 2.120000 1.902521 -3.518132
481 | v 2.120001 3.509489 -1.603021
482 | v 1.880000 1.902521 -3.518132
483 | v 1.880001 3.509489 -1.603021
484 | vt 0.623047 1.000000
485 | vt 0.701660 0.000000
486 | vt 0.702637 1.000000
487 | vt 0.542969 1.000000
488 | vt 0.619141 0.000000
489 | vt 0.542969 0.000000
490 | vt 0.541992 1.000000
491 | vt 0.464844 0.000000
492 | vt 0.541992 0.000000
493 | vt 0.100586 1.000000
494 | vt 0.177734 0.000000
495 | vt 0.177734 1.000000
496 | vt 0.623047 0.000000
497 | vt 0.619141 1.000000
498 | vt 0.464844 1.000000
499 | vt 0.100586 0.000000
500 | usemtl wood_light
501 | f 198/41 195/42 196/43
502 | f 198/44 201/45 197/46
503 | f 202/47 199/48 201/49
504 | f 196/50 199/51 200/52
505 | f 198/41 197/53 195/42
506 | f 198/44 202/54 201/45
507 | f 202/47 200/55 199/48
508 | f 196/50 195/56 199/51
509 | v -2.120000 1.902521 -3.518132
510 | v -2.120000 3.509489 -1.603021
511 | v -1.880000 1.902521 -3.518132
512 | v -1.880000 3.509489 -1.603021
513 | v -2.120000 1.718670 -3.363863
514 | v -2.120000 3.325639 -1.448752
515 | v -1.880000 1.718670 -3.363863
516 | v -1.880000 3.325639 -1.448752
517 | vt 0.623047 1.000000
518 | vt 0.701660 0.000000
519 | vt 0.702637 1.000000
520 | vt 0.619141 1.000000
521 | vt 0.542969 0.000000
522 | vt 0.542969 1.000000
523 | vt 0.464844 1.000000
524 | vt 0.541992 0.000000
525 | vt 0.541992 1.000000
526 | vt 0.177734 1.000000
527 | vt 0.100586 0.000000
528 | vt 0.177734 0.000000
529 | vt 0.623047 0.000000
530 | vt 0.619141 0.000000
531 | vt 0.464844 0.000000
532 | vt 0.100586 1.000000
533 | usemtl wood_light
534 | f 206/57 203/58 204/59
535 | f 210/60 205/61 206/62
536 | f 208/63 209/64 210/65
537 | f 208/66 203/67 207/68
538 | f 206/57 205/69 203/58
539 | f 210/60 209/70 205/61
540 | f 208/63 207/71 209/64
541 | f 208/66 204/72 203/67
542 | v -1.850000 -0.136635 -4.309358
543 | v -1.850000 3.810074 -2.872873
544 | v -1.850000 -0.239241 -4.027450
545 | v -1.850000 3.707468 -2.590966
546 | v -2.150000 -0.136635 -4.309358
547 | v -2.150000 3.810074 -2.872873
548 | v -2.150000 -0.239241 -4.027450
549 | v -2.150000 3.707468 -2.590966
550 | vt 0.623047 1.000000
551 | vt 0.701660 0.000000
552 | vt 0.702637 1.000000
553 | vt 0.619141 1.000000
554 | vt 0.542969 0.000000
555 | vt 0.542969 1.000000
556 | vt 0.464844 1.000000
557 | vt 0.541992 0.000000
558 | vt 0.541992 1.000000
559 | vt 0.623047 0.000000
560 | vt 0.619141 0.000000
561 | vt 0.464844 0.000000
562 | usemtl wood_light
563 | f 214/73 211/74 212/75
564 | f 218/76 213/77 214/78
565 | f 216/79 217/80 218/81
566 | f 214/73 213/82 211/74
567 | f 218/76 217/83 213/77
568 | f 216/79 215/84 217/80
569 | v -3.850000 -0.136635 -4.309358
570 | v -3.850000 3.810074 -2.872873
571 | v -4.150000 -0.239241 -4.027450
572 | v -4.150000 3.707468 -2.590966
573 | v -3.850000 -0.239241 -4.027450
574 | v -3.850000 3.707468 -2.590966
575 | vt 0.619141 1.000000
576 | vt 0.542969 0.000000
577 | vt 0.542969 1.000000
578 | vt 0.464844 1.000000
579 | vt 0.541992 0.000000
580 | vt 0.541992 1.000000
581 | vt 0.619141 0.000000
582 | vt 0.464844 0.000000
583 | usemtl wood_light
584 | f 224/85 219/86 220/87
585 | f 222/88 223/89 224/90
586 | f 224/85 223/91 219/86
587 | f 222/88 221/92 223/89
588 | v -3.880000 1.902521 -3.518132
589 | v -3.880000 3.509489 -1.603021
590 | v -3.880000 1.718670 -3.363863
591 | v -3.880000 3.325639 -1.448752
592 | v -4.120000 1.902521 -3.518132
593 | v -4.120000 3.509489 -1.603021
594 | v -4.120000 1.718670 -3.363863
595 | v -4.120000 3.325639 -1.448752
596 | vt 0.623047 1.000000
597 | vt 0.701660 0.000000
598 | vt 0.702637 1.000000
599 | vt 0.619141 1.000000
600 | vt 0.542969 0.000000
601 | vt 0.542969 1.000000
602 | vt 0.100586 1.000000
603 | vt 0.177734 0.000000
604 | vt 0.177734 1.000000
605 | vt 0.623047 0.000000
606 | vt 0.619141 0.000000
607 | vt 0.100586 0.000000
608 | usemtl wood_light
609 | f 228/93 225/94 226/95
610 | f 232/96 227/97 228/98
611 | f 226/99 229/100 230/101
612 | f 228/93 227/102 225/94
613 | f 232/96 231/103 227/97
614 | f 226/99 225/104 229/100
615 | v -0.120000 1.718670 -3.363863
616 | v -0.120000 3.325639 -1.448752
617 | v -0.120000 1.902521 -3.518132
618 | v -0.120000 3.509489 -1.603021
619 | v 0.120000 1.718670 -3.363863
620 | v 0.120000 3.325639 -1.448752
621 | v 0.120000 1.902521 -3.518132
622 | v 0.120000 3.509489 -1.603021
623 | vt 0.702637 1.000000
624 | vt 0.623047 0.000000
625 | vt 0.701660 0.000000
626 | vt 0.619141 1.000000
627 | vt 0.542969 0.000000
628 | vt 0.542969 1.000000
629 | vt 0.464844 1.000000
630 | vt 0.541992 0.000000
631 | vt 0.541992 1.000000
632 | vt 0.100586 1.000000
633 | vt 0.177734 0.000000
634 | vt 0.177734 1.000000
635 | vt 0.623047 1.000000
636 | vt 0.619141 0.000000
637 | vt 0.464844 0.000000
638 | vt 0.100586 0.000000
639 | usemtl wood_light
640 | f 234/105 235/106 233/107
641 | f 240/108 235/109 236/110
642 | f 238/111 239/112 240/113
643 | f 234/114 237/115 238/116
644 | f 234/105 236/117 235/106
645 | f 240/108 239/118 235/109
646 | f 238/111 237/119 239/112
647 | f 234/114 233/120 237/115
648 | v 4.000000 3.300000 4.000000
649 | v 4.000000 -0.200000 4.000000
650 | v 4.000000 3.300000 -2.750000
651 | v 4.000000 -0.200000 -4.020000
652 | usemtl wall
653 | f 243 242 241
654 | f 243 244 242
655 | v -4.000000 3.300000 4.000000
656 | v -4.000000 -0.200000 4.000000
657 | v -4.000000 3.300000 -2.750000
658 | v -4.000000 -0.200000 -4.020000
659 | usemtl wall
660 | f 246 247 245
661 | f 246 248 247
662 | v -0.150000 -0.136635 -4.309358
663 | v -0.150000 3.810074 -2.872873
664 | v 0.150000 -0.136635 -4.309358
665 | v 0.150000 3.810074 -2.872873
666 | v -0.150000 -0.239241 -4.027450
667 | v -0.150000 3.707468 -2.590966
668 | v 0.150000 -0.239241 -4.027450
669 | v 0.150000 3.707468 -2.590966
670 | vt 0.619141 1.000000
671 | vt 0.542969 0.000000
672 | vt 0.542969 1.000000
673 | vt 0.464844 1.000000
674 | vt 0.541992 0.000000
675 | vt 0.541992 1.000000
676 | vt 0.100586 1.000000
677 | vt 0.177734 0.000000
678 | vt 0.177734 1.000000
679 | vt 0.619141 0.000000
680 | vt 0.464844 0.000000
681 | vt 0.100586 0.000000
682 | usemtl wood_light
683 | f 256/121 251/122 252/123
684 | f 254/124 255/125 256/126
685 | f 250/127 253/128 254/129
686 | f 256/121 255/130 251/122
687 | f 254/124 253/131 255/125
688 | f 250/127 249/132 253/128
689 | v -4.000000 -0.200000 -3.800000
690 | v -4.000000 0.200000 -3.800000
691 | v -4.000000 0.200000 -4.200000
692 | v 4.000000 -0.200000 -3.800000
693 | v 4.000000 0.200000 -3.800000
694 | v 4.000000 0.200000 -4.200000
695 | vt 0.305169 0.000000
696 | vt 0.456596 2.000000
697 | vt 0.305169 2.000000
698 | vt 0.855107 -0.000000
699 | vt 0.705756 2.000000
700 | vt 0.705756 0.000000
701 | vt 0.456596 0.000000
702 | vt 0.855107 2.000000
703 | usemtl wood_light
704 | f 258/133 260/134 261/135
705 | f 262/136 258/137 261/138
706 | f 258/133 257/139 260/134
707 | f 262/136 259/140 258/137
708 | v 3.985000 -0.100000 4.000000
709 | v 3.985000 0.100000 4.000000
710 | v 3.985000 -0.100000 -4.000000
711 | v 3.985000 0.100000 -4.000000
712 | v 4.015000 0.100000 4.000000
713 | v 4.015000 0.100000 -4.000000
714 | usemtl baseboard
715 | f 266 263 264
716 | f 268 264 267
717 | f 266 265 263
718 | f 268 266 264
719 | v -4.015000 0.100000 4.000000
720 | v -4.015000 0.100000 -4.000000
721 | v -3.985000 -0.100000 4.000000
722 | v -3.985000 0.100000 4.000000
723 | v -3.985000 -0.100000 -4.000000
724 | v -3.985000 0.100000 -4.000000
725 | usemtl baseboard
726 | f 272 273 274
727 | f 274 269 272
728 | f 272 271 273
729 | f 274 270 269
730 | v -4.000000 -0.010000 4.000000
731 | v 4.000000 -0.010000 4.000000
732 | v -4.000000 -0.010000 -4.000000
733 | v 4.000000 -0.010000 -4.000000
734 | vt 0.000100 0.000100
735 | vt 0.999900 0.999900
736 | vt 0.000100 0.999900
737 | vt 0.999900 0.000100
738 | usemtl floor
739 | f 276/141 277/142 275/143
740 | f 276/141 278/144 277/142
741 | 


--------------------------------------------------------------------------------
/models/plane.obj:
--------------------------------------------------------------------------------
 1 | # A simple xz-plane with normal in the positive y-direction.
 2 | # 4 vertices, 2 triangles
 3 | 
 4 | v -9.0 0.0 9.0
 5 | v 9.0 0.0 9.0
 6 | v 9.0 0.0 -9.0
 7 | v -9.0 0.0 -9.0
 8 | 
 9 | f 1 2 3 4
10 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | Convector
 2 | =========
 3 | 
 4 | Interactive CPU path tracer.
 5 | 
 6 | [![Build Status][ci-img]][ci]
 7 | 
 8 | On the left: interactive mode, running at about 10 fps on my machine. On the
 9 | right: accumulative mode, after rendering for a few minutes.
10 | 
11 | | ![Interactive][interactive] | ![Accumulative][accumulative] |
12 | |-----------------------------|-------------------------------|
13 | 
14 | Requirements
15 | ------------
16 | 
17 | Hardware: a CPU that supports the AVX instructions is required. In practice this
18 | means Sandy Bridge or later. FMA instructions can be taken advantage of too,
19 | those are Haswell or later.
20 | 
21 | Software: a recent nightly version of the
22 | [Rust programming language](https://rust-lang.org) is required. Version 1.10 is
23 | recommended. On Windows you need the version with the MSVC ABI.
24 | 
25 | Compiling and Running
26 | ---------------------
27 | 
28 |  * `cargo run --release` to build and run the release executable.
29 |  * `cargo build --release` to build in release mode without running.
30 |  * `cargo bench` to build and run all benchmarks in release mode.
31 |  * `cargo test` to build and run all tests in debug mode.
32 | 
33 | If you do not want to use the FMA instructions, remove the `+fma` from the
34 | codegen options in `.cargo/config`.
35 | 
36 | Controls
37 | --------
38 | 
39 |  * Press `b` to toggle blending recent frames.
40 |  * Press `d` to toggle debug view.
41 |    The green channel shows the number of primary AABB intersections,
42 |    the blue channel shows the number of primary triangle intersections.
43 |  * Press `m` to toggle the median filter for noise reduction.
44 |  * Press `q` to quit the application.
45 |  * Press `r` to switch between realtime and accumulative rendering.
46 |  * Press `s` to print statistics to the console.
47 |  * Press `t` to write a trace to trace.json.
48 |    It can be opened with Chrome by going to chrome://tracing.
49 | 
50 | About the code
51 | --------------
52 | 
53 | Many structs represent eight instances at once for SIMD. In that case the name
54 | has been prefixed with `M` (for “multi”). The single-instance struct types have
55 | the prefix `S` instead (for “single”).
56 | 
57 | The most interesting stuff is in `src/triangle.rs`, `src/material.rs`,
58 | `src/renderer.rs`, and `src/bvh.rs`. Shaders are in `src/gpu`.
59 | 
60 | License
61 | -------
62 | 
63 | Convector is free software. It is licensed under the
64 | [GNU General Public License][gplv3], version 3.
65 | 
66 | [gplv3]:        https://www.gnu.org/licenses/gpl-3.0.html
67 | [interactive]:  https://raw.githubusercontent.com/ruuda/convector/master/screenshots/interactive.png
68 | [accumulative]: https://raw.githubusercontent.com/ruuda/convector/master/screenshots/accumulative.png
69 | [ci-img]:       https://ci.appveyor.com/api/projects/status/nkqhoi829382i1ow?svg=true
70 | [ci]:           https://ci.appveyor.com/project/ruuda/convector
71 | 


--------------------------------------------------------------------------------
/rust-toolchain:
--------------------------------------------------------------------------------
1 | nightly-2017-04-15
2 | 


--------------------------------------------------------------------------------
/screenshots/accumulative.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ruuda/convector/2f5f2428fa6c54002bd2ee8ce3d0f2188aab49f8/screenshots/accumulative.png


--------------------------------------------------------------------------------
/screenshots/interactive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ruuda/convector/2f5f2428fa6c54002bd2ee8ce3d0f2188aab49f8/screenshots/interactive.png


--------------------------------------------------------------------------------
/src/aabb.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | //! This module implements axis-aligned bounding boxes and related functions.
  9 | 
 10 | use ray::MRay;
 11 | use simd::{Mask, Mf32};
 12 | use vector3::{MVector3, SVector3};
 13 | 
 14 | #[cfg(test)]
 15 | use {bench, test};
 16 | 
 17 | /// An axis-aligned bounding box.
 18 | #[derive(Clone, Debug)]
 19 | pub struct Aabb {
 20 |     pub origin: SVector3,
 21 | 
 22 |     /// The origin plus the size.
 23 |     pub far: SVector3,
 24 | }
 25 | 
 26 | /// Caches AABB intersection distances.
 27 | pub struct MAabbIntersection {
 28 |     // The AABB was intersected by the line defined by the ray if tmax >= tmin.
 29 |     // The mask contains the result of this comparison. If tmax is negative, the
 30 |     // AABB lies behind the ray entirely.
 31 |     tmin: Mf32,
 32 |     tmax: Mf32,
 33 | 
 34 |     // The mask can be computed from tmin and tmax, but benchmarks show that it
 35 |     // is slightly faster to store it, than to re-compute it when needed.
 36 |     mask: Mask,
 37 | }
 38 | 
 39 | impl Aabb {
 40 |     pub fn new(origin: SVector3, far: SVector3) -> Aabb {
 41 |         Aabb {
 42 |             origin: origin,
 43 |             far: far,
 44 |         }
 45 |     }
 46 | 
 47 |     pub fn zero() -> Aabb {
 48 |         Aabb {
 49 |             origin: SVector3::zero(),
 50 |             far: SVector3::zero(),
 51 |         }
 52 |     }
 53 | 
 54 |     /// Returns the smallest axis-aligned bounding box that contains all input
 55 |     /// points. Panics if `points` yields no elements.
 56 |     pub fn enclose_points<'a, I>(points: I) -> Aabb
 57 |         where I: IntoIterator<Item = &'a SVector3>
 58 |     {
 59 |         let mut it = points.into_iter();
 60 |         let &first = it.next().expect("enclosure must enclose at least one point");
 61 |         // Seed both extremes with the first point, then grow them per point.
 62 |         let mut min = first;
 63 |         let mut max = first;
 64 | 
 65 |         for &point in it {
 66 |             min = SVector3::min(min, point);
 67 |             max = SVector3::max(max, point);
 68 |         }
 69 | 
 70 |         Aabb::new(min, max)
 71 |     }
 72 | 
 73 |     /// Returns the smallest bounding box that contains all input boxes.
 74 |     pub fn enclose_aabbs<'a, I>(aabbs: I) -> Aabb
 75 |         where I: IntoIterator<Item = &'a Aabb>
 76 |     {
 77 |         let mut it = aabbs.into_iter();
 78 |         let first = it.next().expect("enclosure must enclose at least one AABB");
 79 |         // Start from the first box and extend the bounds with every other box.
 80 |         let mut min = first.origin;
 81 |         let mut max = first.far;
 82 | 
 83 |         for aabb in it {
 84 |             min = SVector3::min(min, aabb.origin);
 85 |             max = SVector3::max(max, aabb.far);
 86 |         }
 87 | 
 88 |         Aabb::new(min, max)
 89 |     }
 90 | 
 91 |     /// Returns the size of the bounding box.
 92 |     pub fn size(&self) -> SVector3 {
 93 |         self.far - self.origin
 94 |     }
 95 | 
 96 |     /// Returns the surface area of the bounding box.
 97 |     pub fn area(&self) -> f32 {
 98 |         let s = self.size();
 99 |         let x = s.y * s.z;
100 |         let y = s.z * s.x;
101 |         let z = s.x * s.y;
102 |         2.0 * (x + y + z)
103 |     }
104 | 
105 |     pub fn intersect(&self, ray: &MRay) -> MAabbIntersection {
106 |         // Note: this method, in combination with `MAabbIntersection::any()`
107 |         // compiles down to ~65 instructions, taking up ~168 bytes of
108 |         // instruction cache; 3 cache lines.
109 | 
110 |         // Note: the compiler is smart enough to inline this method and compute
111 |         // these reciprocals only once per ray, so there is no need to clutter
112 |         // the code by passing around precomputed values.
113 |         let xinv = ray.direction.x.recip_fast();
114 |         let yinv = ray.direction.y.recip_fast();
115 |         let zinv = ray.direction.z.recip_fast();
116 | 
117 |         let d1 = MVector3::broadcast(self.origin) - ray.origin;
118 |         let d2 = MVector3::broadcast(self.far) - ray.origin;
119 | 
120 |         let (tx1, tx2) = (d1.x * xinv, d2.x * xinv);
121 |         let txmin = tx1.min(tx2);
122 |         let txmax = tx1.max(tx2);
123 | 
124 |         let (ty1, ty2) = (d1.y * yinv, d2.y * yinv);
125 |         let tymin = ty1.min(ty2);
126 |         let tymax = ty1.max(ty2);
127 | 
128 |         let (tz1, tz2) = (d1.z * zinv, d2.z * zinv);
129 |         let tzmin = tz1.min(tz2);
130 |         let tzmax = tz1.max(tz2);
131 | 
132 |         // The minimum t in all dimension is the maximum of the per-axis minima.
133 |         let tmin = txmin.max(tymin.max(tzmin));
134 |         let tmax = txmax.min(tymax.min(tzmax));
135 | 
136 |         MAabbIntersection {
137 |             tmin: tmin,
138 |             tmax: tmax,
139 |             mask: tmax.geq(tmin),
140 |         }
141 |     }
142 | }
143 | 
144 | impl MAabbIntersection {
145 |     /// Returns whether any of the active rays intersected the AABB.
146 |     pub fn any(&self) -> bool {
147 |         // If there was an intersection in front of the ray, then tmax will
148 |         // definitely be positive. The mask is only set for the rays that
149 |         // actually intersected the bounding box.
150 |         self.tmax.any_sign_bit_positive_masked(self.mask)
151 |     }
152 | 
153 |     /// Returns whether any of the active rays intersected the AABB.
154 |     // TODO: I should get rid of one of these `any` methods.
155 |     pub fn any_masked(&self, active: Mask) -> bool {
156 |         // If there was an intersection in front of the ray, then tmax will
157 |         // definitely be positive. The mask is only set for the rays that
158 |         // actually intersected the bounding box.
159 |         //
160 |         // The active mask has an unfortunate sign for this purpose: its sign
161 |         // bit is 0 for rays that should be considered, and 1 for rays that
162 |         // can be ignored. To get (mask & !active), we can do (mask ^ active) &
163 |         // mask.
164 |         let mask = (self.mask ^ active) & self.mask;
165 |         self.tmax.any_sign_bit_positive_masked(mask)
166 |     }
167 | 
168 |     /// Returns whether for all rays that intersect the AABB and for which the
169 |     /// sign bit in the active mask is 0 (positive) the given distance is at
170 |     /// most the distance to the AABB.
171 |     pub fn is_further_away_than(&self, distance: Mf32, active: Mask) -> bool {
172 |         // If distance > self.tmin (when false should be returned for the ray),
173 |         // the comparison results in positive 0.0. If distance > self.tmin for
174 |         // any of the values for which the mask is set, then for that ray the
175 |         // AABB is not further away. Hence all sign bits must be negative.
176 |         (self.tmin.geq(distance) | active).all_sign_bits_negative_masked(self.mask)
177 |     }
178 | 
179 |     /// Returns whether this AABB should be visited before the other one.
180 |     pub fn should_try_before(&self, other: &MAabbIntersection) -> bool {
181 |         (self.tmin - other.tmin).all_sign_bits_positive()
182 |     }
183 | }
184 | 
185 | #[test]
186 | fn aabb_enclose_aabbs() {
187 |     let a = Aabb::new(SVector3::new(1.0, 2.0, 3.0), SVector3::new(5.0, 7.0, 9.0));
188 |     let b = Aabb::new(SVector3::new(0.0, 3.0, 2.0), SVector3::new(9.0, 6.0, 9.0));
189 |     let ab = Aabb::enclose_aabbs(&[a, b]);
190 |     assert_eq!(ab.origin, SVector3::new(0.0, 2.0, 2.0));
191 |     assert_eq!(ab.far, SVector3::new(9.0, 7.0, 9.0));
192 | }
193 | 
194 | #[test]
195 | fn aabb_area() {
196 |     // Width: 4, height: 5, depth: 6.
197 |     let aabb = Aabb::new(SVector3::new(1.0, 2.0, 3.0), SVector3::new(5.0, 7.0, 9.0));
198 |     assert_eq!(40.0 + 60.0 + 48.0, aabb.area());
199 | }
200 | 
201 | #[test]
202 | fn intersect_aabb() {
203 |     use ray::SRay;
204 | 
205 |     let aabb = Aabb {
206 |         origin: SVector3::new(0.0, 1.0, 2.0),
207 |         far: SVector3::new(1.0, 3.0, 5.0),
208 |     };
209 | 
210 |     // Intersects forwards but not backwards.
211 |     let r1 = SRay {
212 |         origin: SVector3::zero(),
213 |         direction: SVector3::new(2.0, 3.0, 5.0).normalized(),
214 |     };
215 |     let mr1 = MRay::broadcast(&r1);
216 |     assert!(aabb.intersect(&mr1).any());
217 |     assert!(!aabb.intersect(&-mr1).any());
218 | 
219 |     // Intersects forwards but not backwards.
220 |     let r2 = SRay {
221 |         origin: SVector3::zero(),
222 |         direction: SVector3::new(1.0, 4.0, 5.0).normalized(),
223 |     };
224 |     let mr2 = MRay::broadcast(&r2);
225 |     assert!(aabb.intersect(&mr2).any());
226 |     assert!(!aabb.intersect(&-mr2).any());
227 | 
228 |     // Intersects neither forwards nor backwards.
229 |     let r3 = SRay {
230 |         origin: SVector3::zero(),
231 |         direction: SVector3::new(2.0, 3.0, 0.0).normalized(),
232 |     };
233 |     let mr3 = MRay::broadcast(&r3);
234 |     assert!(!aabb.intersect(&mr3).any());
235 |     assert!(!aabb.intersect(&-mr3).any());
236 | 
237 |     // Intersects both forwards and backwards (origin is inside the aabb).
238 |     let r4 = SRay {
239 |         origin: SVector3::new(0.2, 1.2, 2.2),
240 |         direction: SVector3::new(1.0, 1.0, 0.0).normalized(),
241 |     };
242 |     let mr4 = MRay::broadcast(&r4);
243 |     assert!(aabb.intersect(&mr4).any());
244 |     assert!(aabb.intersect(&-mr4).any());
245 | 
246 |     // Intersects both forwards and backwards (origin is inside the aabb).
247 |     let r5 = SRay {
248 |         origin: SVector3::new(0.01, 2.0, 3.5),
249 |         direction: SVector3::new(0.0, 0.0, 1.0).normalized(),
250 |     };
251 |     let mr5 = MRay::broadcast(&r5);
252 |     assert!(aabb.intersect(&mr5).any());
253 |     assert!(aabb.intersect(&-mr5).any());
254 | }
255 | 
256 | #[bench]
257 | fn bench_intersect_p100(b: &mut test::Bencher) {
258 |     let (aabb, rays) = bench::aabb_with_mrays(4096, 4096);
259 |     let mut rays_it = rays.iter().cycle();
260 |     b.iter(|| {
261 |         let isect = aabb.intersect(rays_it.next().unwrap());
262 |         test::black_box(isect.any());
263 |     });
264 | }
265 | 
266 | #[bench]
267 | fn bench_intersect_p50(b: &mut test::Bencher) {
268 |     let (aabb, rays) = bench::aabb_with_mrays(4096, 2048);
269 |     let mut rays_it = rays.iter().cycle();
270 |     b.iter(|| {
271 |         let isect = aabb.intersect(rays_it.next().unwrap());
272 |         test::black_box(isect.any());
273 |     });
274 | }
275 | 
276 | #[bench]
277 | fn bench_intersect_8_mrays_per_aabb(b: &mut test::Bencher) {
278 |     let rays = bench::mrays_inward(4096 / 8);
279 |     let aabbs = bench::aabbs(4096);
280 |     let mut rays_it = rays.iter().cycle();
281 |     let mut aabbs_it = aabbs.iter().cycle();
282 |     b.iter(|| {
283 |         let aabb = aabbs_it.next().unwrap();
284 |         for _ in 0..8 {
285 |             let isect = aabb.intersect(rays_it.next().unwrap());
286 |             test::black_box(isect.any());
287 |         }
288 |     });
289 | }
290 | 
291 | #[bench]
292 | fn bench_intersect_8_aabbs_per_mray(b: &mut test::Bencher) {
293 |     let rays = bench::mrays_inward(4096 / 8);
294 |     let aabbs = bench::aabbs(4096);
295 |     let mut rays_it = rays.iter().cycle();
296 |     let mut aabbs_it = aabbs.iter().cycle();
297 |     b.iter(|| {
298 |         let ray = rays_it.next().unwrap();
299 |         for _ in 0..8 {
300 |             let aabb = aabbs_it.next().unwrap();
301 |             let isect = aabb.intersect(ray);
302 |             test::black_box(isect.any());
303 |         }
304 |     });
305 | }
306 | 


--------------------------------------------------------------------------------
/src/bench.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | //! This module generates test data for the benchmarks.
  9 | 
 10 | use aabb::Aabb;
 11 | use material::SMaterial;
 12 | use quaternion::{MQuaternion, SQuaternion};
 13 | use rand;
 14 | use rand::Rng;
 15 | use rand::distributions::{IndependentSample, Range};
 16 | use ray::{MRay, SRay};
 17 | use simd::Mf32;
 18 | use std::f32::consts;
 19 | use triangle::Triangle;
 20 | use vector3::{MVector3, SVector3};
 21 | 
 22 | /// Generates n random Mf32s in the range [0, 1).
 23 | pub fn mf32_unit(n: usize) -> Vec<Mf32> {
 24 |     let mut mf32s = Vec::with_capacity(n);
 25 |     let mut rng = rand::thread_rng();
 26 |     let range = Range::new(0.0, 1.0);
 27 |     for _ in 0..n {
 28 |         mf32s.push(Mf32::generate(|_| range.ind_sample(&mut rng)));
 29 |     }
 30 |     mf32s
 31 | }
 32 | 
 33 | /// Generates n random Mf32s in the range [-1, 1).
 34 | pub fn mf32_biunit(n: usize) -> Vec<Mf32> {
 35 |     let mut mf32s = Vec::with_capacity(n);
 36 |     let mut rng = rand::thread_rng();
 37 |     let range = Range::new(-1.0, 1.0);
 38 |     for _ in 0..n {
 39 |         mf32s.push(Mf32::generate(|_| range.ind_sample(&mut rng)));
 40 |     }
 41 |     mf32s
 42 | }
 43 | 
 44 | /// Generates n vectors distributed uniformly on the unit sphere.
 45 | pub fn svectors_on_unit_sphere(n: usize) -> Vec<SVector3> {
 46 |     let mut rng = rand::thread_rng();
 47 |     let phi_range = Range::new(0.0, 2.0 * consts::PI);
 48 |     let cos_theta_range = Range::new(-1.0_f32, 1.0);
 49 |     let mut vectors = Vec::with_capacity(n);
 50 |     for _ in 0..n {
 51 |         let phi = phi_range.ind_sample(&mut rng);
 52 |         let theta = cos_theta_range.ind_sample(&mut rng).acos();
 53 |         let vector = SVector3 {
 54 |             x: phi.cos() * theta.sin(),
 55 |             y: phi.sin() * theta.sin(),
 56 |             z: theta.cos(),
 57 |         };
 58 |         vectors.push(vector);
 59 |     }
 60 |     vectors
 61 | }
 62 | 
 63 | /// Generates n times 8 vectors distributed uniformly on the unit sphere.
 64 | pub fn mvectors_on_unit_sphere(n: usize) -> Vec<MVector3> {
 65 |     let mut vectors = Vec::with_capacity(n);
 66 |     for _ in 0..n {
 67 |         let p = svectors_on_unit_sphere(8);
 68 |         let x = Mf32::generate(|i| p[i].x);
 69 |         let y = Mf32::generate(|i| p[i].y);
 70 |         let z = Mf32::generate(|i| p[i].z);
 71 |         vectors.push(MVector3::new(x, y, z));
 72 |     }
 73 |     vectors
 74 | }
 75 | 
 76 | /// Generates n quaternions uniformly distributed over the unit sphere.
 77 | pub fn unit_squaternions(n: usize) -> Vec<SQuaternion> {
 78 |     let mut rng = rand::thread_rng();
 79 |     let range = Range::new(-1.0_f32, 1.0);
 80 |     let mut quaternions = Vec::with_capacity(n);
 81 | 
 82 |     let mut i = 0;
 83 |     while i < n {
 84 |         let a = range.ind_sample(&mut rng);
 85 |         let b = range.ind_sample(&mut rng);
 86 |         let c = range.ind_sample(&mut rng);
 87 |         let d = range.ind_sample(&mut rng);
 88 | 
 89 |         // Use rejection sampling because I do not know how to sample a 4D unit
 90 |         // sphere uniformly.
 91 |         let norm_squared = a * a + b * b + c * c + d * d;
 92 |         if norm_squared > 1.0 {
 93 |             continue;
 94 |         }
 95 | 
 96 |         let norm = norm_squared.sqrt();
 97 |         let q = SQuaternion::new(a / norm, b / norm, c / norm, d / norm);
 98 |         quaternions.push(q);
 99 | 
100 |         i += 1;
101 |     }
102 | 
103 |     quaternions
104 | }
105 | 
106 | /// Generates n times 8 quaternions uniformly distributed over the unit sphere.
107 | pub fn unit_mquaternions(n: usize) -> Vec<MQuaternion> {
108 |     let mut quaternions = Vec::with_capacity(n);
109 |     for _ in 0..n {
110 |         let q = unit_squaternions(8);
111 |         let a = Mf32::generate(|i| q[i].a);
112 |         let b = Mf32::generate(|i| q[i].b);
113 |         let c = Mf32::generate(|i| q[i].c);
114 |         let d = Mf32::generate(|i| q[i].d);
115 |         quaternions.push(MQuaternion::new(a, b, c, d));
116 |     }
117 |     quaternions
118 | }
119 | 
120 | /// Generates n pairs of unit (and therefore nonzero) vectors.
121 | pub fn svector3_pairs(n: usize) -> Vec<(SVector3, SVector3)> {
122 |     let a = svectors_on_unit_sphere(n);
123 |     let b = svectors_on_unit_sphere(n);
124 |     // Both vectors are consumed by value, so there is no need to drain them.
125 |     a.into_iter().zip(b).collect()
126 | }
127 | 
128 | /// Generates n times 8 pairs of unit (and therefore nonzero) vectors.
129 | pub fn mvector3_pairs(n: usize) -> Vec<(MVector3, MVector3)> {
130 |     let a = mvectors_on_unit_sphere(n);
131 |     let b = mvectors_on_unit_sphere(n);
132 |     // Both vectors are consumed by value, so there is no need to drain them.
133 |     a.into_iter().zip(b).collect()
134 | }
135 | 
136 | /// Generates rays with origin on a sphere, pointing to the origin.
137 | pub fn srays_inward(radius: f32, n: usize) -> Vec<SRay> {
138 |     svectors_on_unit_sphere(n).iter().map(|&x| SRay::new(x * radius, -x)).collect()
139 | }
140 | 
141 | /// Generates a random AABB and n rays of which m intersect the box.
142 | pub fn aabb_with_srays(n: usize, m: usize) -> (Aabb, Vec<SRay>) {
143 |     let origin = SVector3::new(-1.0, -1.0, -1.0);
144 |     let far = SVector3::new(1.0, 1.0, 1.0);
145 |     let aabb = Aabb::new(origin, far);
146 |     let up = SVector3::new(0.0, 0.0, 1.0);
147 |     let mut rays = srays_inward(16.0, n);
148 | 
149 |     // Offset the m-n rays that should not intersect the box in a direction
150 |     // perpendicular to the ray.
151 |     for i in m..n {
152 |         let p = rays[i].origin + up.cross(rays[i].direction).normalized() * 16.0;
153 |         rays[i].origin = p;
154 |     }
155 | 
156 |     // Shuffle the intersecting and non-intersecting rays to confuse the branch
157 |     // predictor.
158 |     rand::thread_rng().shuffle(&mut rays[..]);
159 | 
160 |     (aabb, rays)
161 | }
162 | 
163 | /// Generates a random AABB and n rays of which m intersect the box,
164 | /// packed per 8 rays. N must be a multiple of 8.
165 | pub fn aabb_with_mrays(n: usize, m: usize) -> (Aabb, Vec<MRay>) {
166 |     assert_eq!(0, n & 7); // Must be a multiple of 8.
167 |     let (aabb, srays) = aabb_with_srays(n, m);
168 |     let mrays = srays.chunks(8)
169 |                      .map(|rs| MRay::generate(|i| rs[i].clone()))
170 |                      .collect();
171 |     (aabb, mrays)
172 | }
173 | 
174 | /// Generates n triangles with vertices on the unit sphere.
175 | pub fn triangles(n: usize) -> Vec<Triangle> {
176 |     let v0s = svectors_on_unit_sphere(n);
177 |     let v1s = svectors_on_unit_sphere(n);
178 |     let v2s = svectors_on_unit_sphere(n);
179 |     v0s.iter()
180 |         .zip(v1s.iter().zip(v2s.iter()))
181 |         .map(|(&v0, (&v1, &v2))| Triangle::new(v0, v1, v2, SMaterial::white()))
182 |         .collect()
183 | }
184 | 
185 | /// Generates n bounding boxes with two vertices on the unit sphere.
186 | pub fn aabbs(n: usize) -> Vec<Aabb> {
187 |     let v0s = svectors_on_unit_sphere(n);
188 |     let v1s = svectors_on_unit_sphere(n);
189 |     v0s.iter()
190 |         .zip(v1s.iter())
191 |         .map(|(&v0, &v1)| Aabb::new(SVector3::min(v0, v1), SVector3::max(v0, v1)))
192 |         .collect()
193 | }
194 | 
195 | /// Generates n mrays originating from a sphere of radius 10, pointing inward.
196 | pub fn mrays_inward(n: usize) -> Vec<MRay> {
197 |     let origins = mvectors_on_unit_sphere(n);
198 |     let dests = mvectors_on_unit_sphere(n);
199 |     origins.iter()
200 |         .zip(dests.iter())
201 |         .map(|(&from, &to)| {
202 |             let origin = from * Mf32::broadcast(10.0);
203 |             let direction = (to - origin).normalized();
204 |             MRay::new(origin, direction)
205 |         })
206 |         .collect()
207 | }
208 | 
209 | /// Generates n mrays originating from a sphere of radius 10, pointing inward.
210 | /// The rays share the origin and point roughly in the same direction.
211 | pub fn mrays_inward_coherent(n: usize) -> Vec<MRay> {
212 |     let origins = svectors_on_unit_sphere(n);
213 |     let dests = mvectors_on_unit_sphere(n);
214 |     origins.iter()
215 |         .zip(dests.iter())
216 |         .map(|(&from, &to)| {
217 |             let origin = MVector3::broadcast(from * 10.0);
218 |             let dest = to * Mf32::broadcast(0.5);
219 |             let direction = (dest - origin).normalized();
220 |             MRay::new(origin, direction)
221 |         })
222 |         .collect()
223 | }
224 | 
225 | #[test]
226 | fn aabb_with_srays_respects_probability() {
227 |     let (aabb, rays) = aabb_with_srays(4096, 2048);
228 |     let mut n = 0;
229 |     for ray in &rays {
230 |         let mray = MRay::broadcast(ray);
231 |         if aabb.intersect(&mray).any() {
232 |             n += 1;
233 |         }
234 |     }
235 |     assert_eq!(2048, n);
236 | }
237 | 


--------------------------------------------------------------------------------
/src/gpu/blend.glsl:
--------------------------------------------------------------------------------
 1 | // Convector -- An interactive CPU path tracer
 2 | // Copyright 2016 Ruud van Asseldonk
 3 | 
 4 | // This program is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License version 3. A copy
 6 | // of the License is available in the root of the repository.
 7 | 
 8 | #version 140
 9 | 
10 | in vec2 v_tex_coords;
11 | out vec4 color;
12 | 
13 | uniform sampler2D frame0;
14 | uniform sampler2D frame1;
15 | uniform sampler2D frame2;
16 | uniform sampler2D frame3;
17 | uniform sampler2D frame4;
18 | uniform sampler2D frame5;
19 | uniform sampler2D frame6;
20 | uniform sampler2D frame7;
21 | 
22 | void main() {
23 |     vec4 c0 = texture(frame0, v_tex_coords);
24 |     vec4 c1 = texture(frame1, v_tex_coords);
25 |     vec4 c2 = texture(frame2, v_tex_coords);
26 |     vec4 c3 = texture(frame3, v_tex_coords);
27 |     vec4 c4 = texture(frame4, v_tex_coords);
28 |     vec4 c5 = texture(frame5, v_tex_coords);
29 |     vec4 c6 = texture(frame6, v_tex_coords);
30 |     vec4 c7 = texture(frame7, v_tex_coords);
31 | 
32 |     // Take the mean of the eight frames.
33 |     color = (c0 + c1 + c2 + c3 + c4 + c5 + c6 + c7) * 0.125f;
34 |     color.a = 1.0f;
35 | }
36 | 


--------------------------------------------------------------------------------
/src/gpu/gbuffer.glsl:
--------------------------------------------------------------------------------
 1 | // Convector -- An interactive CPU path tracer
 2 | // Copyright 2016 Ruud van Asseldonk
 3 | 
 4 | // This program is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License version 3. A copy
 6 | // of the License is available in the root of the repository.
 7 | 
 8 | #version 140
 9 | 
10 | in vec2 v_tex_coords;
11 | out vec4 color;
12 | 
13 | uniform sampler2D frame;
14 | uniform sampler2D gbuffer;
15 | uniform sampler2D texture1;
16 | uniform sampler2D texture2;
17 | 
18 | void main() {
19 |     color = texture(frame, v_tex_coords);
20 |     vec4 data = texture(gbuffer, v_tex_coords);
21 | 
22 |     float fresnel = data.b;
23 |     vec4 white = vec4(1.0f, 1.0f, 1.0f, 1.0f);
24 | 
25 |     // The alpha channel contains the texture index, encoded as index / 255.
26 |     // Decode it by rounding to the nearest integer rather than by comparing
27 |     // floats for exact equality, so that a last-bit difference between the
28 |     // sampled value and the constant cannot silently disable texturing.
29 |     int texture_index = int(data.a * 255.0f + 0.5f);
30 | 
31 |     // Texture index 0 indicates that the texture is not used, so the pixel
32 |     // is already correct. For the other textures, sample them and blend
33 |     // according to the Fresnel factor.
34 |     if (texture_index == 1) {
35 |         vec4 tex_color = texture(texture1, data.xy);
36 |         color = color * (white * fresnel + tex_color * (1.0f - fresnel));
37 |     } else if (texture_index == 2) {
38 |         vec4 tex_color = texture(texture2, data.xy);
39 |         color = color * (white * fresnel + tex_color * (1.0f - fresnel));
40 |     }
41 |     // Texture index 3 is currently not used.
42 | }
43 | 


--------------------------------------------------------------------------------
/src/gpu/id.glsl:
--------------------------------------------------------------------------------
 1 | // Convector -- An interactive CPU path tracer
 2 | // Copyright 2016 Ruud van Asseldonk
 3 | 
 4 | // This program is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License version 3. A copy
 6 | // of the License is available in the root of the repository.
 7 | 
 8 | #version 140
 9 | 
10 | in vec2 v_tex_coords;
11 | out vec4 color;
12 | 
13 | uniform sampler2D frame;
14 | 
15 | void main() {
16 |     color = texture(frame, v_tex_coords);  // Identity: copy the texel unchanged.
17 | }
18 | 


--------------------------------------------------------------------------------
/src/gpu/median.glsl:
--------------------------------------------------------------------------------
 1 | // Convector -- An interactive CPU path tracer
 2 | // Copyright 2016 Ruud van Asseldonk
 3 | 
 4 | // This program is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License version 3. A copy
 6 | // of the License is available in the root of the repository.
 7 | 
 8 | #version 140
 9 | 
10 | in vec2 v_tex_coords;
11 | out vec4 color;
12 | 
13 | uniform sampler2D frame;
14 | uniform vec2 pixel_size;
15 | 
16 | void sort2(inout vec4 a0, inout vec4 a1) {
17 |     // Componentwise compare-exchange: a0 gets the minima, a1 the maxima.
18 |     vec4 lo = min(a0, a1);
19 |     a1 = max(a0, a1);
20 |     a0 = lo;
21 | }
22 | 
23 | void sort(inout vec4 a0, inout vec4 a1, inout vec4 a2, inout vec4 a3, inout vec4 a4) {
24 |     sort2(a0, a1);  // A fixed sequence of nine compare-exchange steps. Every
25 |     sort2(a3, a4);  // step works on the four channels independently, so the
26 |     sort2(a0, a2);  // channels of one output may stem from different inputs.
27 |     sort2(a1, a2);
28 |     sort2(a0, a3);
29 |     sort2(a2, a3);
30 |     sort2(a1, a4);
31 |     sort2(a1, a2);  // The caller relies on a2 being the median afterwards,
32 |     sort2(a3, a4);  // with a1 and a3 as its nearest neighbours.
33 | }
34 | 
35 | vec4 rgb_to_xyz(vec4 c) {
36 |     mat3 conv = mat3(0.49f, 0.17697f, 0.0f,  // Column-major: one column per line.
37 |                      0.31f, 0.81240f, 0.01f,
38 |                      0.20f, 0.01063f, 0.99f) * (1.0f / 0.17697f);
39 |     c.xyz = conv * c.rgb;  // Only the first three channels change; c.w is kept.
40 |     return c;
41 | }
42 | 
43 | vec4 xyz_to_rgb(vec4 c) {
44 |     mat3 to_rgb = mat3( 0.41847f,  -0.091169f,  0.00092090f,  // One column per line.
45 |                        -0.15866f,   0.25243f,  -0.0025498f,
46 |                        -0.082835f,  0.015708f,  0.17860f);
47 |     c.rgb = to_rgb * c.xyz;  // Only the first three channels change; c.a is kept.
48 |     return c;
49 | }
50 | 
51 | void main() {
52 |     // Sample 5 pixels in a "+" shape.
53 |     vec4 c0 = texture(frame, v_tex_coords);
54 |     vec4 c1 = texture(frame, v_tex_coords + vec2(pixel_size.x, 0.0f));
55 |     vec4 c2 = texture(frame, v_tex_coords + vec2(0.0f, pixel_size.y));
56 |     vec4 c3 = texture(frame, v_tex_coords - vec2(pixel_size.x, 0.0f));
57 |     vec4 c4 = texture(frame, v_tex_coords - vec2(0.0f, pixel_size.y));
58 | 
59 |     // Convert all the pixels from CIE RGB to the CIE XYZ color space before
60 |     // taking the median. This ensures that lightness is better preserved.
61 |     c0 = rgb_to_xyz(c0);
62 |     c1 = rgb_to_xyz(c1);
63 |     c2 = rgb_to_xyz(c2);
64 |     c3 = rgb_to_xyz(c3);
65 |     c4 = rgb_to_xyz(c4);
66 | 
67 |     // Take the sort-of-median of those pixels. After sorting, the true median
68 |     // is c2, but weigh in c1 and c3 a bit as well for a more balanced result.
69 |     sort(c0, c1, c2, c3, c4);
70 |     vec4 median = c2 * 0.667f + c1 * 0.1666f + c3 * 0.1666f;
71 | 
72 |     // Convert back from CIE XYZ to CIE RGB (which is a linear RGB color
73 |     // space).
74 |     color = xyz_to_rgb(median);
75 | }
76 | 


--------------------------------------------------------------------------------
/src/gpu/vertex.glsl:
--------------------------------------------------------------------------------
 1 | // Convector -- An interactive CPU path tracer
 2 | // Copyright 2016 Ruud van Asseldonk
 3 | 
 4 | // This program is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License version 3. A copy
 6 | // of the License is available in the root of the repository.
 7 | 
 8 | #version 140
 9 | 
10 | in vec2 position;
11 | in vec2 tex_coords;
12 | out vec2 v_tex_coords;
13 | 
14 | void main() {
15 |     v_tex_coords = tex_coords;  // Output of this stage, passed on unmodified.
16 |     gl_Position = vec4(position.x, position.y, 0.0, 1.0);
17 | }
18 | 


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | //! An interactive path tracer.
  9 | 
 10 | #![allow(dead_code)] // TODO: Remove at some point.
 11 | 
 12 | #![feature(alloc, cfg_target_feature, heap_api, platform_intrinsics, repr_simd, test)]
 13 | 
 14 | extern crate alloc;
 15 | extern crate filebuffer;
 16 | extern crate imagefmt;
 17 | extern crate num_cpus;
 18 | extern crate rand;
 19 | extern crate rayon;
 20 | extern crate scoped_threadpool;
 21 | extern crate test;
 22 | extern crate thread_id;
 23 | extern crate time;
 24 | 
 25 | #[macro_use]
 26 | extern crate glium;
 27 | 
 28 | mod aabb;
 29 | mod bvh;
 30 | mod material;
 31 | mod quaternion;
 32 | mod random;
 33 | mod ray;
 34 | mod renderer;
 35 | mod scene;
 36 | mod simd;
 37 | mod stats;
 38 | mod trace;
 39 | mod triangle;
 40 | mod ui;
 41 | mod util;
 42 | mod vector3;
 43 | mod wavefront;
 44 | 
 45 | #[cfg(test)]
 46 | mod bench;
 47 | 
 48 | use material::SMaterial;
 49 | use renderer::{RenderBuffer, Renderer};
 50 | use scene::Scene;
 51 | use stats::GlobalStats;
 52 | use std::collections::HashMap;
 53 | use std::mem;
 54 | use time::PreciseTime;
 55 | use ui::{Action, Window};
 56 | use wavefront::Mesh;
 57 | 
 58 | fn load_textures() -> Vec<Vec<u8>> {
 59 |     use imagefmt::ColFmt;
 60 | 
 61 |     // Decode both textures as RGB and return their raw buffers in the order
 62 |     // floor, wood. The panic messages name the files exactly as on disk.
 63 |     println!("loading textures");
 64 |     let tex_floor = imagefmt::read("textures/floor.jpg", ColFmt::RGB);
 65 |     let tex_wood = imagefmt::read("textures/wood_light.jpg", ColFmt::RGB);
 66 |     vec![tex_floor.expect("failed to read floor.jpg").buf,
 67 |          tex_wood.expect("failed to read wood_light.jpg").buf]
 68 | }
 69 | 
 70 | fn build_scene() -> Scene {
 71 |     println!("loading geometry");
 72 |     let mut materials = HashMap::new(); // Name -> material; presumably the names used in the .obj.
 73 |     materials.insert("baseboard", SMaterial::white().with_glossiness(4));
 74 |     materials.insert("ceiling", SMaterial::white().with_glossiness(1));
 75 |     materials.insert("fauteuil", SMaterial::diffuse(1.0, 0.1, 0.4));
 76 |     materials.insert("floor", SMaterial::diffuse(0.569, 0.494, 0.345).with_glossiness(4).with_texture(1));
 77 |     materials.insert("glass", SMaterial::sky());
 78 |     materials.insert("wall", SMaterial::diffuse(0.65, 0.7, 0.9).with_glossiness(1));
 79 |     materials.insert("wood_light", SMaterial::diffuse(0.6, 0.533, 0.455).with_glossiness(3).with_texture(2));
 80 |     let indoor = Mesh::load_with_materials("models/indoor.obj", &materials);
 81 |     let meshes = [indoor]; // The scene is built from this single mesh.
 82 | 
 83 |     println!("building bvh");
 84 |     let scene = Scene::from_meshes(&meshes);
 85 |     scene.print_stats();
 86 | 
 87 |     scene
 88 | }
 89 | 
 90 | fn main() {
 91 |     // The patch size has been tuned for 8 cores. With a resolution of 1280x736 there are 920
 92 |     // patches to be rendered by the worker pool. A patch width of 64 would leave only about a
 93 |     // quarter of those, but some patches are very heavy to render and some are practically a
 94 |     // no-op, so all threads might stall because one thread did not yet finish the frame. A patch
 95 |     // width of 32 is a good balance between throughput and latency.
 96 |     let width = 1280;
 97 |     let height = 736;
 98 |     let patch_width = 32;
 99 | 
100 |     let mut window = Window::new(width, height, "Convector interactive path tracer");
101 |     let mut renderer = Renderer::new(build_scene(), width, height);
102 |     let mut stats = GlobalStats::new();
103 |     let mut trace_log = trace::TraceLog::with_limit(6 * 1024);
104 |     let mut threadpool = scoped_threadpool::Pool::new(num_cpus::get() as u32);
105 |     let mut backbuffer = RenderBuffer::new(width, height);
106 |     let mut backbuffer_g = RenderBuffer::new(width, height);
107 |     let mut f32_buffer = renderer.new_buffer_f32(); // TODO: Consistency.
108 |     let mut f32_buffer_samples = 0;
109 |     let mut should_continue = true;
110 |     let mut render_realtime = true;
111 | 
112 |     for texture in load_textures() {
113 |         window.upload_texture(texture);
114 |     }
115 | 
116 |     backbuffer.fill_black();
117 |     let epoch = PreciseTime::now();
118 | 
119 |     // Insert one fake value so we have an initial guess for the time delta.
120 |     stats.frame_us.insert(16_667);
121 | 
122 |     println!("scene and renderer initialized, entering render loop");
123 | 
124 |     while should_continue {
125 |         let frame_number = trace_log.inc_frame_number();
126 |         let stw_frame = trace_log.scoped("render_frame", 0);
127 | 
128 |         let time = epoch.to(PreciseTime::now()).num_milliseconds() as f32 * 1e-3; // Seconds since `epoch`.
129 |         let time_delta = (stats.frame_us.median() as f32) * 1e-6; // Median of `frame_us`, scaled by 1e-6.
130 | 
131 |         match window.handle_events() {
132 |             Action::DumpTrace => {
133 |                 trace_log.export_to_file("trace.json").expect("failed to write trace");
134 |                 println!("wrote trace to trace.json");
135 |             }
136 |             Action::Quit => should_continue = false,
137 |             Action::PrintStats => stats.print(),
138 |             Action::ToggleDebugView => renderer.toggle_debug_view(),
139 |             Action::ToggleRealtime => {
140 |                 render_realtime = !render_realtime;
141 |                 f32_buffer = renderer.new_buffer_f32();
142 |                 f32_buffer_samples = 0;
143 |                 // In accumulative mode the time is fixed and there is no motion
144 |                 // blur.
145 |                 renderer.set_time(time, 0.0);
146 |             }
147 |             Action::None => {}
148 |         }
149 | 
150 |         if render_realtime {
151 |             renderer.set_time(time, time_delta);
152 |         }
153 |         renderer.update_scene();
154 | 
155 |         // When rendering in accumulation mode, first copy the current state
156 |         // into the backbuffer (which will immediately after this become the new
157 |         // front buffer) so we can display it later.
158 |         if !render_realtime {
159 |             let n = if f32_buffer_samples > 0 { f32_buffer_samples } else { 1 };
160 |             renderer.buffer_f32_into_render_buffer(&f32_buffer, &mut backbuffer, n);
161 |             f32_buffer_samples += 1;
162 |         }
163 | 
164 |         let new_backbuffer = RenderBuffer::new(width, height);
165 |         let new_backbuffer_g = RenderBuffer::new(width, height);
166 |         let frontbuffer = mem::replace(&mut backbuffer, new_backbuffer);
167 |         let frontbuffer_g = mem::replace(&mut backbuffer_g, new_backbuffer_g);
168 |         let renderer_ref = &renderer;
169 |         let trace_log_ref = &trace_log;
170 |         let backbuffer_ref = &backbuffer;
171 |         let backbuffer_g_ref = &backbuffer_g;
172 |         let f32_buffer_ref = &f32_buffer[..];
173 | 
174 |         threadpool.scoped(|scope| {
175 | 
176 |             let w = width / patch_width; // Integer division: only whole patches are queued,
177 |             let h = height / patch_width; // so both sizes should be multiples of the patch width.
178 | 
179 |             // Queue tasks for the worker threads to render patches.
180 |             for i in 0..w {
181 |                 for j in 0..h {
182 |                     scope.execute(move || {
183 |                         let x = i * patch_width;
184 |                         let y = j * patch_width;
185 | 
186 |                         // Multiple threads mutably borrow the buffer below,
187 |                         // which could cause races, but all of the patches are
188 |                         // disjoint, hence it is safe.
189 | 
190 |                         if render_realtime {
191 |                             let _stw = trace_log_ref.scoped("render_patch_u8", j * w + i);
192 |                             let bitmap = unsafe { backbuffer_ref.get_mut_slice() };
193 |                             let gbuffer = unsafe { backbuffer_g_ref.get_mut_slice() };
194 |                             renderer_ref.render_patch_u8(bitmap, gbuffer, patch_width, x, y, frame_number);
195 |                         } else {
196 |                             let _stw = trace_log_ref.scoped("accumulate_patch_f32", j * w + i);
197 |                             let buffer = unsafe { util::make_mutable(f32_buffer_ref) };
198 |                             let gbuffer = unsafe { backbuffer_g_ref.get_mut_slice() };
199 |                             renderer_ref.accumulate_patch_f32(buffer, gbuffer, patch_width, x, y, frame_number);
200 |                         }
201 |                     });
202 |                 }
203 |             }
204 | 
205 |             // In the meantime, upload the previous frame to the GPU
206 |             // and display it.
207 |             let _stw_display = trace_log.scoped("display_buffer", 0);
208 |             window.display_buffer(frontbuffer.into_bitmap(),
209 |                                   frontbuffer_g.into_bitmap(),
210 |                                   &mut stats);
211 | 
212 |             // The scope automatically waits for all tasks to complete
213 |             // before the loop continues.
214 |         });
215 | 
216 |         stats.frame_us.insert_time_us(stw_frame.take_duration());
217 |     }
218 | }
219 | 


--------------------------------------------------------------------------------
/src/quaternion.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | //! Implements quaternion utilities to handle rotation.
  9 | 
 10 | use simd::Mf32;
 11 | use vector3::MVector3;
 12 | 
 13 | #[cfg(test)]
 14 | use {bench, test};
 15 | 
 16 | #[derive(Copy, Clone, Debug)]
 17 | pub struct SQuaternion { // A single quaternion a + b*i + c*j + d*k.
 18 |     pub a: f32, // Real part.
 19 |     pub b: f32, // Coefficient of i.
 20 |     pub c: f32, // Coefficient of j.
 21 |     pub d: f32, // Coefficient of k.
 22 | }
 23 | 
 24 | pub struct MQuaternion { // Quaternions a + b*i + c*j + d*k stored component-wise in `Mf32` values.
 25 |     pub a: Mf32, // Real parts.
 26 |     pub b: Mf32, // Coefficients of i.
 27 |     pub c: Mf32, // Coefficients of j.
 28 |     pub d: Mf32, // Coefficients of k.
 29 | }
 30 | 
 31 | impl SQuaternion {
 32 |     /// Constructs the quaternion `a + b*i + c*j + d*k` from its components.
 33 |     ///
 34 |     /// The components are stored exactly as given; in particular the
 35 |     /// quaternion is not normalized.
 36 |     pub fn new(a: f32, b: f32, c: f32, d: f32) -> SQuaternion {
 37 |         let quaternion = SQuaternion { a: a, b: b, c: c, d: d };
 38 |         quaternion
 39 |     }
 40 | }
 41 | 
 42 | impl MQuaternion {
 43 |     /// Builds a packed quaternion from its four packed components: `a` holds
 44 |     /// the real parts and `b`, `c`, `d` hold the coefficients of i, j and k.
 45 |     pub fn new(a: Mf32, b: Mf32, c: Mf32, d: Mf32) -> MQuaternion {
 46 |         MQuaternion { a: a, b: b, c: c, d: d }
 47 |     }
 48 | 
 49 |     /// Builds a packed quaternion in which every component is the broadcast
 50 |     /// of the corresponding component of `q`.
 51 |     pub fn broadcast(q: SQuaternion) -> MQuaternion {
 52 |         let (a, b) = (Mf32::broadcast(q.a), Mf32::broadcast(q.b));
 53 |         let (c, d) = (Mf32::broadcast(q.c), Mf32::broadcast(q.d));
 54 |         MQuaternion { a: a, b: b, c: c, d: d }
 55 |     }
 56 | 
 57 |     /// Computes `self + t * delta` per component and normalizes the result.
 58 |     ///
 59 |     /// To interpolate from `self` towards an end point, pass the difference
 60 |     /// between that end point and `self` as `delta`. Neither input is negated
 61 |     /// to pick the shorter arc; the comment in the body explains why.
 62 |     pub fn interpolate(&self, delta: &MQuaternion, t: Mf32) -> MQuaternion {
 63 |         // Unit quaternions form a hypersphere that covers SO3(R) twice: every
 64 |         // rotation corresponds to two antipodal points on it. Walking naively
 65 |         // along the arc between two quaternions can therefore cover more than
 66 |         // pi/2 radians, in which case replacing one of the end points by its
 67 |         // antipode gives a shorter arc. The shortest arc is the interpolation
 68 |         // that is wanted, the longer arc rotates too much. A fully general
 69 |         // implementation would compute the dot product of the two quaternions
 70 |         // and negate one of them if it is negative. The demo gets to pick its
 71 |         // own quaternions though, and it picks them such that they
 72 |         // interpolate correctly, so nothing is negated here.
 73 |         //
 74 |         // (The paragraph above is about the end points, not about `delta`.)
 75 | 
 76 |         // Take a linear step and project the result back onto the unit
 77 |         // hypersphere. This is not entirely correct, because the angular
 78 |         // velocity of the resulting rotation is not constant. Constant
 79 |         // velocity takes a spherical linear interpolation, which is expensive
 80 |         // to compute: it involves an inverse cosine, two sines and two
 81 |         // divisions. The error is very small for small angles, so the fast
 82 |         // variant is used here.
 83 |         let a = delta.a.mul_add(t, self.a);
 84 |         let b = delta.b.mul_add(t, self.b);
 85 |         let c = delta.c.mul_add(t, self.c);
 86 |         let d = delta.d.mul_add(t, self.d);
 87 | 
 88 |         let length_squared = a.mul_add(a, b * b) + c.mul_add(c, d * d);
 89 | 
 90 |         // Dividing by a full square root makes this method about 17% slower
 91 |         // than multiplying by an `rsqrt()` would. However, `rsqrt()` has a
 92 |         // relatively big error, and this code is used to generate camera
 93 |         // rays, so it had better be accurate. A ray direction with norm 1.01
 94 |         // after a few bounces also leads to wrong intersection tests, but
 95 |         // that difference is probably hidden by the randomness anyway. The
 96 |         // first intersection, however, must be correct, otherwise the
 97 |         // geometry gets distorted. That is why the camera rays get the
 98 |         // accurate treatment.
 99 |         let inv_length = Mf32::one() / length_squared.sqrt();
100 | 
101 |         MQuaternion {
102 |             a: a * inv_length,
103 |             b: b * inv_length,
104 |             c: c * inv_length,
105 |             d: d * inv_length,
106 |         }
107 |     }
108 | }
109 | 
110 | pub fn rotate(vector: &MVector3, rotation: &MQuaternion) -> MVector3 {
111 |     let v = vector;
112 |     let q = rotation;
113 | 
114 |     // For a unit quaternion q and a vector in R3 identified with the subspace
115 |     // of the quaternion algebra spanned by (i, j, k), the rotated vector is
116 |     // given by q * v * q^-1. (And because q is a unit quaternion, its inverse
117 |     // is its conjugate.) This means that we can compute the rotation in two
118 |     // steps: p = v * q^-1, and q * p. The first step is simpler than generic
119 |     // quaternion multiplication because we know that v is pure imaginary. The
120 |     // second step is simpler than generic quaternion multiplication because we
121 |     // know that the result is pure imaginary, so the real component does not
122 |     // have to be computed.
123 | 
124 |     // For q = a + b*i + c*j + d*k and v = x*i + y*j + z*k, v * q^-1 is given
125 |     // by
126 |     //
127 |     //     b*x + c*y + d*z +
128 |     //     ((a - b)*x + (c - d)*(y + z) + b*x - c*y + d*z)*i +
129 |     //     (d*x + a*y - b*z)*j +
130 |     //     (-(c + d)*x + (a + b)*(y + z) + d*x - a*y - b*z)*k
131 |     //
132 |     // I did not bother with using `mul_add` or eliminating common
133 |     // subexpressions below because the code is unreadable enough as it is ...
134 | 
135 |     let pa = q.b * v.x + q.c * v.y + q.d * v.z;
136 |     let pb = q.b * v.x - q.c * v.y + q.d * v.z + (q.a - q.b) * v.x + (q.c - q.d) * (v.y + v.z);
137 |     let pc = q.d * v.x + q.a * v.y - q.b * v.z;
138 |     let pd = q.d * v.x - q.a * v.y - q.b * v.z - (q.c + q.d) * v.x + (q.a + q.b) * (v.y + v.z);
139 | 
140 |     // The product of q = qa + qb*i + qc*j + qd*k and
141 |     // p = pa + pb*i + pc*j + pd*k is given by
142 |     //
143 |     //    pa*qa - pb*qb - pc*qc - pd*qd +
144 |     //    ((pa + pb)*(qa + qb) - (pc - pd)*(qc + qd) - pa*qa - pb*qb + pc*qc - pd*qd)*i +
145 |     //    (pc*qa - pd*qb + pa*qc + pb*qd)*j +
146 |     //    ((pc + pd)*(qa + qb) + (pa - pb)*(qc + qd) - pc*qa - pd*qb - pa*qc + pb*qd)*k
147 | 
148 |     let rb = (pa + pb) * (q.a + q.b) - (pc - pd) * (q.c + q.d) - pa * q.a - pb * q.b + pc * q.c - pd * q.d;
149 |     let rc = pc * q.a - pd * q.b + pa * q.c + pb * q.d;
150 |     let rd = (pc + pd) * (q.a + q.b) + (pa - pb) * (q.c + q.d) - pc * q.a - pd * q.b - pa * q.c + pb * q.d;
151 | 
152 |     MVector3::new(rb, rc, rd) // The i, j and k components of q * p; its real part is skipped.
153 | }
154 | 
155 | #[cfg(test)]
156 | fn assert_mvectors_equal(expected: MVector3, computed: MVector3, margin: f32) {
157 |     // Test that the vectors are equal to within `margin`, by checking the
158 |     // sign of margin^2 minus the squared norm of their difference.
159 |     let error = (computed - expected).norm_squared();
160 |     assert!((Mf32::broadcast(margin * margin) - error).all_sign_bits_positive(),
161 |             "expected: ({}, {}, {}), computed: ({}, {}, {})",
162 |             expected.x.0, expected.y.0, expected.z.0,
163 |             computed.x.0, computed.y.0, computed.z.0);
164 | }
165 | 
166 | #[test]
167 | fn rotate_identity() {
168 |     // Rotating by the identity quaternion must leave every vector unchanged.
169 |     let identity = MQuaternion::broadcast(SQuaternion::new(1.0, 0.0, 0.0, 0.0));
170 |     for v in &bench::mvectors_on_unit_sphere(32) {
171 |         assert_mvectors_equal(*v, rotate(v, &identity), 1e-7);
172 |     }
173 | }
174 | 
175 | #[test]
176 | fn rotate_x() {
177 |     // A rotation by pi/2 radians around the x-axis has the unit quaternion
178 |     // cos(pi/4) + sin(pi/4)*i. It maps y <- -z, z <- y, so the expected
179 |     // result can be written down directly and compared with the computed
180 |     // one, up to floating point inaccuracy.
181 |     let s = 0.5 * 2.0_f32.sqrt();
182 |     let quarter_turn = MQuaternion::broadcast(SQuaternion::new(s, s, 0.0, 0.0));
183 | 
184 |     for v in &bench::mvectors_on_unit_sphere(32) {
185 |         let expected = MVector3::new(v.x, -v.z, v.y);
186 |         let computed = rotate(v, &quarter_turn);
187 |         assert_mvectors_equal(expected, computed, 1e-6);
188 |     }
189 | }
190 | 
191 | #[test]
192 | fn rotate_y() {
193 |     // A rotation by pi/2 radians around the y-axis has the unit quaternion
194 |     // cos(pi/4) + sin(pi/4)*j. It maps x <- z, z <- -x, so the expected
195 |     // result can be written down directly and compared with the computed
196 |     // one, up to floating point inaccuracy.
197 |     let s = 0.5 * 2.0_f32.sqrt();
198 |     let quarter_turn = MQuaternion::broadcast(SQuaternion::new(s, 0.0, s, 0.0));
199 | 
200 |     for v in &bench::mvectors_on_unit_sphere(32) {
201 |         let expected = MVector3::new(v.z, v.y, -v.x);
202 |         let computed = rotate(v, &quarter_turn);
203 |         assert_mvectors_equal(expected, computed, 1e-6);
204 |     }
205 | }
206 | 
207 | #[test]
208 | fn rotate_z() {
209 |     // A rotation by pi/2 radians around the z-axis has the unit quaternion
210 |     // cos(pi/4) + sin(pi/4)*k. It maps y <- x, x <- -y, so the expected
211 |     // result can be written down directly and compared with the computed
212 |     // one, up to floating point inaccuracy.
213 |     let s = 0.5 * 2.0_f32.sqrt();
214 |     let quarter_turn = MQuaternion::broadcast(SQuaternion::new(s, 0.0, 0.0, s));
215 | 
216 |     for v in &bench::mvectors_on_unit_sphere(32) {
217 |         let expected = MVector3::new(-v.y, v.x, v.z);
218 |         let computed = rotate(v, &quarter_turn);
219 |         assert_mvectors_equal(expected, computed, 1e-6);
220 |     }
221 | }
222 | 
223 | #[test]
224 | fn interpolate() {
225 |     use vector3::SVector3;
226 |     // Halfway between the identity and a quarter turn around z lies a turn of
227 |     // pi/4 radians around z, which maps the x unit vector onto the diagonal.
228 |     let s = 0.5 * 2.0_f32.sqrt();
229 |     let start = MQuaternion::broadcast(SQuaternion::new(1.0, 0.0, 0.0, 0.0));
230 |     let delta = MQuaternion::broadcast(SQuaternion::new(s - 1.0, 0.0, 0.0, s));
231 |     let x_axis = MVector3::broadcast(SVector3::new(1.0, 0.0, 0.0));
232 |     let computed = rotate(&x_axis, &start.interpolate(&delta, Mf32::broadcast(0.5)));
233 |     assert_mvectors_equal(MVector3::broadcast(SVector3::new(s, s, 0.0)), computed, 1e-6);
234 | }
235 | 
236 | macro_rules! unroll_10 {
237 |     { $x: block } => { // Expands to the given block repeated ten times in a row.
238 |         $x $x $x $x $x $x $x $x $x $x
239 |     }
240 | }
241 | 
242 | #[bench]
243 | fn bench_rotate_1000(b: &mut test::Bencher) {
244 |     let vectors = bench::mvectors_on_unit_sphere(4096 / 8);
245 |     let quaternions = bench::unit_mquaternions(4096 / 8);
246 |     let mut it = vectors.iter().cycle().zip(quaternions.iter().cycle());
247 |     b.iter(|| {
248 |         let (v, q) = it.next().unwrap(); // A new input pair for every sample.
249 |         for _ in 0..100 { // 100 iterations of 10 unrolled calls: 1000 rotations per sample.
250 |             unroll_10! {{
251 |                 test::black_box(rotate(test::black_box(v), test::black_box(q)));
252 |             }};
253 |         }
254 |     });
255 | }
256 | 
257 | #[bench]
258 | fn bench_interpolate_1000(b: &mut test::Bencher) {
259 |     let q0s = bench::unit_mquaternions(4096 / 8);
260 |     let q1s = bench::unit_mquaternions(4096 / 8);
261 |     let ts = bench::mf32_unit(4096 / 8);
262 |     let mut it = q0s.iter().cycle().zip(q1s.iter().cycle()).zip(ts.iter().cycle());
263 |     b.iter(|| {
264 |         let ((q0, q1), &t) = it.next().unwrap(); // A new input triple for every sample.
265 |         for _ in 0..100 { // 100 iterations of 10 unrolled calls: 1000 interpolations per sample.
266 |             unroll_10! {{
267 |                 test::black_box(
268 |                     test::black_box(q0)
269 |                     .interpolate(test::black_box(q1), test::black_box(t)));
270 |             }};
271 |         }
272 |     });
273 | }
274 | 


--------------------------------------------------------------------------------
/src/random.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | //! Functions for generating random numbers fast.
  9 | //!
 10 | //! To do Monte Carlo integration you need random numbers. Lots of them, but not
 11 | //! necessarily high-quality random numbers. Not online casino or cryptography-
 12 | //! grade random numbers. So it is possible to do a lot better than conventional
 13 | //! RNGs.
 14 | 
 15 | use simd::{Mf32, Mi32, Mu64};
 16 | use std::f32::consts;
 17 | use std::i32;
 18 | use vector3::MVector3;
 19 | 
 20 | #[cfg(test)]
 21 | use test;
 22 | 
 23 | // A theorem that is used intensively in this file: if n and m are coprime, then
 24 | // the map x -> n * x is a bijection of Z/mZ. In practice m is a power of two
 25 | // (2^64 in this case), so anything not divisible by two will do for n, but we
 26 | // might as well take a prime.
 27 | //
 28 | // With that you can build a simple and fast hash function for integers:
 29 | // multiply with a number coprime to 2. On a computer you get the "modulo a
 30 | // power of two" for free. For more details on why this works pretty well,
 31 | // Knuth has an entire section devoted to it in Volume 3 of TAOCP.
 32 | 
 33 | pub struct Rng {
 34 |     state: Mu64, // Four 64-bit values; each one is advanced with its own multiplier.
 35 | }
 36 | 
 37 | impl Rng {
 38 |     /// Creates a new random number generator.
 39 |     ///
 40 |     /// The generator is seeded from three 32-bit integers, suggestively called
 41 |     /// x, y, and i (for frame number). These three values are hashed together,
 42 |     /// and that is used as the seed.
 43 |     pub fn with_seed(x: u32, y: u32, i: u32) -> Rng {
 44 |         // The constants here are all primes. It is important that the four
 45 |         // values in the final multiplication are distinct, otherwise the
 46 |         // sequences will produce the same values. Also, the primes should not
 47 |         // be close together, otherwise correlations will be apparent. The
 48 |         // values `x`, `y`, and `i` are hashed with different functions to
 49 |         // ensure that a permutation of (x, y, i) results in a different seed,
 50 |         // otherwise patterns would appear because the range of x and y is
 51 |         // similar.
 52 |         let a = (x as u64).wrapping_mul(12276630456901467871);
 53 |         let b = (y as u64).wrapping_mul(7661526868048087387);
 54 |         let c = (i as u64).wrapping_mul(2268244495640532043);
 55 |         let seed = a.wrapping_add(b).wrapping_add(c);
 56 | 
 57 |         // If I only use the above scheme, the seed has a severe bias modulo
 58 |         // small powers of two. (For instance, x and y are always multiples of
 59 |         // 16 and 4, so modulo 8, a + b is always 0 or 4.) To avoid this, add
 60 |         // the seed modulo a prime to the seed itself. This removes the
 61 |         // correlation modulo small powers of two.
 62 |         let seed = seed.wrapping_add(seed % 9358246936573323101);
 63 | 
 64 |         let primes = Mu64(14491630826648200009,
 65 |                           13149596372461506851,
 66 |                           6119410235796056053,
 67 |                           14990141545859273719);
 68 | 
 69 |         Rng { state: Mu64(seed, seed, seed, seed) * primes }
 70 |     }
 71 | 
 72 |     /// Advances the state and returns the state from before the update.
 73 |     fn next(&mut self) -> Mu64 {
 74 |         // As with the seed, this is nothing more than hashing the state over
 75 |         // and over again by multiplying it with a constant. That is faster
 76 |         // than e.g. xorshift and the numbers are still random enough. The
 77 |         // factors are deliberately composite, to demonstrate that being
 78 |         // coprime to 2 is all that is required of them. Multiply one of them
 79 |         // by two and observe how the state reaches 0 after a few
 80 |         // iterations.
 81 |         let factors = Mu64(3 * 1073243692214514217,
 82 |                            5 * 3335100457702756523,
 83 |                            7 * 8789056573444181,
 84 |                            11 * 781436371140792079);
 85 | 
 86 |         let previous = self.state;
 87 |         self.state = previous * factors;
 88 | 
 89 |         previous
 90 |     }
 91 | 
 92 |     /// Produces 8 random 32-bit integers by reinterpreting the bits of the
 93 |     /// state that `next()` returns.
 94 |     ///
 95 |     /// Note: a sequence of generated numbers is not random modulo small
 96 |     /// composite numbers. Take the high order bits to avoid bias.
 97 |     pub fn sample_u32(&mut self) -> [u32; 8] {
 98 |         // A plain `transmute` can cause a segmentation fault here, hence
 99 |         // `transmute_copy`: https://github.com/rust-lang/rust/issues/32947.
100 |         let state = self.next();
101 |         unsafe { ::std::mem::transmute_copy(&state) }
102 |     }
103 | 
104 |     /// Returns 8 random numbers distributed uniformly over the unit
105 |     /// interval, computed as `0.5 - i / 2^32` for a random signed `i`.
106 |     /// NOTE(review): if `into_mf32` converts numerically, `i32::MIN` maps
107 |     /// to exactly 1.0, so the interval looks closed, [0, 1] -- confirm.
108 |     pub fn sample_unit(&mut self) -> Mf32 {
109 |         // Dividing by `i32::MIN` gives a negative scale of magnitude 2^-32.
110 |         let scale = Mf32::broadcast(0.5 / i32::MIN as f32);
111 |         let offset = Mf32::broadcast(0.5);
112 |         let ints: Mi32 = unsafe { ::std::mem::transmute(self.next()) };
113 |         ints.into_mf32().mul_add(scale, offset)
114 |     }
115 | 
116 |     /// Returns 8 random numbers distributed uniformly between -1 and 1,
117 |     /// computed as `-i / 2^31` for a random signed 32-bit integer `i`.
118 |     /// NOTE(review): if `into_mf32` converts numerically, `i32::MIN` maps
119 |     /// to exactly 1.0, so the interval looks closed, [-1, 1] -- confirm.
120 |     pub fn sample_biunit(&mut self) -> Mf32 {
121 |         // Dividing by `i32::MIN` gives a negative scale of magnitude 2^-31.
122 |         let scale = Mf32::broadcast(1.0 / i32::MIN as f32);
123 |         let ints: Mi32 = unsafe { ::std::mem::transmute(self.next()) };
124 |         ints.into_mf32() * scale
125 |     }
126 | 
127 |     /// Returns 8 random angles distributed uniformly between -pi and pi,
128 |     /// computed as `-pi * i / 2^31` for a random signed 32-bit integer `i`.
129 |     /// NOTE(review): if `into_mf32` converts numerically, `i32::MIN` maps
130 |     /// to pi itself, so the interval looks closed, [-pi, pi] -- confirm.
131 |     pub fn sample_angle(&mut self) -> Mf32 {
132 |         // Dividing by `i32::MIN` gives a negative scale, -pi / 2^31.
133 |         let scale = Mf32::broadcast(consts::PI / i32::MIN as f32);
134 |         let ints: Mi32 = unsafe { ::std::mem::transmute(self.next()) };
135 |         ints.into_mf32() * scale
136 |     }
137 | 
138 |     /// Samples 8 unit vectors on the hemisphere around the positive z-axis,
139 |     /// following a cosine-weighted distribution.
140 |     ///
141 |     /// A point is picked on the unit disk in polar form (angle `phi`, squared
142 |     /// radius `r_sqr`) and lifted onto the hemisphere by `z = sqrt(1 - r^2)`.
143 |     pub fn sample_hemisphere_vector(&mut self) -> MVector3 {
144 |         // The angle is drawn before the squared radius; keep this order, it
145 |         // determines which random numbers each quantity consumes.
146 |         let phi = self.sample_angle();
147 |         let r_sqr = self.sample_unit();
148 | 
149 |         // A fast inverse square root plus a reciprocal approximation would
150 |         // cost 14 cycles instead of 21 according to the Intel intrinsics
151 |         // guide, at lower precision. The sin and cos polynomials take time
152 |         // anyway, so the precise square root is not a problem: plenty of
153 |         // cycles pass before its result is needed.
154 |         let (r, z) = (r_sqr.sqrt(), (Mf32::one() - r_sqr).sqrt());
155 | 
156 |         // TODO: cos is a bottleneck, is the precision needed? Perhaps less
157 |         // precise sin and cos plus a final normalization would be faster.
158 |         MVector3::new(phi.sin() * r, phi.cos() * r, z)
159 |     }
160 | 
161 |     /// Samples 8 vectors on the hemisphere around the positive z-axis from
162 |     /// a cosine-weighted distribution, using branchless rejection sampling.
163 |     ///
164 |     /// Benchmarks show that this variant is not faster than
165 |     /// `sample_hemisphere_vector`, and with a small probability it returns a
166 |     /// wrong result too, so it should not be used at all. It is kept here
167 |     /// for comparison purposes only.
168 |     fn sample_hemisphere_vector_reject(&mut self) -> MVector3 {
169 |         // Draw two candidate points in the square [-1, 1] x [-1, 1]. If the
170 |         // second one is not inside the unit circle, the first one is taken
171 |         // instead. Both candidates miss the circle with probability
172 |         // (1 - pi/4)^2, about 4.6%; taking more samples would reduce that
173 |         // probability further.
174 |         let (x0, y0) = (self.sample_biunit(), self.sample_biunit());
175 |         let (x1, y1) = (self.sample_biunit(), self.sample_biunit());
176 | 
177 |         // Squared distance of each candidate to the origin.
178 |         let r0 = x0.mul_add(x0, y0 * y0);
179 |         let r1 = x1.mul_add(x1, y1 * y1);
180 | 
181 |         // When r1 > 1 the second point lies outside of the unit disk and
182 |         // `1 - r1` is negative, so its sign bit is set (not "positive"). In
183 |         // that case point 0 should be picked instead of point 1.
184 |         // NOTE(review): `Mf32::pick` is defined elsewhere; verify that a set
185 |         // sign bit in the mask really selects `self` (point 0) and not
186 |         // `other` (point 1) in the calls below.
187 |         let selector = Mf32::one() - r1;
188 | 
189 |         let x = x0.pick(x1, selector);
190 |         let y = y0.pick(y1, selector);
191 |         let r = r0.pick(r1, selector);
192 | 
193 |         MVector3::new(x, y, (Mf32::one() - r).sqrt())
194 |     }
195 | }
196 | 
197 | /// Every lane of 4096 consecutive `sample_unit` results must lie in [0, 1].
198 | #[test]
199 | fn sample_unit_is_in_interval() {
200 |     let mut rng = Rng::with_seed(2, 5, 7);
201 |     for sample in (0..4096).map(|_| rng.sample_unit()) {
202 |         let below_max = Mf32::one() - sample;
203 |         assert!(sample.all_sign_bits_positive(), "{:?} should be >= 0", sample);
204 |         assert!(below_max.all_sign_bits_positive(), "{:?} should be <= 1", sample);
205 |     }
206 | }
207 | 
208 | /// Every lane of 4096 consecutive `sample_biunit` results must lie in [-1, 1].
209 | #[test]
210 | fn sample_biunit_is_in_interval() {
211 |     let mut rng = Rng::with_seed(2, 5, 7);
212 |     for sample in (0..4096).map(|_| rng.sample_biunit()) {
213 |         let (above_min, below_max) = (Mf32::one() + sample, Mf32::one() - sample);
214 |         assert!(above_min.all_sign_bits_positive(), "{:?} should be >= -1", sample);
215 |         assert!(below_max.all_sign_bits_positive(), "{:?} should be <= 1", sample);
216 |     }
217 | }
218 | 
219 | /// Every lane of 4096 consecutive `sample_angle` results must lie in [-pi, pi].
220 | #[test]
221 | fn sample_angle_is_in_interval() {
222 |     let (mut rng, pi) = (Rng::with_seed(2, 5, 7), Mf32::broadcast(consts::PI));
223 |     for sample in (0..4096).map(|_| rng.sample_angle()) {
224 |         let (above_min, below_max) = (pi + sample, pi - sample);
225 |         assert!(above_min.all_sign_bits_positive(), "{:?} should be >= -pi", sample);
226 |         assert!(below_max.all_sign_bits_positive(), "{:?} should be <= pi", sample);
227 |     }
228 | }
229 | 
230 | /// The norm of 4096 consecutive hemisphere samples must be within 0.009 of 1.
231 | #[test]
232 | fn sample_hemisphere_vector_has_unit_norm() {
233 |     let mut rng = Rng::with_seed(2, 5, 7);
234 |     let (lower, upper) = (Mf32::broadcast(0.991), Mf32::broadcast(1.009));
235 |     for v in (0..4096).map(|_| rng.sample_hemisphere_vector()) {
236 |         let r = v.norm_squared().sqrt();
237 |         assert!((r - lower).all_sign_bits_positive(), "{:?} should be ~1", r);
238 |         assert!((upper - r).all_sign_bits_positive(), "{:?} should be ~1", r);
239 |     }
240 | }
241 | 
242 | /// Smoke test: calls `sample_u32` 4096 times and folds all results together
243 | /// with xor, so that the calls cannot be optimized away.
244 | #[test]
245 | fn sample_u32_does_not_cause_sigsegv() {
246 |     let mut rng = Rng::with_seed(2, 5, 7);
247 |     let mut acc = [0u32; 8];
248 | 
249 |     for _ in 0..4096 {
250 |         let sample = rng.sample_u32();
251 |         for (a, s) in acc.iter_mut().zip(sample.iter()) { *a ^= *s; }
252 |     }
253 | 
254 |     // A word could be 0 in theory, but that probability is 1/2^32. The
255 |     // check is mainly here to ensure that nothing is optimized away.
256 |     for word in acc.iter() {
257 |         assert!(*word != 0);
258 |     }
259 | }
260 | 
261 | // Expands to ten consecutive copies of the given block.
262 | macro_rules! unroll_10 {
263 |     { $body: block } =>
264 |         { $body $body $body $body $body $body $body $body $body $body }
265 | }
266 | 
267 | /// Measures 1000 calls to `sample_unit` per iteration, written as 100 loop
268 | /// rounds of 10 unrolled calls each.
269 | #[bench]
270 | fn bench_sample_unit_1000(b: &mut test::Bencher) {
271 |     let mut rng = Rng::with_seed(2, 5, 7);
272 |     b.iter(|| {
273 |         for _ in 0..100 {
274 |             unroll_10! {{ test::black_box(rng.sample_unit()); }};
275 |         }
276 |     });
277 | }
278 | 
279 | /// Measures 1000 calls to `sample_hemisphere_vector` per iteration, written
280 | /// as 100 loop rounds of 10 unrolled calls each.
281 | #[bench]
282 | fn bench_sample_hemisphere_vector_1000(b: &mut test::Bencher) {
283 |     let mut rng = Rng::with_seed(2, 5, 7);
284 |     b.iter(|| {
285 |         for _ in 0..100 {
286 |             unroll_10! {{ test::black_box(rng.sample_hemisphere_vector()); }};
287 |         }
288 |     });
289 | }
290 | 
291 | /// Measures 1000 calls to `sample_hemisphere_vector_reject` per iteration,
292 | /// written as 100 loop rounds of 10 unrolled calls each.
293 | #[bench]
294 | fn bench_sample_hemisphere_vector_reject_1000(b: &mut test::Bencher) {
295 |     let mut rng = Rng::with_seed(2, 5, 7);
296 |     b.iter(|| {
297 |         for _ in 0..100 {
298 |             unroll_10! {{ test::black_box(rng.sample_hemisphere_vector_reject()); }};
299 |         }
300 |     });
301 | }
302 | 


--------------------------------------------------------------------------------
/src/ray.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | //! This module implements the ray and related structures.
  9 | 
 10 | use material::MMaterial;
 11 | use simd::{Mask, Mf32};
 12 | use std::ops::Neg;
 13 | use vector3::{MVector3, SVector3};
 14 | 
 15 | #[derive(Clone)]
 16 | pub struct SRay { // A single ray; `MRay` holds a packet of rays.
 17 |     pub origin: SVector3,    // The point where the ray starts.
 18 |     pub direction: SVector3, // Stored as given to `SRay::new`.
 19 | }
 20 | 
 21 | /// A packet of rays; `MRay::generate` builds one from the rays 0..7.
 22 | #[derive(Clone)]
 23 | pub struct MRay {
 24 |     pub origin: MVector3,
 25 |     pub direction: MVector3,
 26 |     /// A mask that determines which rays are active. If the sign bit is
 27 |     /// positive (bit is 0) then the ray is active. If the sign bit is negative
 28 |     /// (bit is 1) then the ray is inactive.
 29 |     ///
 30 |     /// This convention might seem backwards, but it makes triangle intersection
 31 |     /// more efficient because a negation can be avoided.
 32 |     pub active: Mask,
 33 | }
 34 | 
 35 | /// Intersection data for a packet of rays. An "empty" intersection can be
 36 | /// constructed with `with_max_distance`.
 37 | pub struct MIntersection {
 38 |     /// The position at which the ray intersected the surface.
 39 |     pub position: MVector3,
 40 |     /// The surface normal at the intersection point.
 41 |     pub normal: MVector3,
 42 |     /// The distance between the ray origin and the position; the maximum
 43 |     /// distance for an empty intersection.
 44 |     pub distance: Mf32,
 45 |     /// The material at the intersection surface; the sky material for an
 46 |     /// empty intersection.
 47 |     pub material: MMaterial,
 48 |     /// Texture coordinates at the intersection point.
 49 |     pub tex_coords: (Mf32, Mf32),
 50 | }
 51 | 
 52 | impl SRay {
 53 |     /// Creates a ray from its two components.
 54 |     ///
 55 |     /// Both vectors are stored as given; `direction` is not normalized here.
 56 |     pub fn new(origin: SVector3, direction: SVector3) -> SRay {
 57 |         SRay { origin: origin, direction: direction }
 58 |     }
 59 | }
 60 | 
 61 | impl MRay {
 62 |     /// Creates a ray packet in which every ray is active (zero mask).
 63 |     pub fn new(origin: MVector3, direction: MVector3) -> MRay {
 64 |         MRay { origin: origin, direction: direction, active: Mf32::zero() }
 65 |     }
 66 | 
 67 |     /// Creates a ray packet in which every lane is a copy of `ray`; all
 68 |     /// rays are active.
 69 |     pub fn broadcast(ray: &SRay) -> MRay {
 70 |         MRay::new(MVector3::broadcast(ray.origin), MVector3::broadcast(ray.direction))
 71 |     }
 72 | 
 73 |     /// Builds an mray by applying the function to the numbers 0..7.
 74 |     ///
 75 |     /// `f` is called exactly once per index, in ascending order, so a
 76 |     /// stateful closure yields the origin and direction of one lane from the
 77 |     /// same call. All rays are active.
 78 |     ///
 79 |     /// Note: this is essentially a transpose, avoid in hot code.
 80 |     pub fn generate<F>(mut f: F) -> MRay
 81 |         where F: FnMut(usize) -> SRay
 82 |     {
 83 |         // Evaluate every ray once up front. Calling `f(i)` separately for
 84 |         // the origin and for the direction would run the closure 16 times
 85 |         // and could pair components that stem from different calls.
 86 |         let rays = [f(0), f(1), f(2), f(3), f(4), f(5), f(6), f(7)];
 87 |         MRay::new(MVector3::generate(|i| rays[i].origin),
 88 |                   MVector3::generate(|i| rays[i].direction))
 89 |     }
 90 | }
 91 | 
 92 | impl MIntersection {
 93 |     /// Constructs an empty intersection: the distance is `max_dist`, the
 94 |     /// material is the sky material and all other fields are zero.
 95 |     pub fn with_max_distance(max_dist: f32) -> MIntersection {
 96 |         MIntersection {
 97 |             distance: Mf32::broadcast(max_dist),
 98 |             material: MMaterial::sky(),
 99 |             position: MVector3::zero(),
100 |             normal: MVector3::zero(),
101 |             tex_coords: (Mf32::zero(), Mf32::zero()),
102 |         }
103 |     }
104 | 
105 |     /// Selects between `self` and `other` by calling `pick` with `mask` on every field.
106 |     pub fn pick(&self, other: &MIntersection, mask: Mask) -> MIntersection {
107 |         MIntersection {
108 |             position: self.position.pick(other.position, mask),
109 |             normal: self.normal.pick(other.normal, mask),
110 |             distance: self.distance.pick(other.distance, mask),
111 |             material: self.material.pick(other.material, mask),
112 |             tex_coords: (self.tex_coords.0.pick(other.tex_coords.0, mask),
113 |                          self.tex_coords.1.pick(other.tex_coords.1, mask)),
114 |         }
115 |     }
116 | }
117 | 
118 | /// Negating a ray packet reverses the direction of its rays.
119 | impl Neg for MRay {
120 |     type Output = MRay;
121 | 
122 |     /// Returns the packet with the same origin and active mask, and with
123 |     /// the direction subtracted from the zero vector.
124 |     fn neg(self) -> MRay {
125 |         let reversed = MVector3::zero() - self.direction;
126 |         MRay { direction: reversed, ..self }
127 |     }
128 | }
129 | 


--------------------------------------------------------------------------------
/src/renderer.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | use material::{continue_path, sky_intensity};
  9 | use random::Rng;
 10 | use scene::Scene;
 11 | use simd::{Mf32, Mi32};
 12 | use std::cell::UnsafeCell;
 13 | use util::{cache_line_aligned_vec, generate_slice8};
 14 | use vector3::{MVector3, SVector3};
 15 | 
 16 | /// Renders a scene into a viewport of `width` by `height` pixels.
 17 | pub struct Renderer {
 18 |     scene: Scene,
 19 |     width: u32,
 20 |     height: u32,
 21 |     /// Flipped by `toggle_debug_view()`; selects the debug rendering path.
 22 |     enable_debug_view: bool,
 23 |     /// A value that increases at a rate of 1 per second.
 24 |     time: f32,
 25 |     /// The amount that time increases per frame.
 26 |     time_delta: f32,
 27 | }
 28 | 
 29 | /// The buffer that an image is rendered into; one `Mi32` holds 8 RGBA pixels.
 30 | pub struct RenderBuffer {
 31 |     buffer: UnsafeCell<Vec<Mi32>>, // Mutated through `&self` by `get_mut_slice`.
 32 | }
 33 | 
 34 | struct MPixelData { // Render output for the pixels of one packet.
 35 |     color: MVector3,          // Written to the color bitmap as r, g, b.
 36 |     tex_index: Mi32,          // Stored in the gbuffer alpha channel.
 37 |     tex_coords: (Mf32, Mf32), // Stored in the gbuffer r and g channels.
 38 |     fresnel: Mf32,            // Stored in the gbuffer b channel.
 39 | }
 40 | 
 41 | impl RenderBuffer {
 42 |     /// Allocates a buffer for an image of `width` by `height` pixels. The
 43 |     /// memory is left uninitialized.
 44 |     ///
 45 |     /// Panics if the width or the height is not a multiple of 16.
 46 |     pub fn new(width: u32, height: u32) -> RenderBuffer {
 47 |         assert_eq!(width & 15, 0);  // Width must be a multiple of 16.
 48 |         assert_eq!(height & 15, 0); // Height must be a multiple of 16.
 49 | 
 50 |         // One mi32 packs 8 RGBA pixels.
 51 |         let num_elems = (width as usize) * (height as usize) / 8;
 52 |         let mut elems = cache_line_aligned_vec(num_elems);
 53 | 
 54 |         // The length is set without writing the elements; this is the
 55 |         // "uninitialized" part of the contract.
 56 |         unsafe { elems.set_len(num_elems); }
 57 | 
 58 |         RenderBuffer { buffer: UnsafeCell::new(elems) }
 59 |     }
 60 | 
 61 |     /// Overwrites every element of the buffer with zero.
 62 |     pub fn fill_black(&mut self) {
 63 |         // The mutable borrow of self guarantees exclusive access, which
 64 |         // makes taking the mutable slice safe here.
 65 |         let pixels = unsafe { self.get_mut_slice() };
 66 |         for packed in pixels.iter_mut() { *packed = Mi32::zero(); }
 67 |     }
 68 | 
 69 |     /// Returns a mutable view into the buffer.
 70 |     ///
 71 |     /// This is unsafe because it hands out a mutable slice from a shared
 72 |     /// reference: several mutable borrows of the buffer can exist at once,
 73 |     /// which could result in races. Threads must write to disjoint parts.
 74 |     pub unsafe fn get_mut_slice(&self) -> &mut [Mi32] {
 75 |         let vec: &mut Vec<Mi32> = &mut *self.buffer.get();
 76 |         vec.as_mut_slice()
 77 |     }
 78 | 
 79 |     /// Returns an RGBA bitmap suitable for display.
 80 |     #[cfg(not(windows))]
 81 |     pub fn into_bitmap(self) -> Vec<u8> {
 82 |         // Consuming self guarantees that no other reference to the buffer
 83 |         // exists, which makes taking it out of the cell safe.
 84 |         let packed = unsafe { self.buffer.into_inner() };
 85 |         unsafe { ::util::transmute_vec(packed) }
 86 |     }
 87 | 
 88 |     /// Returns an RGBA bitmap suitable for display.
 89 |     #[cfg(windows)]
 90 |     pub fn into_bitmap(self) -> Vec<u8> {
 91 |         use std::mem;
 92 |         use util::drop_cache_line_aligned_vec;
 93 | 
 94 |         // Consuming self makes taking the vector out of the cell safe.
 95 |         let packed = unsafe { self.buffer.into_inner() };
 96 | 
 97 |         // On Windows an extra copy is required. Transmuting the buffer
 98 |         // into a buffer of bytes would make the allocator free it with
 99 |         // the alignment of a byte, while it asserts that the alignment for
100 |         // deallocation matches the alignment used for allocation. See
101 |         // https://github.com/rust-lang/rfcs/pull/1398#issuecomment-198584430.
102 |         // The copy is unfortunate, but it cannot be avoided until the
103 |         // allocator API changes.
104 |         let bytes = packed.iter()
105 |             .flat_map(|mi32| {
106 |                 let lanes: &[u8; 32] = unsafe { mem::transmute(mi32) };
107 |                 lanes
108 |             })
109 |             .cloned()
110 |             .collect();
111 | 
112 |         drop_cache_line_aligned_vec(packed);
113 |         bytes
114 |     }
115 | }
116 | 
117 | // UnsafeCell is not Sync, but threads must share the buffer; see `get_mut_slice`.
118 | unsafe impl Sync for RenderBuffer {}
119 | 
120 | impl Renderer {
121 |     /// Creates a renderer for `scene` with a viewport of `width` by `height`
122 |     /// pixels. The debug view starts disabled and both the time and the
123 |     /// time delta start at zero.
124 |     pub fn new(scene: Scene, width: u32, height: u32) -> Renderer {
125 |         let (time, time_delta) = (0.0, 0.0);
126 |         Renderer {
127 |             scene: scene, width: width, height: height,
128 |             enable_debug_view: false, time: time, time_delta: time_delta,
129 |         }
130 |     }
131 | 
132 |     /// Stores `time` as the current time and `delta` as the amount by which
133 |     /// the time is expected to advance per frame.
134 |     pub fn set_time(&mut self, time: f32, delta: f32) {
135 |         self.time_delta = delta;
136 |         self.time = time;
137 |     }
138 | 
139 |     /// Updates the camera position and rotation for the new frame from the
140 |     /// current time. TODO: This method does not really belong here.
141 |     pub fn update_scene(&mut self) {
142 |         let (alpha, alpha_delta) = (self.time * -0.02 + 0.1, self.time_delta * -0.02);
143 |         let (sin_a, cos_a) = (alpha.sin(), alpha.cos());
144 |         let position = SVector3::new(-3.8 * sin_a, 1.6, 3.0 * cos_a);
145 |         let position_delta = SVector3::new(-3.8 * cos_a, 0.0, -3.0 * sin_a) * alpha_delta;
146 |         self.scene.camera.set_position(position, position_delta);
147 |         self.scene.camera.set_rotation(alpha, alpha_delta);
148 |     }
149 | 
150 |     /// Flips the debug view flag: enables the debug view if it was
151 |     /// disabled and disables it if it was enabled.
152 |     pub fn toggle_debug_view(&mut self) { self.enable_debug_view ^= true; }
153 | 
154 |     /// Computes jittered screen coordinates for the block of 16x4 pixels of
155 |     /// which (x, y) is the bottom-left pixel. Pixels are numbered as follows:
156 |     ///
157 |     ///     0c 0d 0e 0f  1c 1d 1e 1f  2c 2d 2e 2f  3c 3d 3e 3f
158 |     ///     08 09 0a 0b  18 19 1a 1b  28 29 2a 2b  38 39 3a 3b
159 |     ///     04 05 06 07  14 15 16 17  24 25 26 27  34 35 36 37
160 |     ///     00 01 02 03  10 11 12 13  20 21 22 23  30 31 32 33
161 |     ///
162 |     /// Or, in terms of the index of the mf32 that holds each pixel:
163 |     ///
164 |     ///     1 1 1 1  3 3 3 3  5 5 5 5  7 7 7 7
165 |     ///     1 1 1 1  3 3 3 3  5 5 5 5  7 7 7 7
166 |     ///     0 0 0 0  2 2 2 2  4 4 4 4  6 6 6 6
167 |     ///     0 0 0 0  2 2 2 2  4 4 4 4  6 6 6 6
168 |     ///
169 |     /// Inside every mf32 the pixels are ordered from left to right, bottom
170 |     /// to top. Returns the x coordinates and the y coordinates, per mf32.
171 |     fn get_pixel_coords_16x4(&self, x: u32, y: u32, rng: &mut Rng) -> ([Mf32; 8], [Mf32; 8]) {
172 |         // One pixel measures `scale` in screen coordinates, so the full image
173 |         // width spans 2.0. The same scale applies to both axes.
174 |         let scale = Mf32::broadcast(2.0 / self.width as f32);
175 |         let steps = Mf32(2.0, 4.0, 8.0, 12.0, 0.0, 0.0, 0.0, 0.0) * scale;
176 | 
177 |         // Pixel offsets inside one mf32: two rows of four pixels.
178 |         let lane_x = Mf32(0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0);
179 |         let lane_y = Mf32(0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0);
180 | 
181 |         // Coordinates of mf32 number 0, relative to the image center.
182 |         let center_x = self.width as f32 * 0.5;
183 |         let center_y = self.height as f32 * 0.5;
184 |         let base_x = scale * (lane_x + Mf32::broadcast(x as f32 - center_x));
185 |         let base_y = scale * (lane_y + Mf32::broadcast(y as f32 - center_y));
186 | 
187 |         // Every pair of mf32s lies 4 pixels to the right of the previous
188 |         // pair; the odd mf32s lie 2 pixels above the even ones.
189 |         let right_4 = base_x + Mf32::broadcast(steps.1);
190 |         let right_8 = base_x + Mf32::broadcast(steps.2);
191 |         let right_12 = base_x + Mf32::broadcast(steps.3);
192 |         let up_2 = base_y + Mf32::broadcast(steps.0);
193 | 
194 |         let xs = [base_x, base_x, right_4, right_4,
195 |                   right_8, right_8, right_12, right_12];
196 |         let ys = [base_y, up_2, base_y, up_2, base_y, up_2, base_y, up_2];
197 | 
198 |         // Add a random offset of at most one pixel, to sample with
199 |         // anti-alias. All x offsets are drawn before the y offsets.
200 |         // TODO: If I ever do multiple samples per pixel in one frame, I could
201 |         // do stratified sampling here.
202 |         let jittered_xs = generate_slice8(|i| rng.sample_unit().mul_add(scale, xs[i]));
203 |         let jittered_ys = generate_slice8(|i| rng.sample_unit().mul_add(scale, ys[i]));
204 | 
205 |         (jittered_xs, jittered_ys)
206 |     }
207 | 
208 |     /// Shuffles 16x4 rendered pixels from the mi32 order described in
209 |     /// `get_pixel_coords_16x4` into four bitmap rows of 16 pixels each and
210 |     /// stores them in `target`, where (x, y) is the bottom-left pixel of
211 |     /// the block.
212 |     fn store_mi32_16x4(&self, target: &mut [Mi32], x: u32, y: u32, data: &[Mi32; 8]) {
213 |         // Joins the lower (first four lanes) or the upper (last four lanes)
214 |         // pixel rows of two horizontally adjacent mi32s into 8 pixels.
215 |         let lower = |l: Mi32, r: Mi32| Mi32(l.0, l.1, l.2, l.3, r.0, r.1, r.2, r.3);
216 |         let upper = |l: Mi32, r: Mi32| Mi32(l.4, l.5, l.6, l.7, r.4, r.5, r.6, r.7);
217 | 
218 |         // Index of the mi32 that holds pixel (x, y + row). If the bitmap is
219 |         // aligned to the cache line size, the stores below write exactly
220 |         // four cache lines, so there is no need to fetch those lines first.
221 |         // This saves a trip to memory, which makes this store fast.
222 |         let index = |row: u32| ((y * self.width + row * self.width + x) / 8) as usize;
223 |         let rows = [index(0), index(1), index(2), index(3)];
224 | 
225 |         // Rows 0 and 1 are the lower and upper halves of the even mi32s,
226 |         // rows 2 and 3 are those of the odd mi32s.
227 |         target[rows[0] + 0] = lower(data[0], data[2]);
228 |         target[rows[0] + 1] = lower(data[4], data[6]);
229 |         target[rows[1] + 0] = upper(data[0], data[2]);
230 |         target[rows[1] + 1] = upper(data[4], data[6]);
231 |         target[rows[2] + 0] = lower(data[1], data[3]);
232 |         target[rows[2] + 1] = lower(data[5], data[7]);
233 |         target[rows[3] + 0] = upper(data[1], data[3]);
234 |         target[rows[3] + 1] = upper(data[5], data[7]);
235 |     }
236 | 
237 |     /// Converts the floating-point colors of a 16x4 block to packed 32-bit
238 |     /// pixels (red in the lowest byte, then green, then blue) and stores
239 |     /// them in the bitmap.
240 |     fn store_pixels_color_16x4(&self,
241 |                                bitmap: &mut [Mi32],
242 |                                x: u32,
243 |                                y: u32,
244 |                                data: &[MPixelData; 8]) {
245 |         // Colors are multiplied by 2.0 to brighten up the scene a bit.
246 |         let (gain, range) = (Mf32::broadcast(2.0), Mf32::broadcast(255.0));
247 |         let rgbas = generate_slice8(|i| {
248 |             // Convert f32 colors to i32 colors in the range 0-255.
249 |             let scaled = (data[i].color * gain).clamp_one() * range;
250 |             let red = scaled.x.into_mi32();
251 |             let green = scaled.y.into_mi32().map(|c| c << 8);
252 |             let blue = scaled.z.into_mi32().map(|c| c << 16);
253 |             (red | green) | blue
254 |         });
255 |         self.store_mi32_16x4(bitmap, x, y, &rgbas);
256 |     }
257 | 
258 |     /// Packs texture coordinates, the Fresnel factor and the texture index
259 |     /// of a 16x4 block into 32-bit pixels and stores them in the gbuffer.
260 |     ///
261 |     /// Layout per pixel, from the lowest byte up: texture x, texture y,
262 |     /// Fresnel factor, texture index.
263 |     fn store_pixels_gbuffer_16x4(&self,
264 |                                  gbuffer: &mut [Mi32],
265 |                                  x: u32,
266 |                                  y: u32,
267 |                                  data: &[MPixelData; 8]) {
268 |         let range = Mf32::broadcast(255.0);
269 |         // Texture coordinates are not clamped; masking makes them wrap.
270 |         let wrap = Mi32::broadcast(0xff);
271 |         let uvs = generate_slice8(|i| {
272 |             let pixel = &data[i];
273 |             let tex_x = (pixel.tex_coords.0 * range).into_mi32() & wrap;
274 |             let tex_y = (pixel.tex_coords.1 * range).into_mi32() & wrap;
275 |             let fresnel = (pixel.fresnel * range).into_mi32();
276 | 
277 |             let r = tex_x;
278 |             let g = tex_y.map(|c| c << 8);
279 |             let b = fresnel.map(|c| c << 16);
280 |             // The texture index is stored in the alpha channel.
281 |             let a = pixel.tex_index.map(|c| c << 24);
282 | 
283 |             (r | g) | (b | a)
284 |         });
285 |         self.store_mi32_16x4(gbuffer, x, y, &uvs);
286 |     }
287 | 
288 |     /// Renders a block of 16x4 pixels, where (x, y) is the coordinate of the
289 |     /// bottom-left pixel, and returns the pixel data of the block as eight
290 |     /// packets in the order described in `get_pixel_coords_16x4`. Nothing is
291 |     /// stored here. When the debug view is enabled, `rng` is only used for
292 |     /// the pixel coordinates.
293 |     fn render_block_16x4(&self, x: u32, y: u32, rng: &mut Rng) -> [MPixelData; 8] {
294 |         let (xs, ys) = self.get_pixel_coords_16x4(x, y, rng);
295 |         let debug = self.enable_debug_view;
296 |         generate_slice8(|i| match debug {
297 |             true => self.render_pixels_debug(xs[i], ys[i]),
298 |             false => self.render_pixels(xs[i], ys[i], rng),
299 |         })
300 |     }
301 | 
302 |     /// Renders a square patch of `patch_width` pixels wide and high into
303 |     /// the color bitmap and the gbuffer.
304 |     ///
305 |     /// The (x, y) coordinate is that of the bottom-left pixel of the patch.
306 |     /// Panics if the patch width is not a multiple of 16.
307 |     pub fn render_patch_u8(&self,
308 |                            bitmap: &mut [Mi32],
309 |                            gbuffer: &mut [Mi32],
310 |                            patch_width: u32,
311 |                            x: u32,
312 |                            y: u32,
313 |                            frame_number: u32) {
314 |         assert_eq!(patch_width & 15, 0); // Patch width must be a multiple of 16.
315 |         let (blocks_x, blocks_y) = (patch_width / 16, patch_width / 4);
316 |         let mut rng = Rng::with_seed(x, y, frame_number);
317 | 
318 |         // Blocks are rendered column by column; this order determines
319 |         // which random numbers every block consumes.
320 |         for xb in (0..blocks_x).map(|i| x + i * 16) {
321 |             for yb in (0..blocks_y).map(|j| y + j * 4) {
322 |                 let data = self.render_block_16x4(xb, yb, &mut rng);
323 |                 self.store_pixels_color_16x4(bitmap, xb, yb, &data);
324 |                 self.store_pixels_gbuffer_16x4(gbuffer, xb, yb, &data);
325 |             }
326 |         }
327 |     }
328 | 
329 |     /// Renders a square patch of the frame and adds the colors to the
330 |     /// contribution that `hdr_buffer` holds already.
331 |     ///
332 |     /// The (x, y) coordinate is the coordinate of the bottom-left pixel of the
333 |     /// patch. The patch width must be a multiple of 16, otherwise this method
334 |     /// panics. The HDR buffer is laid out as a bitmap of 16x4 blocks.
335 |     ///
336 |     /// The gbuffer is filled too, but not accumulatively: it is overwritten
337 |     /// with the values of the current frame. (It should be fairly constant
338 |     /// anyway, and it cannot be blended apart from averaging coordinates.)
339 |     pub fn accumulate_patch_f32(&self,
340 |                                 hdr_buffer: &mut [[MVector3; 8]],
341 |                                 gbuffer: &mut [Mi32],
342 |                                 patch_width: u32,
343 |                                 x: u32,
344 |                                 y: u32,
345 |                                 frame_number: u32) {
346 |         assert_eq!(patch_width & 15, 0); // Patch width must be a multiple of 16.
347 |         let (blocks_x, blocks_y) = (patch_width / 16, patch_width / 4);
348 |         let mut rng = Rng::with_seed(x, y, frame_number);
349 | 
350 |         for i in 0..blocks_x {
351 |             for j in 0..blocks_y {
352 |                 let (xb, yb) = (x + i * 16, y + j * 4);
353 |                 let data = self.render_block_16x4(xb, yb, &mut rng);
354 |                 // Blocks are stored row by row, `width / 16` blocks per row.
355 |                 let index = ((y / 4 + j) * (self.width / 16) + (x / 16 + i)) as usize;
356 |                 for (sum, pixel) in hdr_buffer[index].iter_mut().zip(data.iter()) {
357 |                     *sum = *sum + pixel.color;
358 |                 }
359 |                 self.store_pixels_gbuffer_16x4(gbuffer, xb, yb, &data);
360 |             }
361 |         }
362 |     }
363 | 
364 |     /// Creates a zero-filled float buffer for accumulative rendering with
365 |     /// `accumulate_patch_f32()`. It holds one entry of eight `MVector3`s for
366 |     /// every block of 16x4 pixels in the viewport.
367 |     pub fn new_buffer_f32(&self) -> Vec<[MVector3; 8]> {
368 |         // Integer division: only whole blocks of 16x4 pixels are counted.
369 |         let num_blocks = (self.width / 16) * (self.height / 4);
370 | 
371 |         (0..num_blocks)
372 |             .map(|_| generate_slice8(|_| MVector3::zero()))
373 |             .collect()
374 |     }
375 | 
376 |     /// Converts a buffer of floating point values used for accumulative
377 |     /// rendering into a 32 bit per pixel RGBA bitmap.
378 |     ///
379 |     /// Every accumulated color is multiplied by `1 / num_samples` before it is
380 |     /// converted, which yields the average over the samples. Panics if the
381 |     /// viewport width is not a multiple of 16 or the viewport height is not a
382 |     /// multiple of 4.
383 |     pub fn buffer_f32_into_render_buffer(&self,
384 |                                          hdr_buffer: &[[MVector3; 8]],
385 |                                          render_buffer: &mut RenderBuffer,
386 |                                          num_samples: u32) {
387 |         let (w, h) = (self.width / 16, self.height / 4);
388 |         assert_eq!(w * 16, self.width);
389 |         assert_eq!(h * 4, self.height);
390 |         let factor = Mf32::broadcast(1.0 / (num_samples as f32));
391 | 
392 |         // This is safe here because there is only one mutable borrow.
393 |         let bitmap = unsafe { render_buffer.get_mut_slice() };
394 | 
395 |         // Convert the blocks of 16x4 pixels one by one, row by row.
396 |         for j in 0..h {
397 |             for i in 0..w {
398 |                 let sums = &hdr_buffer[(j * w + i) as usize];
399 |                 // Only the color is stored by this function; the other
400 |                 // fields are unused and therefore set to zero.
401 |                 let data = generate_slice8(|k| MPixelData {
402 |                     color: sums[k] * factor,
403 |                     tex_index: Mi32::zero(),
404 |                     tex_coords: (Mf32::zero(), Mf32::zero()),
405 |                     fresnel: Mf32::zero(),
406 |                 });
407 |                 self.store_pixels_color_16x4(bitmap, i * 16, j * 4, &data);
408 |             }
409 |         }
410 |     }
411 | 
    /// Returns colors for the pixels, as well as the texture indices.
    ///
    /// Traces a path of at most `max_bounces` segments for the camera rays
    /// through screen coordinates `(x, y)`. The color starts out as white and
    /// is multiplied by the color modulation of every bounce, and finally by
    /// the sky intensity in the direction of the last ray. The texture index,
    /// texture coordinates, and Fresnel term in the result are those of the
    /// first intersection only.
    fn render_pixels(&self, x: Mf32, y: Mf32, rng: &mut Rng) -> MPixelData {
        // Sample a random time for the camera ray; `get_ray()` interpolates
        // the camera position and orientation with it.
        let t = rng.sample_unit();
        let mut ray = self.scene.camera.get_ray(x, y, t);
        let mut color = MVector3::new(Mf32::one(), Mf32::one(), Mf32::one());
        // Material of the most recent intersection; inspected after the loop
        // to tell whether the path ended at a light source.
        let mut hit_emissive = Mf32::zero();
        let mut texture_index = Mi32::zero();
        let mut texture_coords = (Mf32::zero(), Mf32::zero());
        let mut fresnel = Mf32::zero();

        let max_bounces = 5;
        for i in 0..max_bounces {
            let isect = self.scene.intersect_nearest(&ray);
            hit_emissive = isect.material;

            // Do not allow NaNs to creep in.
            debug_assert!(ray.direction.all_finite(), "infinite ray direction at iteration {}", i);
            debug_assert!(isect.position.all_finite(), "infinite intersection at iteration {}", i);
            debug_assert!(isect.distance.all_finite(), "infinite distance at iteration {}", i);

            // Stop when every ray hit a light source.
            if isect.material.all_sign_bits_negative() {
                break;
            }

            // Get a new ray and the color modulation. For the first bounce, the
            // Fresnel term should not contribute to the color modulation
            // because that is handled on the GPU.
            let (new_ray, color_mod, fr) =
                continue_path(isect.material, &self.scene, &ray, &isect, rng, i == 0);
            ray = new_ray;
            color = color.mul_coords(color_mod);

            // Only the first intersection determines the data that is stored
            // alongside the color.
            if i == 0 {
                texture_index = isect.material.get_texture();
                texture_coords = isect.tex_coords;
                fresnel = fr;
            }
        }

        // Compute light contribution.
        let emission = sky_intensity(ray.direction);
        color = color.mul_coords(emission);

        // If the last thing that a ray hit was an emissive material, it has
        // found a light source and the computed color is correct. If the ray
        // did not find a light source but the loop was terminated, the computed
        // color is invalid; it should be black.
        let color = MVector3::zero().pick(color, hit_emissive);

        MPixelData {
            color: color,
            tex_index: texture_index,
            tex_coords: texture_coords,
            fresnel: fresnel,
        }
    }
469 | 
470 |     fn render_pixels_debug(&self, x: Mf32, y: Mf32) -> MPixelData {
471 |         let t = Mf32::zero();
472 |         let ray = self.scene.camera.get_ray(x, y, t);
473 |         let (numi_aabb, numi_tri) = self.scene.intersect_debug(&ray);
474 | 
475 |         let g = Mf32::broadcast((numi_aabb as f32).log2() * 0.1);
476 |         let b = Mf32::broadcast((numi_tri as f32).log2() * 0.1);
477 | 
478 |         let color = MVector3::new(Mf32::zero(), g, b);
479 | 
480 |         MPixelData {
481 |             color: color,
482 |             tex_index: Mi32::zero(),
483 |             tex_coords: (Mf32::zero(), Mf32::zero()),
484 |             fresnel: Mf32::zero(),
485 |         }
486 |     }
487 | }
488 | 
#[test]
fn render_buffer_into_bitmap() {
    // Convert once and drop the resulting bitmap right away.
    {
        let bitmap = RenderBuffer::new(1280, 736).into_bitmap();
        drop(bitmap);
    }

    // Convert a second buffer and let the bitmap go out of scope at the end
    // of the test. The render buffer was transmuted or copied into a vector
    // of pixels, and dropping that vector should not result in a crash.
    let _bitmap = RenderBuffer::new(1280, 736).into_bitmap();
}
499 | 


--------------------------------------------------------------------------------
/src/scene.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | use bvh::Bvh;
  9 | use material::{MDirectSample, MMaterial};
 10 | use quaternion::{MQuaternion, SQuaternion, rotate};
 11 | use random::Rng;
 12 | use ray::{MIntersection, MRay};
 13 | use simd::Mf32;
 14 | use std::f32::consts::PI;
 15 | use triangle::Triangle;
 16 | use util::generate_slice8;
 17 | use vector3::{MVector3, SVector3};
 18 | use wavefront::Mesh;
 19 | 
/// A camera of which the position and orientation may change linearly over
/// the course of a frame.
pub struct Camera {
    /// Position at the beginning of the frame.
    position: SVector3,
    /// Offset such that `position + position_delta` is the position at the
    /// end of the frame.
    position_delta: SVector3,

    /// Orientation at the beginning of the frame.
    orientation: SQuaternion,
    /// Delta such that `orientation + orientation_delta`, normalized, is the
    /// orientation at the end of the frame.
    orientation_delta: SQuaternion,

    /// Distance such that a vector at `(1, 0, screen_distance)` makes an angle
    /// of the desired field of view with `(-1, 0, screen_distance)`.
    screen_distance: f32,
}
 31 | 
 32 | impl Camera {
 33 |     /// Creates a camera at the origin with 60 degrees field of view.
 34 |     pub fn new() -> Camera {
 35 |         Camera {
 36 |             position: SVector3::zero(),
 37 |             position_delta: SVector3::zero(),
 38 |             orientation: SQuaternion::new(1.0, 0.0, 0.0, 0.0),
 39 |             orientation_delta: SQuaternion::new(0.0, 0.0, 0.0, 0.0),
 40 |             screen_distance: 1.0 / (PI / 5.0).sin(),
 41 |         }
 42 |     }
 43 | 
 44 |     /// Sets the position of the camera at the beginning of the frame, and the
 45 |     /// offset such that position + delta is the position at the end of the
 46 |     /// frame.
 47 |     pub fn set_position(&mut self, position: SVector3, delta: SVector3) {
 48 |         self.position = position;
 49 |         self.position_delta = delta;
 50 |     }
 51 | 
 52 |     /// Sets the orientation of the camera at the beginning of the frame, and
 53 |     /// the delta such that orientation + delta normalized is the orientation at
 54 |     /// the end of the frame.
 55 |     pub fn set_orientation(&mut self, orientation: SQuaternion, delta: SQuaternion) {
 56 |         self.orientation = orientation;
 57 |         self.orientation_delta = delta;
 58 |     }
 59 | 
 60 |     /// Sets the desired horizontal field of view in radians.
 61 |     pub fn set_fov(&mut self, fov: f32) {
 62 |         self.screen_distance = 1.0 / (fov / 2.0).sin();
 63 |     }
 64 | 
 65 |     /// Sets the rotation of the camera in the xz-plane.
 66 |     pub fn set_rotation(&mut self, radians: f32, delta: f32) {
 67 |         let x = (radians * 0.5).cos();
 68 |         let y = (radians * 0.5).sin();
 69 |         self.orientation = SQuaternion::new(x, 0.0, -y, 0.0);
 70 | 
 71 |         let x_delta = 0.5 * -(radians * 0.5).sin() * delta;
 72 |         let y_delta = 0.5 * (radians * 0.5).cos() * delta;
 73 |         self.orientation_delta = SQuaternion::new(x_delta, 0.0, -y_delta, 0.0);
 74 |     }
 75 | 
 76 |     /// Returns a camera ray for the given screen coordinates.
 77 |     ///
 78 |     /// Values for x are in the range (-1, 1), the scale is uniform in both
 79 |     /// directions. The time ranges from 0.0 at the beginning of the frame to
 80 |     /// 1.0 at the end of the frame.
 81 |     pub fn get_ray(&self, x: Mf32, y: Mf32, t: Mf32) -> MRay {
 82 |         let origin = MVector3::broadcast(self.position);
 83 |         let origin_delta = MVector3::broadcast(self.position_delta);
 84 |         let origin = origin_delta.mul_add(t, origin);
 85 | 
 86 |         let orientation = MQuaternion::broadcast(self.orientation);
 87 |         let orientation_delta = MQuaternion::broadcast(self.orientation_delta);
 88 |         let orientation = orientation.interpolate(&orientation_delta, t);
 89 | 
 90 |         let dist = Mf32::broadcast(-self.screen_distance);
 91 |         let dir_src = MVector3::new(x, y, dist).normalized();
 92 |         let dir = rotate(&dir_src, &orientation);
 93 | 
 94 |         MRay {
 95 |             origin: origin,
 96 |             direction: dir,
 97 |             active: Mf32::zero(),
 98 |         }
 99 |     }
100 | }
101 | 
/// The camera together with all geometry that can be intersected.
pub struct Scene {
    /// The camera through which the scene is viewed.
    pub camera: Camera,

    /// Bounding volume hierarchy of all triangles in the scene.
    bvh: Bvh,

    /// Indices into the BVH's triangle list, of triangles that have a material
    /// eligible for direct sampling.
    direct_sample: Vec<u32>,
}
112 | 
113 | impl Scene {
114 |     pub fn from_meshes(meshes: &[Mesh]) -> Scene {
115 |         let bvh = Bvh::from_meshes(meshes);
116 | 
117 |         let mut direct_sample = Vec::new();
118 |         for i in 0..bvh.triangles.len() {
119 |             if bvh.triangles[i].material.is_direct_sample() {
120 |                 direct_sample.push(i as u32);
121 |             }
122 |         }
123 | 
124 |         Scene {
125 |             camera: Camera::new(),
126 |             bvh: bvh,
127 |             direct_sample: direct_sample,
128 |         }
129 |     }
130 | 
131 |     pub fn print_stats(&self) {
132 |         self.bvh.print_stats();
133 | 
134 |         println!("scene statistics:");
135 |         println!("  triangles eligible for direct sampling: {} / {} ({:0.1}%)",
136 |                  self.direct_sample.len(),
137 |                  self.bvh.triangles.len(),
138 |                  100.0 * self.direct_sample.len() as f32 / self.bvh.triangles.len() as f32);
139 |     }
140 | 
141 |     /// Returns 8 random points on 8 random triangles eligible for direct
142 |     /// sampling.
143 |     pub fn get_direct_sample(&self, rng: &mut Rng) -> MDirectSample {
144 |         // The number of triangles eligible for direct sampling must be greater
145 |         // than 0, bute for good random number, I assume below that there are 8.
146 |         // This is the case for my hard-coded scene.
147 |         debug_assert!(self.direct_sample.len() == 8);
148 | 
149 |         let random_bits = rng.sample_u32();
150 | 
151 |         // Pick a random direct sampling triangle for every coordinate. This has
152 |         // to be done serially, unfortunately. The low order bits of the random
153 |         // number are not really random modulo 8, but the high order bits are.
154 |         // This has to do with how Rng works. In short, the sequence x*p^n is
155 |         // not random modulo 8, because p^n can take at most 4 values mod 8. And
156 |         // if you are unlucky, x = 0 mod 8, and then all indices are the same.
157 |         // Therefore take the high order bits, which are sufficiently random.
158 |         // TODO: Are the bounds checks a bottleneck here?
159 |         let indices = generate_slice8(|i| (random_bits[i] >> 29) as u32);
160 |         let tri_indices = generate_slice8(|i| self.direct_sample[indices[i] as usize]);
161 |         let tris = generate_slice8(|i| &self.bvh.triangles[tri_indices[i] as usize]);
162 | 
163 |         // Gather the vertices of the triangles into SIMD vectors, so from now
164 |         // on we are not serial any more.
165 |         let v0 = MVector3::generate(|i| tris[i].v0);
166 |         let v1 = MVector3::generate(|i| tris[i].v1);
167 |         let v2 = MVector3::generate(|i| tris[i].v2);
168 | 
169 |         let e1 = v0 - v2;
170 |         let e2 = v1 - v0;
171 |         let normal_denorm = e1.cross(e2);
172 |         let cross_norm_recip = normal_denorm.norm_squared().rsqrt();
173 |         let normal = normal_denorm * cross_norm_recip;
174 |         let area = Mf32::broadcast(0.5) * cross_norm_recip.recip_fast();
175 | 
176 |         let u = rng.sample_unit();
177 |         let v = rng.sample_unit();
178 |         // If u + v > 1, the point lies outside of the triangle, and s will have
179 |         // negative sign. If the point is inside the triangle, s will have
180 |         // positive sign.
181 |         let s = (Mf32::one() - u) - v;
182 |         // If the point lies outside the triangle, it lies in the other half of
183 |         // the parallellogram, so transform the coordinates to get them into the
184 |         // correct triangle again.
185 |         let u = u.pick(Mf32::one() - u, s);
186 |         let v = v.pick(Mf32::one() - v, s);
187 | 
188 |         let p = e2.mul_add(v, e1.neg_mul_add(u, v0));
189 | 
190 |         let ds = MDirectSample {
191 |             position: p,
192 |             normal: normal,
193 |             area: area,
194 |         };
195 | 
196 |         // Prevent NaNs from creeping in, and ensure that the sample is valid.
197 |         debug_assert!(normal.all_finite());
198 |         debug_assert!(area.all_finite());
199 |         debug_assert!(area.all_sign_bits_positive(), "area must be positive");
200 | 
201 |         ds
202 |     }
203 | 
204 |     /// Returns the number of triangles eligible for direct sampling.
205 |     pub fn direct_sample_num(&self) -> usize {
206 |         self.direct_sample.len()
207 |     }
208 | 
209 |     pub fn foreach_direct_sample<F: FnMut(&Triangle)>(&self, mut f: F) {
210 |         for i in &self.direct_sample {
211 |             // TODO: Remove the bounds check?
212 |             let triangle = &self.bvh.triangles[*i as usize];
213 |             f(triangle);
214 |         }
215 |     }
216 | 
217 |     /// Returns the interections with the shortest distance along the ray.
218 |     ///
219 |     /// Intersects the sky if no other geometry was intersected.
220 |     pub fn intersect_nearest(&self, ray: &MRay) -> MIntersection {
221 |         let huge_distance = Mf32::broadcast(1.0e5);
222 |         let far_away = MIntersection {
223 |             position: ray.direction.mul_add(huge_distance, ray.origin),
224 |             normal: ray.direction,
225 |             distance: huge_distance,
226 |             material: MMaterial::sky(),
227 |             tex_coords: (Mf32::zero(), Mf32::zero()),
228 |         };
229 |         self.bvh.intersect_nearest(ray, far_away)
230 |     }
231 | 
232 |     /// Returns the number of AABBs and triangles intersected to find the
233 |     /// nearest intersection.
234 |     pub fn intersect_debug(&self, ray: &MRay) -> (u32, u32) {
235 |         let huge_distance = Mf32::broadcast(1.0e5);
236 |         let far_away = MIntersection {
237 |             position: ray.direction.mul_add(huge_distance, ray.origin),
238 |             normal: ray.direction,
239 |             distance: huge_distance,
240 |             material: MMaterial::sky(),
241 |             tex_coords: (Mf32::zero(), Mf32::zero()),
242 |         };
243 |         self.bvh.intersect_debug(ray, far_away)
244 |     }
245 | }
246 | 


--------------------------------------------------------------------------------
/src/stats.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | //! A simple way to keep track of statistics.
  9 | 
 10 | use time::Duration;
 11 | 
/// Keeps track of the min, median, and max of a variable.
///
/// The number of values stored is bounded.
pub struct Stats {
    /// The recorded values, kept sorted in ascending order by `insert()`.
    values: Vec<u32>,
}
 18 | 
 19 | impl Stats {
 20 |     pub fn new() -> Stats {
 21 |         Stats { values: Vec::with_capacity(128) }
 22 |     }
 23 | 
 24 |     pub fn insert(&mut self, value: u32) {
 25 |         // Make room if there is none. Removing one extreme value below the
 26 |         // median and one above does not affect the median, so we can discard
 27 |         // values without affecting the median. However, when the median
 28 |         // shifts, these values could have been imporant, and the result is
 29 |         // incorrect. For a stable value, the median will not shift by much, so
 30 |         // it is best to remove the most extreme values. On the other hand, the
 31 |         // min and max are interesting to know, so merge the values after the
 32 |         // min and before the max.
 33 |         if self.values.len() == self.values.capacity() {
 34 |             debug_assert!(self.values.len() >= 4);
 35 |             let len = self.values.len();
 36 |             // Merge the two values after the min and the two values before the
 37 |             // max.
 38 |             let avg_high = (self.values[len - 3] + self.values[len - 2]) / 2;
 39 |             let avg_low = (self.values[1] + self.values[2]) / 2;
 40 |             self.values[len - 3] = avg_high;
 41 |             self.values[2] = avg_low;
 42 |             self.values.remove(len - 2);
 43 |             self.values.remove(1);
 44 |         }
 45 | 
 46 |         let idx = match self.values.binary_search(&value) {
 47 |             Ok(i) => i,
 48 |             Err(i) => i,
 49 |         };
 50 | 
 51 |         self.values.insert(idx, value);
 52 |     }
 53 | 
 54 |     /// Inserts the duration rounded to microseconds.
 55 |     pub fn insert_time_us(&mut self, duration: Duration) {
 56 |         let ns = duration.num_nanoseconds().unwrap();
 57 |         let us = (ns + 500) / 1000;
 58 |         self.insert(us as u32);
 59 |     }
 60 | 
 61 |     /// Returns the median of the stored values.
 62 |     ///
 63 |     /// Panics if no values are present.
 64 |     pub fn median(&self) -> u32 {
 65 |         // This is not correct for an even number of values, but as the number
 66 |         // of values grows bigger this difference becomes smaller.
 67 |         self.values[self.values.len() / 2]
 68 |     }
 69 | 
 70 |     /// Returns the minimum of the stored values.
 71 |     ///
 72 |     /// Panics if no values are present.
 73 |     pub fn min(&self) -> u32 {
 74 |         self.values[0]
 75 |     }
 76 | }
 77 | 
/// A collection of global stats that the app keeps track of.
///
/// All fields are printed with the unit "us" by `print()`.
pub struct GlobalStats {
    /// Texture upload time in microseconds.
    pub tex_upload_us: Stats,
    /// Draw and wait for vsync time in microseconds.
    pub draw_vsync_us: Stats,
    /// Total time of rendering and drawing a frame, in microseconds.
    pub frame_us: Stats,
}
 87 | 
 88 | impl GlobalStats {
 89 |     pub fn new() -> GlobalStats {
 90 |         GlobalStats {
 91 |             tex_upload_us: Stats::new(),
 92 |             draw_vsync_us: Stats::new(),
 93 |             frame_us: Stats::new(),
 94 |         }
 95 |     }
 96 | 
 97 |     pub fn print(&self) {
 98 |         println!("");
 99 |         println!("texture upload: median {} us, min {} us",
100 |                  self.tex_upload_us.median(),
101 |                  self.tex_upload_us.min());
102 |         println!("draw and vsync: median {} us, min {} us",
103 |                  self.draw_vsync_us.median(),
104 |                  self.draw_vsync_us.min());
105 |         println!("frame time: median {} us, min {} us -> {:0.1} fps",
106 |                  self.frame_us.median(),
107 |                  self.frame_us.min(),
108 |                  1.0 / (self.frame_us.median() as f32 * 1e-6));
109 |     }
110 | }
111 | 


--------------------------------------------------------------------------------
/src/trace.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | //! This mod writes trace logs that can be inspected with chrome://tracing.
  9 | //! It is intended as a debugging tool, so I can see what all the cores are
 10 | //! doing; how work is scheduled among CPUs and what is blocking.
 11 | //!
 12 | //! Note: this mod is not related to ray tracing, sorry for the name.
 13 | 
 14 | // TODO: Integrate this with stats.
 15 | 
 16 | use std::collections::VecDeque;
 17 | use std::fs::File;
 18 | use std::io;
 19 | use std::path::Path;
 20 | use std::sync::{Arc, Mutex};
 21 | use thread_id;
 22 | use time::{Duration, PreciseTime};
 23 | 
/// A completed event, as stored in the trace log.
struct TraceEvent {
    /// Time at which the event started.
    start: PreciseTime,
    /// Time at which the event ended.
    end: PreciseTime,
    /// Name of the event, exported as "name".
    description: &'static str,
    /// Frame number of the trace log at the time the event was started.
    frame: u32,
    /// Caller-provided id, exported as an argument of the event.
    id: u32,
    /// Id of the thread that recorded the event in the log.
    tid: u64,
}
 32 | 
/// An event that is in progress. It is added to the trace log when
/// `take_duration()` is called or when the value is dropped, whichever comes
/// first.
pub struct ScopedTraceEvent {
    /// Time at which the event started.
    start: PreciseTime,
    /// Name of the event.
    description: &'static str,
    /// Frame number of the trace log at the time the event was started.
    frame: u32,
    /// Caller-provided id.
    id: u32,
    /// The log that the event will be added to.
    log: Arc<Mutex<TraceLogImpl>>,
    /// Whether the event has been added to the log already, to prevent
    /// recording it a second time on drop.
    handled: bool,
}
 41 | 
/// The shared, mutex-protected state of a trace log.
struct TraceLogImpl {
    /// The recorded events, oldest first.
    events: VecDeque<TraceEvent>,
    /// Number of events at which the oldest event is discarded to make room
    /// for a new one.
    limit: usize,
}
 46 | 
/// A bounded log of trace events that can be exported for chrome://tracing.
pub struct TraceLog {
    /// The events, shared with every `ScopedTraceEvent` in progress.
    log: Arc<Mutex<TraceLogImpl>>,
    /// Time at which the log was created; exported timestamps are relative to
    /// this time.
    epoch: PreciseTime,
    /// The current frame number, attached to new events.
    frame_number: u32,
}
 52 | 
 53 | impl ScopedTraceEvent {
 54 |     /// Records the event in the trace log and returns its duration.
 55 |     pub fn take_duration(mut self) -> Duration {
 56 |         let end = PreciseTime::now();
 57 |         self.add_to_trace(end);
 58 |         self.start.to(end)
 59 |     }
 60 | 
 61 |     fn add_to_trace(&mut self, now: PreciseTime) {
 62 |         let event = TraceEvent {
 63 |             start: self.start,
 64 |             end: now,
 65 |             description: self.description,
 66 |             frame: self.frame,
 67 |             id: self.id,
 68 |             tid: thread_id::get() as u64,
 69 |         };
 70 |         let mut trace_log_impl = self.log.lock().unwrap();
 71 |         if trace_log_impl.events.len() == trace_log_impl.limit {
 72 |             trace_log_impl.events.pop_front();
 73 |         }
 74 |         trace_log_impl.events.push_back(event);
 75 |         self.handled = true;
 76 |     }
 77 | }
 78 | 
 79 | impl Drop for ScopedTraceEvent {
 80 |     fn drop(&mut self) {
 81 |         if !self.handled {
 82 |             let end = PreciseTime::now();
 83 |             self.add_to_trace(end);
 84 |         }
 85 |     }
 86 | }
 87 | 
 88 | impl TraceLog {
 89 |     pub fn with_limit(limit: usize) -> TraceLog {
 90 |         let trace_log_impl = TraceLogImpl {
 91 |             events: VecDeque::with_capacity(limit),
 92 |             limit: limit,
 93 |         };
 94 |         TraceLog {
 95 |             log: Arc::new(Mutex::new(trace_log_impl)),
 96 |             epoch: PreciseTime::now(),
 97 |             frame_number: 0,
 98 |         }
 99 |     }
100 | 
101 |     /// Increments the frame number and returns the current frame number.
102 |     pub fn inc_frame_number(&mut self) -> u32 {
103 |         self.frame_number += 1;
104 |         self.frame_number
105 |     }
106 | 
107 |     /// Starts a new trace event. When the returned value goes out of scope, it
108 |     /// is added to the log with the correct end time.
109 |     pub fn scoped(&self, description: &'static str, id: u32) -> ScopedTraceEvent {
110 |         ScopedTraceEvent {
111 |             start: PreciseTime::now(),
112 |             description: description,
113 |             frame: self.frame_number,
114 |             id: id,
115 |             log: self.log.clone(),
116 |             handled: false,
117 |         }
118 |     }
119 | 
120 |     /// Writes the trace as a json string in the trace log format that can be
121 |     /// read by Chrome’s trace viewer (chrome://tracing).
122 |     pub fn export<W: io::Write>(&self, output: &mut W) -> io::Result<()> {
123 |         try!(write!(output, "{{\"traceEvents\":["));
124 |         let mut is_first = true;
125 |         for event in self.log.lock().unwrap().events.iter() {
126 |             if !is_first {
127 |                 try!(write!(output, ","));
128 |             }
129 |             let ts = self.epoch.to(event.start).num_microseconds().unwrap();
130 |             let dur = event.start.to(event.end).num_microseconds().unwrap();
131 |             try!(write!(output, "{{\"name\":\"{0}\",\
132 |                                    \"cat\":\"\",\
133 |                                    \"ph\":\"X\",\
134 |                                    \"ts\":{1},\
135 |                                    \"dur\":{2},\
136 |                                    \"pid\":0,\
137 |                                    \"tid\":{3},\
138 |                                    \"args\":{{\
139 |                                    \"frame\":{4},\
140 |                                    \"id\":{5}}}}}",
141 |                                 event.description, ts, dur, event.tid,
142 |                                 event.frame, event.id));
143 |             is_first = false;
144 |         }
145 |         write!(output, "],\"displayTimeUnit\":\"ms\"}}")
146 |     }
147 | 
148 |     /// Writes the trace to a json file.
149 |     pub fn export_to_file<P: AsRef<Path>>(&self, path: P) -> io::Result<()> {
150 |         let mut file = try!(File::create(path));
151 |         self.export(&mut file)
152 |     }
153 | }
154 | 


--------------------------------------------------------------------------------
/src/triangle.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | //! This module implements the triangle primitive and related geometry functions.
  9 | //!
 10 | //! The only primitive is the triangle, there are no spheres or other shapes.
 11 | //! This avoids having to dispatch on the primitive type to intersect an object.
 12 | //! It avoids a virtual method call, which in turn enables the triangle
 13 | //! intersection code to be inlined.
 14 | 
 15 | use material::{SMaterial, MMaterial};
 16 | use ray::{MIntersection, MRay};
 17 | use simd::Mf32;
 18 | use vector3::{MVector3, SVector3};
 19 | 
 20 | #[cfg(test)]
 21 | use {bench, test};
 22 | 
/// A triangle with texture coordinates per vertex and a material.
#[derive(Clone, Debug)]
pub struct Triangle {
    /// The first vertex.
    pub v0: SVector3,
    /// The second vertex.
    pub v1: SVector3,
    /// The third vertex.
    pub v2: SVector3,
    /// Texture coordinates at `v0`.
    pub uv0: (f32, f32),
    /// Texture coordinates at `v1`.
    pub uv1: (f32, f32),
    /// Texture coordinates at `v2`.
    pub uv2: (f32, f32),
    /// The material of the triangle.
    pub material: SMaterial,
}
 33 | 
/// The result of intersecting a triangle to compute a probability density.
// NOTE(review): the field comments below are inferred from the field names,
// the code that fills this struct is not visible here -- confirm.
pub struct MDirectIntersection {
    /// Presumably the normal of the intersected triangle.
    pub normal: MVector3,
    /// Presumably the area of the intersected triangle.
    pub area: Mf32,
    /// Presumably the distance along the ray to the intersection.
    pub distance: Mf32,
    /// Presumably a per-ray mask that tells whether the triangle was hit.
    pub mask: Mf32,
}
 41 | 
 42 | impl Triangle {
 43 |     pub fn new(v0: SVector3, v1: SVector3, v2: SVector3, mat: SMaterial) -> Triangle {
 44 |         Triangle {
 45 |             v0: v0,
 46 |             v1: v1,
 47 |             v2: v2,
 48 |             uv0: (0.0, 0.0),
 49 |             uv1: (0.0, 0.0),
 50 |             uv2: (0.0, 0.0),
 51 |             material: mat,
 52 |         }
 53 |     }
 54 | 
 55 |     pub fn barycenter(&self) -> SVector3 {
 56 |         (self.v0 + self.v1 + self.v2) * 3.0f32.recip()
 57 |     }
 58 | 
    /// Intersects the rays with the triangle.
    ///
    /// Per ray, returns the intersection with this triangle if the ray hits
    /// the triangle in the forward direction at a distance smaller than
    /// `isect.distance`, or the value from `isect` otherwise.
    pub fn intersect(&self, ray: &MRay, isect: MIntersection) -> MIntersection {
        // One would expect that if the triangle were represented as
        // (v0, e1, e2) instead of (v0, v1, v2), that would be faster because we
        // could avoid the subtractions here. My measurements show that the
        // converse is true.
        // TODO: Add a proper benchmark.
        let v0 = MVector3::broadcast(self.v0);

        // Note: broadcasting before doing the subtract, although it seems
        // silly, improves performance by ~5 ns per intersection (25%).
        let e1 = MVector3::broadcast(self.v0) - MVector3::broadcast(self.v2);
        let e2 = MVector3::broadcast(self.v1) - MVector3::broadcast(self.v0);

        // All points P on the plane in which the triangle lies satisfy the
        // equation (P . normal) = c for a unique constant c determined by the
        // plane. (The dot denotes the dot product here.) To intersect the ray
        // with the plane, solve the equation (O + tD) . normal = c, where O
        // is the origin of the ray and D the direction. Note: if the ray
        // direction D is normalized, then t is the distance from the ray origin
        // to the plane. There is no need to normalize the triangle normal at
        // this point, because it appears both in the numerator and denominator.
        let normal_denorm = e1.cross(e2);
        let from_ray = v0 - ray.origin;

        // Use a true division (_mm256_div_ps), not the reciprocal approximation
        // (_mm256_rcp_ps) because the approximation is too inaccurate and
        // causes visual artifacts. The alternative is to use the approximation
        // with one Newton iteration, but that is slightly slower than just
        // doing the division. (Even though the microbenchmarks show that
        // `recip_precise` is faster than the division, when used in this
        // method, the division is faster.)
        let denom = Mf32::one() / ray.direction.dot(normal_denorm);
        let t = from_ray.dot(normal_denorm) * denom;

        // If the potential intersection is further away than the current
        // intersection for all of the rays, it is possible to early out. This
        // cranks up the number of branches from 209M/s to 256M/s and the
        // misprediction rate from 0.66% to 1.11%. Surprisingly, there is no
        // significant effect on the framerate. It appears that the early out
        // wins almost exactly cancel the mispredict penalty on my Skylake i7.
        // I opt for not poisoning the branch prediction cache here.

        // if (t - isect.distance).all_sign_bits_positive() {
        //     return isect
        // }

        // Express the location of the intersection in terms of the basis for
        // the plane given by (-e1, e2). The computation of u and v is based on
        // the method in this paper (there they are called alpha and beta):
        // https://www.cs.utah.edu/~aek/research/triangle.pdf
        let cross = ray.direction.cross(from_ray);
        let u = cross.dot(e2) * denom;
        let v = cross.dot(e1) * denom;
        let w = (Mf32::one() - u) - v;

        // In this coordinate system, the triangle is the set of points such
        // that { (u, v) in plane | u >= 0 and v >= 0 and u + v <= 1 }

        // We need t to be positive, because we should not intersect backwards.
        // Also, u and v need to be positive. We can abuse the vblendvps
        // instruction, which considers only the sign bit, so if t, u, v, and w
        // all have sign bit set to 0 (positive), then their bitwise or will
        // have so too. If w is positive then u + v < 1.0.
        let mask_positive = (t | u) | (v | w);

        // The intersection also needs to be closer than any previous
        // intersection. (Again, do the reverse comparison because sign bit 1
        // means discard intersection.)
        let mask_closer = t.geq(isect.distance);

        // Interpolate the texture coordinates. The weights of the vertices v0,
        // v1, and v2 are w, v, and u respectively.
        let (tx0x, tx0y) = (Mf32::broadcast(self.uv0.0), Mf32::broadcast(self.uv0.1));
        let (tx1x, tx1y) = (Mf32::broadcast(self.uv1.0), Mf32::broadcast(self.uv1.1));
        let (tx2x, tx2y) = (Mf32::broadcast(self.uv2.0), Mf32::broadcast(self.uv2.1));
        let tex_x = tx0x.mul_add(w, tx1x.mul_add(v, tx2x * u));
        let tex_y = tx0y.mul_add(w, tx1y.mul_add(v, tx2y * u));

        let new_isect = MIntersection {
            position: ray.direction.mul_add(t, ray.origin),
            normal: normal_denorm.normalized(),
            distance: t,
            material: MMaterial::broadcast_material(self.material),
            tex_coords: (tex_x, tex_y),
        };

        // Per ray, pick the new intersection if it is closer and if it was
        // indeed an intersection of the triangle, or pick the previous
        // intersection otherwise.
        // NOTE(review): `ray.active` is part of the discard mask, so presumably
        // a set sign bit there marks a ray that must keep its previous
        // intersection -- confirm against the definition of `MRay`.
        new_isect.pick(&isect, mask_positive | (ray.active | mask_closer))
    }
149 | 
150 |     /// Intersects the triangle with a packet of rays and returns the data
151 |     /// that is needed to compute the probability density for each ray.
152 |     pub fn intersect_direct(&self, ray: &MRay) -> MDirectIntersection {
153 |         // The structure follows `intersect()`, which has detailed comments.
154 |         let vertex0 = MVector3::broadcast(self.v0);
155 |         let edge1 = vertex0 - MVector3::broadcast(self.v2);
156 |         let edge2 = MVector3::broadcast(self.v1) - vertex0;
157 |         let to_vertex = vertex0 - ray.origin;
158 | 
159 |         // Derive the reciprocal length of the unnormalized normal once; it
160 |         // yields both the unit normal and (via half the length) the area.
161 |         let plane_normal = edge1.cross(edge2);
162 |         let inv_length = plane_normal.norm_squared().rsqrt();
163 |         let half_length = Mf32::broadcast(0.5) * inv_length.recip_fast();
164 | 
165 |         // Only probability densities are estimated from the result, so the
166 |         // fast reciprocal approximation is accurate enough here.
167 |         let inv_denom = ray.direction.dot(plane_normal).recip_fast();
168 |         let dist = to_vertex.dot(plane_normal) * inv_denom;
169 | 
170 |         let dir_cross = ray.direction.cross(to_vertex);
171 |         let bary_u = dir_cross.dot(edge2) * inv_denom;
172 |         let bary_v = dir_cross.dot(edge1) * inv_denom;
173 | 
174 |         // A sign bit of 0 (positive) in `hit_mask` means that the ray
175 |         // intersected the triangle.
176 |         let uv_mask = (bary_u + bary_v).geq(Mf32::one());
177 |         let hit_mask = (bary_u | bary_v) | (dist | uv_mask);
178 | 
179 |         MDirectIntersection {
180 |             normal: plane_normal * inv_length,
181 |             distance: dist,
182 |             area: half_length,
183 |             mask: hit_mask,
184 |         }
185 |     }
186 | }
187 | 
188 | #[test]
189 | fn intersect_triangle() {
190 |     use ray::SRay;
191 | 
192 |     // A triangle in the plane z = 1 around the point (0, 0, 1).
193 |     let v0 = SVector3::new(0.0, 1.0, 1.0);
194 |     let v1 = SVector3::new(-1.0, -1.0, 1.0);
195 |     let v2 = SVector3::new(1.0, -1.0, 1.0);
196 |     let triangle = Triangle::new(v0, v1, v2, SMaterial::white());
197 | 
198 |     // Both rays point along the z-axis. The first one starts at the origin,
199 |     // the second one starts at x = -1 and is expected to miss the triangle.
200 |     let forward = SVector3::new(0.0, 0.0, 1.0);
201 |     let ray_hit = SRay { origin: SVector3::zero(), direction: forward };
202 |     let ray_miss = SRay { origin: SVector3::new(-1.0, 0.0, 0.0), direction: forward };
203 | 
204 |     // Even lanes get the first ray, odd lanes get the second ray.
205 |     let ray = MRay::generate(|i| match i % 2 {
206 |         0 => ray_hit.clone(),
207 |         _ => ray_miss.clone(),
208 |     });
209 | 
210 |     let far = MIntersection::with_max_distance(1e5);
211 |     let result = triangle.intersect(&ray, far);
212 | 
213 |     println!("distance is {}", result.distance.0);
214 |     assert!(result.distance.0 < 1.01);
215 |     assert!(result.distance.0 > 0.99);
216 |     assert_eq!(result.distance.1, 1e5);
217 | 
218 |     // The first lane must have been intersected at (0, 0, 1).
219 |     let expected = MVector3::new(Mf32::zero(), Mf32::zero(), Mf32::one());
220 |     let error = (result.position - expected).norm_squared();
221 |     assert!(error.0 < 0.01);
222 | }
223 | 
224 | 
225 | #[test]
226 | fn intersect_triangle_direct() {
227 |     use ray::SRay;
228 | 
229 |     // A triangle in the plane z = 1 around the point (0, 0, 1).
230 |     let v0 = SVector3::new(0.0, 1.0, 1.0);
231 |     let v1 = SVector3::new(-1.0, -1.0, 1.0);
232 |     let v2 = SVector3::new(1.0, -1.0, 1.0);
233 |     let triangle = Triangle::new(v0, v1, v2, SMaterial::white());
234 | 
235 |     // Two rays along the z-axis, alternated over the lanes of the packet.
236 |     let forward = SVector3::new(0.0, 0.0, 1.0);
237 |     let ray_a = SRay { origin: SVector3::zero(), direction: forward };
238 |     let ray_b = SRay { origin: SVector3::new(-1.0, 0.0, 0.0), direction: forward };
239 |     let ray = MRay::generate(|i| match i % 2 {
240 |         0 => ray_a.clone(),
241 |         _ => ray_b.clone(),
242 |     });
243 | 
244 |     let result = triangle.intersect_direct(&ray);
245 | 
246 |     // Both lanes must report a distance of one to the triangle plane.
247 |     let dist = result.distance;
248 |     assert!(dist.0 < 1.01);
249 |     assert!(dist.0 > 0.99);
250 |     assert!(dist.1 < 1.01);
251 |     assert!(dist.1 > 0.99);
252 | 
253 |     // The returned normal must have unit length in both lanes.
254 |     let normal_norm = result.normal.norm_squared();
255 |     assert!(normal_norm.0 < 1.01);
256 |     assert!(normal_norm.0 > 0.99);
257 |     assert!(normal_norm.1 < 1.01);
258 |     assert!(normal_norm.1 > 0.99);
259 | }
260 | 
261 | #[bench]
262 | fn bench_intersect_8_mrays_per_tri(b: &mut test::Bencher) {
263 |     // Every iteration intersects the next triangle with the next eight ray
264 |     // packets, starting from a fresh far-away intersection each time.
265 |     let rays = bench::mrays_inward(4096 / 8);
266 |     let tris = bench::triangles(4096);
267 |     let mut ray_cycle = rays.iter().cycle();
268 |     let mut tri_cycle = tris.iter().cycle();
269 |     b.iter(|| {
270 |         let tri = tri_cycle.next().unwrap();
271 |         for _ in 0..8 {
272 |             let packet = ray_cycle.next().unwrap();
273 |             let far = MIntersection::with_max_distance(1e5);
274 |             test::black_box(tri.intersect(packet, far));
275 |         }
276 |     });
277 | }
276 | 
277 | #[bench]
278 | fn bench_intersect_8_tris_per_mray(b: &mut test::Bencher) {
279 |     // Every iteration intersects the next ray packet with the next eight
280 |     // triangles, threading the intersection through all of them.
281 |     let rays = bench::mrays_inward(4096 / 8);
282 |     let tris = bench::triangles(4096);
283 |     let mut ray_cycle = rays.iter().cycle();
284 |     let mut tri_cycle = tris.iter().cycle();
285 |     b.iter(|| {
286 |         let packet = ray_cycle.next().unwrap();
287 |         let far = MIntersection::with_max_distance(1e5);
288 |         let nearest = (0..8).fold(far, |isect, _| {
289 |             let tri = tri_cycle.next().unwrap();
290 |             tri.intersect(packet, isect)
291 |         });
292 |         test::black_box(nearest);
293 |     });
294 | }
293 | 
294 | #[bench]
295 | fn bench_intersect_direct_8_tris_per_mray(b: &mut test::Bencher) {
296 |     // Every iteration runs the direct intersection of the next ray packet
297 |     // against the same eight triangles.
298 |     let rays = bench::mrays_inward(4096 / 8);
299 |     let tris = bench::triangles(8);
300 |     let mut ray_cycle = rays.iter().cycle();
301 |     b.iter(|| {
302 |         let packet = ray_cycle.next().unwrap();
303 |         for tri in tris.iter() {
304 |             test::black_box(tri.intersect_direct(packet));
305 |         }
306 |     });
307 | }
306 | 


--------------------------------------------------------------------------------
/src/ui.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | //! This module handles user input and getting pixels onto the screen. It uses
  9 | //! the Glium library, a safe wrapper around OpenGL.
 10 | 
 11 | use filebuffer::FileBuffer;
 12 | use glium::{DisplayBuild, Program, Surface, VertexBuffer};
 13 | use glium::backend::Facade;
 14 | use glium::backend::glutin_backend::GlutinFacade;
 15 | use glium::glutin::{Event, WindowBuilder};
 16 | use glium::index::{NoIndices, PrimitiveType};
 17 | use glium::texture::{MipmapsOption, RawImage2d, SrgbTexture2d, Texture2d};
 18 | use stats::GlobalStats;
 19 | use std::str;
 20 | use time::PreciseTime;
 21 | 
 22 | /// A corner of the full-screen quad: a 2D position and a texture coordinate.
 23 | #[derive(Copy, Clone)]
 24 | struct Vertex {
 25 |     position: [f32; 2], // `FullScreenQuad::new` uses -1.0 and 1.0 here.
 26 |     tex_coords: [f32; 2], // `FullScreenQuad::new` uses 0.0 and 1.0 here.
 27 | }
 28 | 
 29 | implement_vertex!(Vertex, position, tex_coords); // Presumably Glium's macro; lists both fields.
 30 | 
 31 | /// A full-screen quad plus the shader programs used to draw it with OpenGL.
 32 | struct FullScreenQuad {
 33 |     vertex_buffer: VertexBuffer<Vertex>, // The four corners of the quad.
 34 |     indices: NoIndices, // Triangle strip, drawn without an index buffer.
 35 |     program_blend: Program, // Fragment shader: `src/gpu/blend.glsl`.
 36 |     program_gbuffer: Program, // Fragment shader: `src/gpu/gbuffer.glsl`.
 37 |     program_id: Program, // Fragment shader: `src/gpu/id.glsl`.
 38 |     program_median: Program, // Fragment shader: `src/gpu/median.glsl`.
 39 | }
 40 | 
 41 | impl FullScreenQuad {
 42 |     /// Sets up the vertex buffer and shaders for a full-screen quad.
 43 |     ///
 44 |     /// The shader sources are read from `src/gpu/`. Panics if a source cannot
 45 |     /// be loaded, is not valid UTF-8, or if a program fails to build.
 46 |     pub fn new<F: Facade>(facade: &F) -> FullScreenQuad {
 47 |         // The corners are listed in triangle strip order.
 48 |         let quad = vec![
 49 |             Vertex { position: [-1.0, -1.0], tex_coords: [0.0, 0.0] },
 50 |             Vertex { position: [ 1.0, -1.0], tex_coords: [1.0, 0.0] },
 51 |             Vertex { position: [-1.0,  1.0], tex_coords: [0.0, 1.0] },
 52 |             Vertex { position: [ 1.0,  1.0], tex_coords: [1.0, 1.0] },
 53 |         ];
 54 |         let vertex_buffer = VertexBuffer::new(facade, &quad).unwrap();
 55 |         let indices = NoIndices(PrimitiveType::TriangleStrip);
 56 | 
 57 |         // All programs share the same vertex shader; load and decode it once.
 58 |         let vertex_shader = FileBuffer::open("src/gpu/vertex.glsl")
 59 |             .expect("failed to load vertex shader source");
 60 |         let vertex_source = str::from_utf8(&vertex_shader[..])
 61 |             .expect("vertex shader source is not valid utf-8");
 62 | 
 63 |         let program_blend =
 64 |             FullScreenQuad::load_program(facade, vertex_source, "src/gpu/blend.glsl");
 65 |         let program_gbuffer =
 66 |             FullScreenQuad::load_program(facade, vertex_source, "src/gpu/gbuffer.glsl");
 67 |         let program_id =
 68 |             FullScreenQuad::load_program(facade, vertex_source, "src/gpu/id.glsl");
 69 |         let program_median =
 70 |             FullScreenQuad::load_program(facade, vertex_source, "src/gpu/median.glsl");
 71 | 
 72 |         FullScreenQuad {
 73 |             vertex_buffer: vertex_buffer,
 74 |             indices: indices,
 75 |             program_blend: program_blend,
 76 |             program_gbuffer: program_gbuffer,
 77 |             program_id: program_id,
 78 |             program_median: program_median,
 79 |         }
 80 |     }
 81 | 
 82 |     /// Builds a program from the shared vertex shader source and the fragment
 83 |     /// shader stored at `fragment_path`.
 84 |     ///
 85 |     /// Panics with a message that names the fragment shader if the file
 86 |     /// cannot be used, so a broken shader is easy to identify.
 87 |     fn load_program<F: Facade>(facade: &F, vertex_source: &str, fragment_path: &str) -> Program {
 88 |         let fragment_shader = FileBuffer::open(fragment_path)
 89 |             .expect("failed to load fragment shader source");
 90 | 
 91 |         let fragment_source = match str::from_utf8(&fragment_shader[..]) {
 92 |             Ok(source) => source,
 93 |             Err(err) => panic!("{} is not valid utf-8: {}", fragment_path, err),
 94 |         };
 95 | 
 96 |         match Program::from_source(facade, vertex_source, fragment_source, None) {
 97 |             Ok(program) => program,
 98 |             Err(err) => panic!("failed to build program for {}: {:?}", fragment_path, err),
 99 |         }
100 |     }
101 | 
102 |     /// Renders the frames blended to the target surface.
103 |     ///
104 |     /// The blend shader samples eight frames, so `frames` must contain at
105 |     /// least eight textures; only the first eight are used.
106 |     pub fn draw_blended<S: Surface>(&self, target: &mut S, frames: &[Texture2d]) {
107 |         assert!(frames.len() >= 8, "blending requires eight frames");
108 |         let uniforms = uniform! {
109 |             frame0: &frames[0],
110 |             frame1: &frames[1],
111 |             frame2: &frames[2],
112 |             frame3: &frames[3],
113 |             frame4: &frames[4],
114 |             frame5: &frames[5],
115 |             frame6: &frames[6],
116 |             frame7: &frames[7],
117 |         };
118 |         target.draw(&self.vertex_buffer,
119 |                   &self.indices,
120 |                   &self.program_blend,
121 |                   &uniforms,
122 |                   &Default::default())
123 |             .expect("failed to draw quad");
124 |     }
125 | 
126 |     /// Renders a single frame to the target surface.
127 |     pub fn draw_single<S: Surface>(&self, target: &mut S, frame: &Texture2d) {
128 |         // Reuse the blend program, but blend the frame with itself.
129 |         let uniforms = uniform! {
130 |             frame0: frame,
131 |             frame1: frame,
132 |             frame2: frame,
133 |             frame3: frame,
134 |             frame4: frame,
135 |             frame5: frame,
136 |             frame6: frame,
137 |             frame7: frame,
138 |         };
139 |         target.draw(&self.vertex_buffer,
140 |                   &self.indices,
141 |                   &self.program_blend,
142 |                   &uniforms,
143 |                   &Default::default())
144 |             .expect("failed to draw quad");
145 |     }
146 | 
147 |     /// Draws the source onto the target.
148 |     ///
149 |     /// This does not have the same effect as using `source.fill()`, because
150 |     /// that does not apply the linear RGB -> sRGB conversion when the target is
151 |     /// the framebuffer, whereas this method does (this is handled automatically
152 |     /// by OpenGL).
153 |     pub fn draw_id<S: Surface>(&self, target: &mut S, source: &Texture2d) {
154 |         let uniforms = uniform! {
155 |             frame: source,
156 |         };
157 |         target.draw(&self.vertex_buffer,
158 |                   &self.indices,
159 |                   &self.program_id,
160 |                   &uniforms,
161 |                   &Default::default())
162 |             .expect("failed to draw quad");
163 |     }
164 | 
165 |     /// Applies the gbuffer shader for texture filtering.
166 |     ///
167 |     /// The shader samples two scene textures, so `textures` must contain at
168 |     /// least two textures; only the first two are used.
169 |     pub fn draw_gbuffer<S: Surface>(&self,
170 |                                     target: &mut S,
171 |                                     frame: &Texture2d,
172 |                                     gbuffer: &Texture2d,
173 |                                     textures: &[SrgbTexture2d]) {
174 |         assert!(textures.len() >= 2, "the gbuffer pass requires two textures");
175 |         let uniforms = uniform! {
176 |             frame: frame,
177 |             gbuffer: gbuffer,
178 |             texture1: &textures[0],
179 |             texture2: &textures[1],
180 |         };
181 |         target.draw(&self.vertex_buffer,
182 |                   &self.indices,
183 |                   &self.program_gbuffer,
184 |                   &uniforms,
185 |                   &Default::default())
186 |             .expect("failed to draw quad");
187 |     }
188 | 
189 |     /// Applies a median filter to the source and draws that to the target.
190 |     ///
191 |     /// The shader receives the size of one pixel in texture coordinates,
192 |     /// computed as the reciprocals of `width` and `height`.
193 |     pub fn draw_median<S: Surface>(&self,
194 |                                    target: &mut S,
195 |                                    source: &Texture2d,
196 |                                    width: u32,
197 |                                    height: u32) {
198 |         let uniforms = uniform! {
199 |             frame: source,
200 |             pixel_size: [1.0 / width as f32, 1.0 / height as f32],
201 |         };
202 |         target.draw(&self.vertex_buffer,
203 |                   &self.indices,
204 |                   &self.program_median,
205 |                   &uniforms,
206 |                   &Default::default())
207 |             .expect("failed to draw quad");
208 |     }
209 | }
206 | 
207 | pub struct Window { // The application window together with its GPU-side state.
208 |     display: GlutinFacade, // The Glium display created in `Window::new`.
209 |     quad: FullScreenQuad, // Quad and shader programs used for every draw pass.
210 |     frames: [Texture2d; 8], // The frames that are blended when blending is enabled.
211 |     scratch: Texture2d, // Holds the uploaded render, later the blend result.
212 |     gbuffer_texture: Texture2d, // The most recently uploaded gbuffer.
213 |     textures: Vec<SrgbTexture2d>, // Scene textures added by `upload_texture`.
214 |     frame_index: u32, // Index into `frames` of the frame drawn last.
215 |     enable_blend: bool, // Toggled by the 'b' key, initially true.
216 |     enable_median: bool, // Toggled by the 'm' key, initially true.
217 |     width: u32, // Width used for the frame textures.
218 |     height: u32, // Height used for the frame textures.
219 | }
220 | 
221 | pub enum Action { // The result of `Window::handle_events`.
222 |     DumpTrace, // The user pressed 't'.
223 |     None, // No pending event asked for an action.
224 |     PrintStats, // The user pressed 's'.
225 |     Quit, // The window was closed or the user pressed 'q'.
226 |     ToggleDebugView, // The user pressed 'd'.
227 |     ToggleRealtime, // The user pressed 'r'.
228 | }
229 | 
230 | /// Returns a zero-filled bitmap with four bytes per pixel.
231 | ///
232 | /// The size is computed in `usize` rather than `u32`, so the multiplication
233 | /// does not overflow for dimensions whose byte count exceeds `u32::MAX`.
234 | fn black_bitmap(width: u32, height: u32) -> Vec<u8> {
235 |     let num_bytes = width as usize * height as usize * 4;
236 |     vec![0; num_bytes]
237 | }
238 | 
239 | impl Window {
240 |     /// Opens a new window using Glutin.
241 |     ///
242 |     /// Also creates the full-screen quad, the scratch and gbuffer textures,
243 |     /// and eight black frames. Panics if the window or any of these
244 |     /// resources cannot be created.
245 |     pub fn new(width: u32, height: u32, title: &str) -> Window {
246 |         // TODO: Proper HiDPI support.
247 |         let display = WindowBuilder::new()
248 |             .with_dimensions(width, height)
249 |             .with_title(String::from(title))
250 |             .with_srgb(Some(true)) // Automatically convert RGB -> sRGB.
251 |             .with_vsync()
252 |             .build_glium()
253 |             .expect("failed to create gl window");
254 | 
255 |         let quad = FullScreenQuad::new(&display);
256 | 
257 |         let scratch = Texture2d::empty(&display, width, height)
258 |             .expect("failed to create scratch texture");
259 | 
260 |         let gbuffer_tex = Texture2d::empty(&display, width, height)
261 |             .expect("failed to create gbuffer texture");
262 | 
263 |         // Create the frames before the window itself, so the struct never
264 |         // contains uninitialized textures. If creating a frame panics, only
265 |         // fully initialized values are dropped during unwinding.
266 |         let frames = {
267 |             let black_frame = || {
268 |                 let bitmap = black_bitmap(width, height);
269 |                 Window::create_frame(&display, width, height, bitmap)
270 |             };
271 |             [black_frame(), black_frame(), black_frame(), black_frame(),
272 |              black_frame(), black_frame(), black_frame(), black_frame()]
273 |         };
274 | 
275 |         Window {
276 |             display: display,
277 |             quad: quad,
278 |             frames: frames,
279 |             scratch: scratch,
280 |             gbuffer_texture: gbuffer_tex,
281 |             textures: Vec::new(),
282 |             frame_index: 0,
283 |             enable_blend: true,
284 |             enable_median: true,
285 |             width: width,
286 |             height: height,
287 |         }
288 |     }
289 | 
290 |     /// Creates a texture without mipmaps of the given dimensions from an
291 |     /// RGBA bitmap. Panics if the texture cannot be created.
292 |     fn create_frame<F: Facade>(facade: &F, width: u32, height: u32, bitmap: Vec<u8>) -> Texture2d {
293 |         let texture_data = RawImage2d::from_raw_rgba(bitmap, (width, height));
294 |         Texture2d::with_mipmaps(facade, texture_data, MipmapsOption::NoMipmap)
295 |             .expect("failed to create texture")
296 |     }
297 | 
298 |     /// Uploads an RGBA bitmap with the dimensions of the window as a texture.
299 |     fn upload_frame(&mut self, bitmap: Vec<u8>) -> Texture2d {
300 |         Window::create_frame(&self.display, self.width, self.height, bitmap)
301 |     }
302 | 
303 |     /// Uploads a texture to the GPU. This is intended for the textures that are
304 |     /// used for the scene, not the full-screen rendered frames. Texture
305 |     /// dimensions must be 1024 x 1024, with three bytes (RGB) per pixel.
306 |     pub fn upload_texture(&mut self, bitmap: Vec<u8>) {
307 |         assert_eq!(bitmap.len(), 1024 * 1024 * 3);
308 | 
309 |         let texture_data = RawImage2d::from_raw_rgb(bitmap, (1024, 1024));
310 |         let texture =
311 |             SrgbTexture2d::with_mipmaps(&self.display, texture_data, MipmapsOption::NoMipmap)
312 |                 .expect("failed to create texture");
313 | 
314 |         self.textures.push(texture);
315 |     }
316 | 
317 |     /// Uploads a rendered frame and its gbuffer, post-processes the frame on
318 |     /// the GPU, and displays the result.
319 |     ///
320 |     /// The time spent uploading and applying the gbuffer pass is recorded in
321 |     /// `stats.tex_upload_us`; the time spent on the remaining passes and the
322 |     /// buffer swap is recorded in `stats.draw_vsync_us`.
323 |     pub fn display_buffer(&mut self,
324 |                           rgba_buffer: Vec<u8>,
325 |                           gbuffer: Vec<u8>,
326 |                           stats: &mut GlobalStats) {
327 |         assert_eq!(rgba_buffer.len(),
328 |                    self.width as usize * self.height as usize * 4);
329 | 
330 |         let begin_texture = PreciseTime::now();
331 | 
332 |         // Upload the render result to the GPU. It is not yet correct, it needs
333 |         // a gbuffer pass to add the textures.
334 |         self.scratch = self.upload_frame(rgba_buffer);
335 |         self.gbuffer_texture = self.upload_frame(gbuffer);
336 | 
337 |         // TODO: Fix timers and trace here.
338 | 
339 |         // Apply the gbuffer pass and render into one of the eight frames that
340 |         // are kept on the GPU.
341 |         self.frame_index = (self.frame_index + 1) % 8;
342 |         let frame_index = self.frame_index as usize;
343 |         let mut target = self.frames[frame_index].as_surface();
344 |         self.quad.draw_gbuffer(&mut target,
345 |                                &self.scratch,
346 |                                &self.gbuffer_texture,
347 |                                &self.textures[..]);
348 | 
349 |         let begin_draw = PreciseTime::now();
350 | 
351 |         // Blend the past eight frames together into the scratch texture. (Or
352 |         // not, if blending is disabled.)
353 |         let mut target = self.scratch.as_surface();
354 |         if self.enable_blend {
355 |             self.quad.draw_blended(&mut target, &self.frames[..]);
356 |         } else {
357 |             self.quad.draw_single(&mut target, &self.frames[frame_index]);
358 |         }
359 | 
360 |         // Apply a median filter to the scratch texture (or not if disabled) and
361 |         // display that.  Finishing drawing will swap the buffers and wait for a
362 |         // vsync.
363 |         let mut target = self.display.draw();
364 |         if self.enable_median {
365 |             self.quad.draw_median(&mut target, &self.scratch, self.width, self.height);
366 |         } else {
367 |             self.quad.draw_id(&mut target, &self.scratch);
368 |         }
369 |         target.finish().expect("failed to swap buffers");
370 | 
371 |         let end_draw = PreciseTime::now();
372 |         stats.tex_upload_us.insert_time_us(begin_texture.to(begin_draw));
373 |         stats.draw_vsync_us.insert_time_us(begin_draw.to(end_draw));
374 |     }
375 | 
376 |     /// Handles all window events and returns an action to be performed.
377 |     ///
378 |     /// Toggling blending ('b') and the median filter ('m') is handled here
379 |     /// directly. The first event that requires an action from the caller is
380 |     /// returned immediately; events after it stay queued for the next call.
381 |     pub fn handle_events(&mut self) -> Action {
382 |         for ev in self.display.poll_events() {
383 |             match ev {
384 |                 // Window was closed by the user.
385 |                 Event::Closed => return Action::Quit,
386 |                 // The user pressed 'b' to toggle blending.
387 |                 Event::ReceivedCharacter('b') => self.enable_blend = !self.enable_blend,
388 |                 // The user pressed 'd' to toggle debug view.
389 |                 Event::ReceivedCharacter('d') => return Action::ToggleDebugView,
390 |                 // The user pressed 'm' to toggle the median filter.
391 |                 Event::ReceivedCharacter('m') => self.enable_median = !self.enable_median,
392 |                 // The user pressed 'q' for quit.
393 |                 Event::ReceivedCharacter('q') => return Action::Quit,
394 |                 // The user pressed 'r' to toggle the render mode.
395 |                 Event::ReceivedCharacter('r') => return Action::ToggleRealtime,
396 |                 // The user pressed 's' for stats.
397 |                 Event::ReceivedCharacter('s') => return Action::PrintStats,
398 |                 // The user pressed 't' for trace.
399 |                 Event::ReceivedCharacter('t') => return Action::DumpTrace,
400 |                 // Something else.
401 |                 _ => (),
402 |             }
403 |         }
404 |         Action::None
405 |     }
406 | }
393 | 


--------------------------------------------------------------------------------
/src/util.rs:
--------------------------------------------------------------------------------
 1 | // Convector -- An interactive CPU path tracer
 2 | // Copyright 2016 Ruud van Asseldonk
 3 | 
 4 | // This program is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU General Public License version 3. A copy
 6 | // of the License is available in the root of the repository.
 7 | 
 8 | //! A mod with utility functions.
 9 | 
10 | use alloc::heap;
11 | use std::mem;
12 | 
13 | /// Allocates a buffer for the specified number of elements, aligned to a cache
14 | /// line (64 bytes). The returned vector has length 0 and capacity `len`.
15 | ///
16 | /// Free the vector with `drop_cache_line_aligned_vec`, which deallocates the
17 | /// buffer with the same 64-byte alignment that it was allocated with.
18 | ///
19 | /// Panics if the size in bytes overflows `usize` or if the allocation fails.
20 | pub fn cache_line_aligned_vec<T>(len: usize) -> Vec<T> {
21 |     let num_bytes = mem::size_of::<T>()
22 |         .checked_mul(len)
23 |         .expect("size of cache line aligned buffer overflows usize");
24 |     let cache_line_len = 64;
25 |     unsafe {
26 |         let buffer = heap::allocate(num_bytes, cache_line_len);
27 | 
28 |         // A failed allocation yields a null pointer. Wrapping that in a vector
29 |         // would turn every later write into undefined behavior, so fail here.
30 |         assert!(num_bytes == 0 || !buffer.is_null(),
31 |                 "failed to allocate cache line aligned buffer");
32 | 
33 |         Vec::from_raw_parts(buffer as *mut T, 0, len)
34 |     }
35 | }
24 | 
25 | /// Drops a vector that was constructed with `cache_line_aligned_vec` without
26 | /// crashing on Windows.
27 | ///
28 | /// The elements are dropped first, then the buffer is deallocated with the
29 | /// 64-byte alignment that `cache_line_aligned_vec` allocated it with.
30 | pub fn drop_cache_line_aligned_vec<T>(mut v: Vec<T>) {
31 |     // Run the destructors of the elements; forgetting the vector below would
32 |     // otherwise leak whatever they own. This leaves the buffer untouched.
33 |     v.clear();
34 | 
35 |     unsafe {
36 |         let ptr = v.as_mut_ptr() as *mut u8;
37 |         let num_bytes = v.capacity() * mem::size_of::<T>();
38 | 
39 |         // Prevent the destructor from freeing anything.
40 |         mem::forget(v);
41 | 
42 |         // Then free manually.
43 |         let cache_line_len = 64;
44 |         heap::deallocate(ptr, num_bytes, cache_line_len);
45 |     }
46 | }
40 | 
41 | /// Reinterprets the buffer of a vector as a buffer of elements of a different
42 | /// type, without copying. Panics if the length or the capacity in bytes is
43 | /// not a multiple of the size of the new element type.
44 | pub unsafe fn transmute_vec<T, U>(mut v: Vec<T>) -> Vec<U> {
45 |     let (size_t, size_u) = (mem::size_of::<T>(), mem::size_of::<U>());
46 |     let (cap_bytes, len_bytes) = (size_t * v.capacity(), size_t * v.len());
47 |     let (new_cap, new_len) = (cap_bytes / size_u, len_bytes / size_u);
48 | 
49 |     // The conversion must be exact: no bytes may be left over.
50 |     assert_eq!(cap_bytes, new_cap * size_u);
51 |     assert_eq!(len_bytes, new_len * size_u);
52 | 
53 |     let ptr = v.as_mut_ptr() as *mut U;
54 | 
55 |     // The new vector takes over the buffer, so `v` must not free it.
56 |     mem::forget(v);
57 | 
58 |     Vec::from_raw_parts(ptr, new_len, new_cap)
59 | }
60 | 
61 | /// Turns an immutable slice into a mutable slice over the same elements.
62 | #[allow(mutable_transmutes)]
63 | pub unsafe fn make_mutable<T>(x: &[T]) -> &mut [T] {
64 |     // Getting this to compile with UnsafeCell turned out to be a real pain,
65 |     // so the mutability is simply forced in: rebuild the slice from the same
66 |     // pointer and length, but as a mutable one.
67 |     ::std::slice::from_raw_parts_mut(x.as_ptr() as *mut T, x.len())
68 | }
69 | 
70 | /// Builds an array of eight elements, where the element at index `i` is
71 | /// `f(i)`. The function is called in order of increasing index.
72 | pub fn generate_slice8<T, F>(mut f: F) -> [T; 8]
73 |     where F: FnMut(usize) -> T
74 | {
75 |     let (e0, e1, e2, e3) = (f(0), f(1), f(2), f(3));
76 |     let (e4, e5, e6, e7) = (f(4), f(5), f(6), f(7));
77 |     [e0, e1, e2, e3, e4, e5, e6, e7]
78 | }
76 | 


--------------------------------------------------------------------------------
/src/vector3.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | //! Implements vectors in R3.
  9 | 
 10 | use simd::{Mask, Mf32};
 11 | use std::f32;
 12 | use std::fmt;
 13 | use std::ops::{Add, Sub, Neg, Mul};
 14 | 
 15 | #[cfg(test)]
 16 | use {bench, test};
 17 | 
 18 | #[derive(Copy, Clone, Debug, PartialEq)]
 19 | pub struct SVector3 { // A single vector in R3 with `f32` coordinates.
 20 |     pub x: f32,
 21 |     pub y: f32,
 22 |     pub z: f32,
 23 | }
 24 | 
 25 | #[derive(Copy, Clone, Debug, PartialEq)]
 26 | pub struct MVector3 { // Vectors in R3 stored per coordinate, each one an `Mf32`.
 27 |     pub x: Mf32,
 28 |     pub y: Mf32,
 29 |     pub z: Mf32,
 30 | }
 31 | 
 32 | #[derive(Copy, Clone, Debug)]
 33 | pub enum Axis { // Selects a coordinate of a vector, see `SVector3::get_coord`.
 34 |     X,
 35 |     Y,
 36 |     Z,
 37 | }
 38 | 
 39 | impl SVector3 {
 40 |     /// Builds a vector from its three coordinates.
 41 |     pub fn new(x: f32, y: f32, z: f32) -> SVector3 {
 42 |         SVector3 { x: x, y: y, z: z }
 43 |     }
 44 | 
 45 |     /// Returns the vector with all coordinates equal to zero.
 46 |     pub fn zero() -> SVector3 {
 47 |         SVector3 { x: 0.0, y: 0.0, z: 0.0 }
 48 |     }
 49 | 
 50 |     /// Returns the vector with all coordinates equal to one.
 51 |     pub fn one() -> SVector3 {
 52 |         SVector3 { x: 1.0, y: 1.0, z: 1.0 }
 53 |     }
 54 | 
 55 |     /// Cross product computed with plain multiplications and subtractions.
 56 |     #[inline(always)]
 57 |     pub fn cross_naive(self: SVector3, other: SVector3) -> SVector3 {
 58 |         let x = self.y * other.z - self.z * other.y;
 59 |         let y = self.z * other.x - self.x * other.z;
 60 |         let z = self.x * other.y - self.y * other.x;
 61 |         SVector3 { x: x, y: y, z: z }
 62 |     }
 63 | 
 64 |     /// Cross product computed with fused multiply-adds.
 65 |     #[inline(always)]
 66 |     pub fn cross_fma(self: SVector3, other: SVector3) -> SVector3 {
 67 |         let x = self.y.mul_add(other.z, -self.z * other.y);
 68 |         let y = self.z.mul_add(other.x, -self.x * other.z);
 69 |         let z = self.x.mul_add(other.y, -self.y * other.x);
 70 |         SVector3 { x: x, y: y, z: z }
 71 |     }
 72 | 
 73 |     /// Returns the cross product of `self` and `other`.
 74 |     pub fn cross(self, other: SVector3) -> SVector3 {
 75 |         // According to the benchmarks the FMA version beats the naive
 76 |         // version (1.9 ns vs 2.1 ns on my Skylake i7), but **only** if the
 77 |         // "fma" codegen feature is enabled. Without it, the naive version
 78 |         // is the faster one.
 79 |         SVector3::cross_fma(self, other)
 80 |     }
 81 | 
 82 |     /// Dot product computed with plain multiplications and additions.
 83 |     #[inline(always)]
 84 |     pub fn dot_naive(self, other: SVector3) -> f32 {
 85 |         let xx = self.x * other.x;
 86 |         let yy = self.y * other.y;
 87 |         let zz = self.z * other.z;
 88 |         xx + yy + zz
 89 |     }
 90 | 
 91 |     /// Dot product computed with fused multiply-adds.
 92 |     #[inline(always)]
 93 |     pub fn dot_fma(self, other: SVector3) -> f32 {
 94 |         let zz = self.z * other.z;
 95 |         let yy_zz = self.y.mul_add(other.y, zz);
 96 |         self.x.mul_add(other.x, yy_zz)
 97 |     }
 98 | 
 99 |     /// Returns the dot product of `self` and `other`.
100 |     pub fn dot(self, other: SVector3) -> f32 {
101 |         // Without the "fma" codegen feature the benchmarks favor the naive
102 |         // version; with the feature enabled both perform similarly. The FMA
103 |         // version is used because it appears to be slightly more stable.
104 |         SVector3::dot_fma(self, other)
105 |     }
106 | 
107 |     /// Returns the square of the Euclidean norm of the vector.
108 |     pub fn norm_squared(self) -> f32 {
109 |         SVector3::dot(self, self)
110 |     }
111 | 
112 |     /// Returns the vector scaled to unit length. A vector with a squared
113 |     /// norm of exactly zero is returned unchanged.
114 |     pub fn normalized(self) -> SVector3 {
115 |         let norm_squared = self.norm_squared();
116 |         if norm_squared == 0.0 {
117 |             return self;
118 |         }
119 | 
120 |         let scale = norm_squared.sqrt().recip();
121 |         SVector3 {
122 |             x: self.x * scale,
123 |             y: self.y * scale,
124 |             z: self.z * scale,
125 |         }
126 |     }
127 | 
128 |     /// Returns the coordinate of the vector along the given axis.
129 |     pub fn get_coord(self, axis: Axis) -> f32 {
130 |         let SVector3 { x, y, z } = self;
131 |         match axis {
132 |             Axis::X => x,
133 |             Axis::Y => y,
134 |             Axis::Z => z,
135 |         }
136 |     }
137 | 
138 |     /// Returns the coordinatewise minimum of the two vectors.
139 |     pub fn min(self, other: SVector3) -> SVector3 {
140 |         let x = self.x.min(other.x);
141 |         let y = self.y.min(other.y);
142 |         let z = self.z.min(other.z);
143 |         SVector3 { x: x, y: y, z: z }
144 |     }
145 | 
146 |     /// Returns the coordinatewise maximum of the two vectors.
147 |     pub fn max(self, other: SVector3) -> SVector3 {
148 |         let x = self.x.max(other.x);
149 |         let y = self.y.max(other.y);
150 |         let z = self.z.max(other.z);
151 |         SVector3 { x: x, y: y, z: z }
152 |     }
153 | }
144 | 
// Operations on `MVector3`, the vector type whose three coordinates are `Mf32`
// values rather than single floats.
impl MVector3 {
    /// Constructs an mvector from its three coordinates.
    pub fn new(x: Mf32, y: Mf32, z: Mf32) -> MVector3 {
        MVector3 { x: x, y: y, z: z }
    }

    /// Returns the mvector of which every coordinate is `Mf32::zero()`.
    pub fn zero() -> MVector3 {
        MVector3::new(Mf32::zero(), Mf32::zero(), Mf32::zero())
    }

    /// Builds an mvector by broadcasting each coordinate of the scalar vector
    /// `a` into the corresponding `Mf32` coordinate.
    pub fn broadcast(a: SVector3) -> MVector3 {
        MVector3 {
            x: Mf32::broadcast(a.x),
            y: Mf32::broadcast(a.y),
            z: Mf32::broadcast(a.z),
        }
    }

    /// Builds an mvector by applying the function to the numbers 0..7.
    ///
    /// Note: this is essentially a transpose, avoid in hot code. The function
    /// is invoked once per coordinate for every index, so three times per
    /// index in total.
    pub fn generate<F>(mut f: F) -> MVector3
        where F: FnMut(usize) -> SVector3
    {
        MVector3 {
            x: Mf32::generate(|i| f(i).x),
            y: Mf32::generate(|i| f(i).y),
            z: Mf32::generate(|i| f(i).z),
        }
    }

    /// Computes the cross product with separate multiplications and
    /// subtractions.
    #[inline(always)]
    pub fn cross_naive(self, other: MVector3) -> MVector3 {
        let (a, b) = (self, other);
        MVector3 {
            x: a.y * b.z - a.z * b.y,
            y: a.z * b.x - a.x * b.z,
            z: a.x * b.y - a.y * b.x,
        }
    }

    /// Computes the cross product with one `Mf32::mul_sub` per coordinate.
    #[inline(always)]
    pub fn cross_fma(self, other: MVector3) -> MVector3 {
        let (a, b) = (self, other);
        MVector3 {
            x: a.y.mul_sub(b.z, a.z * b.y),
            y: a.z.mul_sub(b.x, a.x * b.z),
            z: a.x.mul_sub(b.y, a.y * b.x),
        }
    }

    /// Computes the cross product of `self` and `other`.
    pub fn cross(self, other: MVector3) -> MVector3 {
        // Benchmarks show that the FMA version is faster than the
        // naive version (2.1 ns vs 2.4 ns on my Skylake i7).
        self.cross_fma(other)
    }

    /// Computes the dot product with separate multiplications and additions.
    #[inline(always)]
    pub fn dot_naive(self, other: MVector3) -> Mf32 {
        let (a, b) = (self, other);
        a.x * b.x + a.y * b.y + a.z * b.z
    }

    /// Computes the dot product as a chain of `Mf32::mul_add` calls, starting
    /// from the product of the z-coordinates.
    #[inline(always)]
    pub fn dot_fma(self, other: MVector3) -> Mf32 {
        let (a, b) = (self, other);
        a.x.mul_add(b.x, a.y.mul_add(b.y, a.z * b.z))
    }

    /// Computes the dot product of `self` and `other`.
    ///
    /// Note that this picks the naive variant, whereas `SVector3::dot` picks
    /// the FMA variant.
    pub fn dot(self, other: MVector3) -> Mf32 {
        // Benchmarks show no performance difference between the naive version
        // and the FMA version. Use the naive one because it is more portable.
        self.dot_naive(other)
    }

    /// Given a vector in the hemisphere with pole at the positive z-axis,
    /// rotates the vector into the hemisphere with pole given by the normal.
    pub fn rotate_hemisphere(self, normal: MVector3) -> MVector3 {
        // If the z-component of the normal is near -1, we might divide by 0. To
        // avoid this, if the z-component is negative, flip the normal. Then we
        // end up in the wrong hemisphere, so at the end, flip the computed
        // vector again.
        let n = normal.pick(-normal, normal.z);

        // One option here would be to take the cross product of the normal and
        // an up vector, and the cross product of the normal with that vector,
        // to get a new orthonormal basis. Then use the old coordinates in this
        // new basis. The method below -- although not as simple -- requires
        // fewer arithmetic operations.
        // Based on https://math.stackexchange.com/a/61550/6873.
        let v = self;

        // Using the fast reciprocal instead of the precise one does hurt
        // precision, but this is used only after the first bounce, so it is
        // less of an issue, and the performance difference is significant. The
        // inaccurate version is about 15% faster.
        let rz = (Mf32::one() + n.z).recip_fast();

        // With c = n.x * n.y / (1 + n.z), the three lines below evaluate
        //
        //     x =  v.x * (n.z + n.y^2 / (1 + n.z)) - v.y * c + v.z * n.x
        //     y = -v.x * c + v.y * (n.z + n.x^2 / (1 + n.z)) + v.z * n.y
        //     z = -v.x * n.x - v.y * n.y + v.z * n.z
        //
        // NOTE(review): this reading assumes that `a.mul_sub(b, c)` is
        // `a * b - c` and that `a.neg_mul_add(b, c)` is `-(a * b) + c`, as the
        // docs of the vector methods with those names suggest -- confirm
        // against `Mf32`.
        let c = n.x * n.y * rz;
        let x = v.x.mul_sub(n.y.mul_add(n.y * rz, n.z), v.y.mul_sub(c, v.z * n.x));
        let y = v.x.neg_mul_add(c, v.y.mul_add(n.x.mul_add(n.x * rz, n.z), v.z * n.y));
        let z = v.x.neg_mul_add(n.x, v.y.neg_mul_add(n.y, v.z * n.z));

        let result = MVector3::new(x, y, z);

        // If we flipped the normal, flip the result too.
        result.pick(-result, normal.z)
    }

    /// Scalar multiplication and vector add using fused multiply-add.
    pub fn mul_add(self, factor: Mf32, other: MVector3) -> MVector3 {
        MVector3 {
            x: self.x.mul_add(factor, other.x),
            y: self.y.mul_add(factor, other.y),
            z: self.z.mul_add(factor, other.z),
        }
    }

    /// Scalar multiplication with -factor and vector add using fused multiply-add.
    pub fn neg_mul_add(self, factor: Mf32, other: MVector3) -> MVector3 {
        MVector3 {
            x: self.x.neg_mul_add(factor, other.x),
            y: self.y.neg_mul_add(factor, other.y),
            z: self.z.neg_mul_add(factor, other.z),
        }
    }

    /// Scalar multiplication and vector subtract using fused multiply-subtract.
    pub fn mul_sub(self, factor: Mf32, other: MVector3) -> MVector3 {
        MVector3 {
            x: self.x.mul_sub(factor, other.x),
            y: self.y.mul_sub(factor, other.y),
            z: self.z.mul_sub(factor, other.z),
        }
    }

    /// Multiplies two vectors coordinatewise.
    pub fn mul_coords(self, factors: MVector3) -> MVector3 {
        MVector3 {
            x: self.x * factors.x,
            y: self.y * factors.y,
            z: self.z * factors.z,
        }
    }

    /// Returns ||self|| * ||self||.
    pub fn norm_squared(self) -> Mf32 {
        self.dot(self)
    }

    /// Returns 1 / ||self||.
    pub fn rnorm(self) -> Mf32 {
        self.norm_squared().rsqrt()
    }

    /// Returns the vector scaled by `rnorm()`.
    ///
    /// Unlike `SVector3::normalized`, there is no special case for a vector
    /// with norm zero here; the result then depends on what `Mf32::rsqrt`
    /// returns for zero.
    pub fn normalized(self) -> MVector3 {
        let rnorm = self.rnorm();
        MVector3 {
            x: self.x * rnorm,
            y: self.y * rnorm,
            z: self.z * rnorm,
        }
    }

    /// Clamps every coordinate to 1.0 if it exceeds 1.0.
    pub fn clamp_one(self) -> MVector3 {
        MVector3 {
            x: Mf32::one().min(self.x),
            y: Mf32::one().min(self.y),
            z: Mf32::one().min(self.z),
        }
    }

    /// Picks self if the sign bit of mask is 0, or picks other if it is 1.
    ///
    /// The same mask is applied to all three coordinates.
    pub fn pick(self, other: MVector3, mask: Mask) -> MVector3 {
        MVector3 {
            x: self.x.pick(other.x, mask),
            y: self.y.pick(other.y, mask),
            z: self.z.pick(other.z, mask),
        }
    }

    /// Returns whether all components are finite.
    ///
    /// This is slow, use only for diagnostic purposes.
    pub fn all_finite(self) -> bool {
        self.x.all_finite() && self.y.all_finite() && self.z.all_finite()
    }
}
333 | 
334 | impl Add for SVector3 {
335 |     type Output = SVector3;
336 | 
337 |     fn add(self, other: SVector3) -> SVector3 {
338 |         SVector3 {
339 |             x: self.x + other.x,
340 |             y: self.y + other.y,
341 |             z: self.z + other.z,
342 |         }
343 |     }
344 | }
345 | 
346 | impl Add for MVector3 {
347 |     type Output = MVector3;
348 | 
349 |     fn add(self, other: MVector3) -> MVector3 {
350 |         MVector3 {
351 |             x: self.x + other.x,
352 |             y: self.y + other.y,
353 |             z: self.z + other.z,
354 |         }
355 |     }
356 | }
357 | 
358 | impl Sub for SVector3 {
359 |     type Output = SVector3;
360 | 
361 |     fn sub(self, other: SVector3) -> SVector3 {
362 |         SVector3 {
363 |             x: self.x - other.x,
364 |             y: self.y - other.y,
365 |             z: self.z - other.z,
366 |         }
367 |     }
368 | }
369 | 
370 | impl Sub for MVector3 {
371 |     type Output = MVector3;
372 | 
373 |     fn sub(self, other: MVector3) -> MVector3 {
374 |         MVector3 {
375 |             x: self.x - other.x,
376 |             y: self.y - other.y,
377 |             z: self.z - other.z,
378 |         }
379 |     }
380 | }
381 | 
382 | impl Neg for SVector3 {
383 |     type Output = SVector3;
384 | 
385 |     fn neg(self) -> SVector3 {
386 |         SVector3 {
387 |             x: -self.x,
388 |             y: -self.y,
389 |             z: -self.z,
390 |         }
391 |     }
392 | }
393 | 
394 | impl Neg for MVector3 {
395 |     type Output = MVector3;
396 | 
397 |     fn neg(self) -> MVector3 {
398 |         MVector3 {
399 |             x: -self.x,
400 |             y: -self.y,
401 |             z: -self.z,
402 |         }
403 |     }
404 | }
405 | 
406 | impl Mul<f32> for SVector3 {
407 |     type Output = SVector3;
408 | 
409 |     fn mul(self, a: f32) -> SVector3 {
410 |         SVector3 {
411 |             x: self.x * a,
412 |             y: self.y * a,
413 |             z: self.z * a,
414 |         }
415 |     }
416 | }
417 | 
418 | impl Mul<Mf32> for MVector3 {
419 |     type Output = MVector3;
420 | 
421 |     fn mul(self, a: Mf32) -> MVector3 {
422 |         MVector3 {
423 |             x: self.x * a,
424 |             y: self.y * a,
425 |             z: self.z * a,
426 |         }
427 |     }
428 | }
429 | 
430 | impl fmt::Display for SVector3 {
431 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
432 |         write!(f, "({}, {}, {})", self.x, self.y, self.z)
433 |     }
434 | }
435 | 
/// Asserts that the distance between `expected` and `computed` does not exceed
/// `margin`, to allow for floating point inaccuracy.
#[cfg(test)]
fn assert_mvectors_equal(expected: MVector3, computed: MVector3, margin: f32) {
    // Compare squared quantities, so no square root is required.
    let squared_error = (computed - expected).norm_squared();
    let squared_margin = Mf32::broadcast(margin * margin);
    let within_margin = (squared_margin - squared_error).all_sign_bits_positive();
    assert!(within_margin,
            "expected: {:?}, computed: {:?}", expected, computed);
}
444 | 
#[test]
fn verify_rotate_hemisphere() {
    let (one, zero) = (Mf32::one(), Mf32::zero());
    let unit_x = MVector3::new(one, zero, zero);
    let unit_y = MVector3::new(zero, one, zero);
    let unit_z = MVector3::new(zero, zero, one);

    // Rotating z -> y leaves a vector along x in place.
    assert_mvectors_equal(unit_x, unit_x.rotate_hemisphere(unit_y), 1e-3);

    // Likewise, rotating z -> x leaves a vector along y in place.
    assert_mvectors_equal(unit_y, unit_y.rotate_hemisphere(unit_x), 1e-3);

    // Rotating z -> y is a rotation about the x-axis, which takes y to -z.
    assert_mvectors_equal(-unit_z, unit_y.rotate_hemisphere(unit_y), 1e-3);

    // Rotating z -> x is a rotation about the y-axis, which takes x to -z.
    assert_mvectors_equal(-unit_z, unit_x.rotate_hemisphere(unit_x), 1e-3);

    // The initial pole is the positive z-axis, so rotating to that normal
    // must be the identity.
    assert_mvectors_equal(unit_x, unit_x.rotate_hemisphere(unit_z), 1e-3);
    assert_mvectors_equal(unit_y, unit_y.rotate_hemisphere(unit_z), 1e-3);
}
468 | 
#[test]
fn rotate_hemisphere_is_orthogonal() {
    let normals = bench::mvectors_on_unit_sphere(4096);
    let lhs_vectors = bench::mvectors_on_unit_sphere(4096);
    let rhs_vectors = bench::mvectors_on_unit_sphere(4096);
    let operands = lhs_vectors.iter().zip(rhs_vectors.iter());

    for (&normal, (&u, &v)) in normals.iter().zip(operands) {
        let sum = u + v;
        let rotated_sum = sum.rotate_hemisphere(normal);
        let n2_before = sum.norm_squared();
        let n2_after = rotated_sum.norm_squared();

        // The length of a vector is invariant under an orthogonal map.
        let difference = n2_before - n2_after;
        let error = difference * difference;
        assert!((Mf32::broadcast(1e-5) - error).all_sign_bits_positive(),
            "expected equal norm, got {:?} and {:?}", n2_before, n2_after);

        // An orthogonal map is linear too: rotating the sum must agree with
        // summing the rotated vectors.
        let sum_of_rotated = u.rotate_hemisphere(normal) + v.rotate_hemisphere(normal);
        assert_mvectors_equal(rotated_sum, sum_of_rotated, 1e-3);
    }
}
491 | 
#[test]
fn rotate_hemisphere_extrema() {
    let (one, zero) = (Mf32::one(), Mf32::zero());
    let unit_x = MVector3::new(one, zero, zero);
    let unit_y = MVector3::new(zero, one, zero);
    let unit_z = MVector3::new(zero, zero, one);

    // For a normal of -z the rotation formula would divide by zero. Verify
    // that this edge case is handled correctly.
    let down = -unit_z;

    // Rotating z -> -z flips the sign of a vector along x.
    assert_eq!(unit_x.rotate_hemisphere(down), -unit_x);

    // The sign of a vector along y flips as well, as the hemisphere is
    // flipped when the z-component of the normal is negative.
    assert_eq!(unit_y.rotate_hemisphere(down), -unit_y);

    // And the z-axis itself simply rotates along.
    assert_eq!(unit_z.rotate_hemisphere(down), -unit_z);
}
511 | 
#[test]
fn rotate_hemisphere_random() {
    use random::Rng;

    let pole = MVector3::new(Mf32::one(), Mf32::zero(), Mf32::zero());
    let epsilon = Mf32::broadcast(0.0001);
    let mut rng = Rng::with_seed(1, 2, 3);

    let mut had_negative_y = false;
    let mut had_positive_y = false;
    let mut had_negative_z = false;
    let mut had_positive_z = false;

    for _ in 0..4096 {
        let sample = rng.sample_hemisphere_vector();
        let rotated = sample.rotate_hemisphere(pole);
        assert!((rotated.x + epsilon).all_sign_bits_positive(),
            "when rotating {:?} to the positive x-axis, no x-coordinate should be negative, \
             but the result is {:?}", sample, rotated);

        // The rotated (y, z) coordinates should lie on a circle, so over all
        // samples both signs should show up for both coordinates.
        had_negative_y |= !(epsilon + rotated.y).all_sign_bits_positive();
        had_positive_y |= !(epsilon - rotated.y).all_sign_bits_positive();
        had_negative_z |= !(epsilon + rotated.z).all_sign_bits_positive();
        had_positive_z |= !(epsilon - rotated.z).all_sign_bits_positive();
    }

    assert!(had_negative_y);
    assert!(had_positive_y);
    assert!(had_negative_z);
    assert!(had_positive_z);
}
541 | 
// Expands to ten consecutive copies of the given block. The benchmarks below
// invoke this inside a loop of 100 iterations, to execute the operation under
// test 1000 times per benchmark iteration.
macro_rules! unroll_10 {
    { $x: block } => {
        $x $x $x $x $x $x $x $x $x $x
    }
}
547 | 
548 | #[bench]
549 | fn bench_scross_naive_1000(bencher: &mut test::Bencher) {
550 |     let vectors = bench::svector3_pairs(4096);
551 |     let mut vectors_it = vectors.iter().cycle();
552 |     bencher.iter(|| {
553 |         let &(a, b) = vectors_it.next().unwrap();
554 |         for _ in 0..100 {
555 |             unroll_10! {{
556 |                 test::black_box(test::black_box(a).cross_naive(test::black_box(b)));
557 |             }};
558 |         }
559 |     });
560 | }
561 | 
562 | #[bench]
563 | fn bench_scross_fma_1000(bencher: &mut test::Bencher) {
564 |     let vectors = bench::svector3_pairs(4096);
565 |     let mut vectors_it = vectors.iter().cycle();
566 |     bencher.iter(|| {
567 |         let &(a, b) = vectors_it.next().unwrap();
568 |         for _ in 0..100 {
569 |             unroll_10! {{
570 |                 test::black_box(test::black_box(a).cross_fma(test::black_box(b)));
571 |             }};
572 |         }
573 |     });
574 | }
575 | 
576 | #[bench]
577 | fn bench_mcross_naive_1000(bencher: &mut test::Bencher) {
578 |     let vectors = bench::mvector3_pairs(4096 / 8);
579 |     let mut vectors_it = vectors.iter().cycle();
580 |     bencher.iter(|| {
581 |         let &(a, b) = vectors_it.next().unwrap();
582 |         for _ in 0..100 {
583 |             unroll_10! {{
584 |                 test::black_box(test::black_box(a).cross_naive(test::black_box(b)));
585 |             }};
586 |         }
587 |     });
588 | }
589 | 
590 | #[bench]
591 | fn bench_mcross_fma_1000(bencher: &mut test::Bencher) {
592 |     let vectors = bench::mvector3_pairs(4096 / 8);
593 |     let mut vectors_it = vectors.iter().cycle();
594 |     bencher.iter(|| {
595 |         let &(a, b) = vectors_it.next().unwrap();
596 |         for _ in 0..100 {
597 |             unroll_10! {{
598 |                 test::black_box(test::black_box(a).cross_fma(test::black_box(b)));
599 |             }};
600 |         }
601 |     });
602 | }
603 | 
604 | #[bench]
605 | fn bench_sdot_naive_1000(bencher: &mut test::Bencher) {
606 |     let vectors = bench::svector3_pairs(4096);
607 |     let mut vectors_it = vectors.iter().cycle();
608 |     bencher.iter(|| {
609 |         let &(a, b) = vectors_it.next().unwrap();
610 |         for _ in 0..100 {
611 |             unroll_10! {{
612 |                 test::black_box(test::black_box(a).dot_naive(test::black_box(b)));
613 |             }};
614 |         }
615 |     });
616 | }
617 | 
618 | #[bench]
619 | fn bench_sdot_fma_1000(bencher: &mut test::Bencher) {
620 |     let vectors = bench::svector3_pairs(4096);
621 |     let mut vectors_it = vectors.iter().cycle();
622 |     bencher.iter(|| {
623 |         let &(a, b) = vectors_it.next().unwrap();
624 |         for _ in 0..100 {
625 |             unroll_10! {{
626 |                 test::black_box(test::black_box(a).dot_fma(test::black_box(b)));
627 |             }};
628 |         }
629 |     });
630 | }
631 | 
632 | #[bench]
633 | fn bench_mdot_naive_1000(bencher: &mut test::Bencher) {
634 |     let vectors = bench::mvector3_pairs(4096 / 8);
635 |     let mut vectors_it = vectors.iter().cycle();
636 |     bencher.iter(|| {
637 |         let &(a, b) = vectors_it.next().unwrap();
638 |         for _ in 0..100 {
639 |             unroll_10! {{
640 |                 test::black_box(test::black_box(a).dot_naive(test::black_box(b)));
641 |             }};
642 |         }
643 |     });
644 | }
645 | 
646 | #[bench]
647 | fn bench_mdot_fma_1000(bencher: &mut test::Bencher) {
648 |     let vectors = bench::mvector3_pairs(4096 / 8);
649 |     let mut vectors_it = vectors.iter().cycle();
650 |     bencher.iter(|| {
651 |         let &(a, b) = vectors_it.next().unwrap();
652 |         for _ in 0..100 {
653 |             unroll_10! {{
654 |                 test::black_box(test::black_box(a).dot_fma(test::black_box(b)));
655 |             }};
656 |         }
657 |     });
658 | }
659 | 
660 | #[bench]
661 | fn bench_rotate_hemisphere_1000(bencher: &mut test::Bencher) {
662 |     let vectors = bench::mvector3_pairs(4096 / 8);
663 |     let mut vectors_it = vectors.iter().cycle();
664 |     bencher.iter(|| {
665 |         let &(v, n) = vectors_it.next().unwrap();
666 |         for _ in 0..100 {
667 |             unroll_10! {{
668 |                 test::black_box(test::black_box(v).rotate_hemisphere(test::black_box(n)));
669 |             }};
670 |         }
671 |     });
672 | }
673 | 


--------------------------------------------------------------------------------
/src/wavefront.rs:
--------------------------------------------------------------------------------
  1 | // Convector -- An interactive CPU path tracer
  2 | // Copyright 2016 Ruud van Asseldonk
  3 | 
  4 | // This program is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU General Public License version 3. A copy
  6 | // of the License is available in the root of the repository.
  7 | 
  8 | //! This module reads Wavefront OBJ files. There are crates for that, but
  9 | //! reinventing the wheel is much more fun.
 10 | 
 11 | use filebuffer::FileBuffer;
 12 | use material::SMaterial;
 13 | use std::collections::HashMap;
 14 | use std::path::Path;
 15 | use std::str::{FromStr, from_utf8};
 16 | use vector3::SVector3;
 17 | 
/// A triangular face of a mesh, which refers to its data by index.
pub struct Triangle {
    /// Zero-based indices of the three corners into `Mesh::vertices`.
    pub vertices: (u32, u32, u32),
    /// Zero-based indices of the texture coordinates of the three corners into
    /// `Mesh::tex_coords`. This is `None` unless every corner of the face
    /// specified a texture coordinate index.
    pub tex_coords: Option<(u32, u32, u32)>,
    /// The material that was active when the face was read.
    pub material: SMaterial,
}
 23 | 
/// The geometry read from a Wavefront OBJ file.
pub struct Mesh {
    /// The vertex positions (`v` statements), in file order.
    pub vertices: Vec<SVector3>,
    /// The (u, v) texture coordinates (`vt` statements), in file order.
    pub tex_coords: Vec<(f32, f32)>,
    /// The triangles built from the faces (`f` statements). Faces with more
    /// than three corners are split into multiple triangles.
    pub triangles: Vec<Triangle>,
}
 29 | 
 30 | fn assert_nondegenerate(vertices: &[SVector3], line: u32, i0: u32, i1: u32, i2: u32) {
 31 |     let v0 = vertices[i0 as usize];
 32 |     let v1 = vertices[i1 as usize];
 33 |     let v2 = vertices[i2 as usize];
 34 | 
 35 |     // The cross product of two edges must not be zero. If it is, the three
 36 |     // vertices are collinear.
 37 |     let e1 = v0 - v2;
 38 |     let e2 = v1 - v0;
 39 |     if e1.cross(e2).norm_squared() == 0.0 {
 40 |         println!("encountered degenerate triangle while loading mesh");
 41 |         println!("  line:     {}", line);
 42 |         println!("  vertices: {}, {}, {}", v0, v1, v2);
 43 |         println!("  indices:  {}, {}, {}", i0 + 1, i1 + 1, i2 + 1);
 44 |         panic!("go clean your geometry");
 45 |     }
 46 | }
 47 | 
 48 | /// Returns the vertex index, and the texture coordinate index if there is one.
 49 | fn parse_vertex_index(index: &str) -> (u32, Option<u32>) {
 50 |     let mut parts = index.split('/').map(|i| u32::from_str(i).unwrap());
 51 |     let vidx = parts.next().expect("missing vertex index");
 52 |     let tidx = parts.next();
 53 |     // Indices in the obj file are 1-based, but Rust is 0-based.
 54 |     (vidx - 1, tidx.map(|i| i - 1))
 55 | }
 56 | 
 57 | pub fn push_triangle(vertices: &[SVector3],
 58 |                      triangles: &mut Vec<Triangle>,
 59 |                      i0: (u32, Option<u32>),
 60 |                      i1: (u32, Option<u32>),
 61 |                      i2: (u32, Option<u32>),
 62 |                      material: SMaterial,
 63 |                      line_nr: u32) {
 64 |     assert_nondegenerate(&vertices, line_nr, i0.0, i1.0, i2.0);
 65 |     let vidxs = (i0.0, i1.0, i2.0);
 66 |     let tidxs = match (i0.1, i1.1, i2.1) {
 67 |         (Some(t0), Some(t1), Some(t2)) => Some((t0, t1, t2)),
 68 |         _ => None,
 69 |     };
 70 |     let triangle = Triangle {
 71 |         vertices: vidxs,
 72 |         tex_coords: tidxs,
 73 |         material: material,
 74 |     };
 75 |     triangles.push(triangle);
 76 | }
 77 | 
 78 | impl Mesh {
 79 |     pub fn load<P: AsRef<Path>>(path: P) -> Mesh {
 80 |         Mesh::load_with_materials(path, &HashMap::new())
 81 |     }
 82 | 
 83 |     pub fn load_with_materials<P: AsRef<Path>>(path: P,
 84 |                                                materials: &HashMap<&str, SMaterial>)
 85 |                                                -> Mesh {
 86 |         let fbuffer = FileBuffer::open(path).expect("failed to open file");
 87 |         let input = from_utf8(&fbuffer[..]).expect("obj must be valid utf-8");
 88 | 
 89 |         let mut vertices = Vec::new();
 90 |         let mut tex_coords = Vec::new();
 91 |         let mut triangles = Vec::new();
 92 |         let mut material = SMaterial::white(); // The default material.
 93 | 
 94 |         for (line, line_nr) in input.lines().zip(1u32..) {
 95 |             if line.is_empty() {
 96 |                 continue;
 97 |             }
 98 | 
 99 |             let mut pieces = line.split_whitespace();
100 |             match pieces.next() {
101 |                 Some("v") => {
102 |                     let mut coords = pieces.map(|v| f32::from_str(v).unwrap());
103 |                     let vertex = SVector3 {
104 |                         x: coords.next().expect("missing x coordinate"),
105 |                         y: coords.next().expect("missing y coordinate"),
106 |                         z: coords.next().expect("missing z coordinate"),
107 |                     };
108 |                     vertices.push(vertex);
109 |                 }
110 |                 Some("vt") => {
111 |                     let mut coords = pieces.map(|v| f32::from_str(v).unwrap());
112 |                     let u = coords.next().expect("missing u coordinate");
113 |                     let v = coords.next().expect("missing v coordinate");
114 |                     tex_coords.push((u, v));
115 |                 }
116 |                 Some("usemtl") => {
117 |                     let material_name = pieces.next().expect("missing material name");
118 |                     if let Some(&new_mat) = materials.get(material_name) {
119 |                         material = new_mat;
120 |                     } else {
121 |                         panic!("material '{}' not present in material dictionary",
122 |                                material_name);
123 |                     }
124 |                 }
125 |                 Some("f") => {
126 |                     // Indices stored are 1-based, convert to 0-based.
127 |                     let mut indices = pieces.map(parse_vertex_index);
128 |                     let i0 = indices.next().expect("missing triangle index");
129 |                     let i1 = indices.next().expect("missing triangle index");
130 |                     let mut i2 = indices.next().expect("missing triangle index");
131 | 
132 |                     push_triangle(&vertices, &mut triangles, i0, i1, i2, material, line_nr);
133 | 
134 |                     // There might be a quad or n-gon. Assuming it is convex, we
135 |                     // can triangulate it at import time.
136 |                     while let Some(i3) = indices.next() {
137 |                         push_triangle(&vertices, &mut triangles, i0, i2, i3, material, line_nr);
138 |                         i2 = i3;
139 |                     }
140 |                 }
141 |                 _ => {
142 |                     // Anything else is not supported.
143 |                 }
144 |             }
145 |         }
146 | 
147 |         Mesh {
148 |             vertices: vertices,
149 |             triangles: triangles,
150 |             tex_coords: tex_coords,
151 |         }
152 |     }
153 | }
154 | 
155 | // The loader should be able to load all of these files without crashing. The
156 | // files are known to be well-formed and without degenerate faces.
157 | 
#[test]
fn read_indoor() {
    // Loading succeeds only if every material that the file refers to is
    // present in the dictionary.
    let mut material_dict = HashMap::new();
    material_dict.insert("wall", SMaterial::white());
    material_dict.insert("glass", SMaterial::sky());
    let _mesh = Mesh::load_with_materials("models/box_walls.obj", &material_dict);
}
165 | 
#[test]
fn read_stanford_bunny() {
    // The test passes if loading does not panic.
    let _mesh = Mesh::load("models/stanford_bunny.obj");
}
170 | 
#[test]
fn read_stanford_dragon() {
    // The test passes if loading does not panic.
    let _mesh = Mesh::load("models/stanford_dragon.obj");
}
175 | 
#[test]
fn read_suzanne() {
    // The test passes if loading does not panic.
    let _mesh = Mesh::load("models/suzanne.obj");
}
180 | 


--------------------------------------------------------------------------------
/textures/floor.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ruuda/convector/2f5f2428fa6c54002bd2ee8ce3d0f2188aab49f8/textures/floor.jpg


--------------------------------------------------------------------------------
/textures/license.md:
--------------------------------------------------------------------------------
1 | The textures here have been taken from [textures.com](http://www.textures.com)
2 | and are bundled here with my default scene. They can be downloaded for free from
3 | textures.com but redistribution in the form of texture packs is not allowed. See
4 | [their terms of use](http://textures.com/terms_of_use.html) for details.
5 | 


--------------------------------------------------------------------------------
/textures/wood_light.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ruuda/convector/2f5f2428fa6c54002bd2ee8ce3d0f2188aab49f8/textures/wood_light.jpg


--------------------------------------------------------------------------------
/tools/approx_acos.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | # Convector -- An interactive CPU path tracer
 4 | # Copyright 2016 Ruud van Asseldonk
 5 | 
 6 | # This program is free software: you can redistribute it and/or modify
 7 | # it under the terms of the GNU General Public License version 3. A copy
 8 | # of the License is available in the root of the repository.
 9 | 
10 | # The goal is to approximate acos(x) with a rational function f such that the
11 | # worst absolute error is minimal. That is, pick the function that performs best
12 | # in the worst case. Furthermore, I impose the following restrictions:
13 | #
14 | #  * f(0) = pi/2. This implies that the constant term is pi/2.
15 | #  * f(1) = 0 and f(-1) = pi. This implies that (a + b) / (1 + c + d) = -pi/2.
16 | 
17 | from mpmath import mp, fabs, acos
18 | from scipy.optimize import minimize
19 | 
# Let mpmath compute with 64 bits of precision.
mp.prec = 64
21 | 
def d(a, b, c):
    """Return the coefficient of x**4 in the denominator of f.

    This coefficient is not free: it is the value for which
    (a + b) / (1 + c + d) = -pi/2 holds, so that f(1) = 0 and f(-1) = pi.
    """
    numerator_sum = a + b
    return -1 - 2 * numerator_sum / mp.pi - c
24 | 
def f(x, a, b, c):
    """Evaluate the rational approximation of acos at x.

    The approximation is pi/2 plus an odd cubic divided by an even quartic; the
    coefficient of x**4 in the denominator is derived from a, b, and c.
    """
    numerator = a * x + b * x**3
    denominator = 1 + c * x**2 + d(a, b, c) * x**4
    return mp.pi/2 + numerator / denominator
27 | 
def error(coefs, progress=True):
    """Return the largest absolute difference between acos and f.

    The difference is evaluated at the points k / 4096 for every integer k from
    -4096 up to and including 4096. If progress is true, the coefficients and
    the error are printed.
    """
    a, b, c = coefs
    samples = [k / mp.mpf(4096) for k in range(-4096, 4097)]
    worst = max(fabs(acos(s) - f(s, a, b, c)) for s in samples)
    if progress:
        print('(a, b, c, d): ({}, {}, {}, {})'.format(a, b, c, d(a, b, c)))
        print('evaluated error: ', worst)
        print()
    return float(worst)
37 | 
# Starting point (a, b, c) for the optimizer.
initial_guess = (-0.9823, 0.9421, -1.1851)
# Minimize the worst-case error. The optimizer calls error with only the
# coefficients, so progress is printed for every evaluation.
coefs = minimize(error, initial_guess).x
print('a:', coefs[0])
print('b:', coefs[1])
print('c:', coefs[2])
# d is not optimized, it follows from the other coefficients.
print('d:', d(*coefs))
print('max error:', error(coefs, progress=False))
45 | 
46 | # Output:
47 | #
48 | #     a: -0.939115566365855,
49 | #     b: 0.9217841528914573,
50 | #     c: -1.2845906244690837,
51 | #     d: 0.295624144969963174
52 | #     max error:  0.0167244179117447796
53 | 


--------------------------------------------------------------------------------
/tools/approx_cos.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | # Convector -- An interactive CPU path tracer
 4 | # Copyright 2016 Ruud van Asseldonk
 5 | 
 6 | # This program is free software: you can redistribute it and/or modify
 7 | # it under the terms of the GNU General Public License version 3. A copy
 8 | # of the License is available in the root of the repository.
 9 | 
10 | # The goal is to approximate cos(x) with a polynomial f on the domain (-pi, pi),
11 | # such that the worst absolute error is minimal. That is, pick the function that
12 | # performs best in the worst case. Furthermore, I impose the following
13 | # restrictions:
14 | #
15 | #  * f(0) = 1. This implies that the constant term is 1.
16 | #  * f(pi) = -1 and f(-pi) = -1. This implies that
17 | #    c = -(2 + a*pi^2 + b*pi^4) / pi^6.
18 | 
19 | from mpmath import mp, cos, fabs
20 | from scipy.optimize import minimize
21 | 
22 | mp.prec = 64
23 | 
def c(a, b):
    """Return the x**6 coefficient implied by a and b.

    The value is the c that satisfies 1 + a*pi^2 + b*pi^4 + c*pi^6 = -1,
    i.e. the endpoint constraints f(pi) = f(-pi) = -1.
    """
    numerator = 2.0 + a * mp.pi**2 + b * mp.pi**4
    return -numerator / mp.pi**6
26 | 
def f(x, a, b):
    """Evaluate the even degree-6 polynomial approximation of cos at x.

    The constant term is 1 and the x**6 coefficient is derived from a and b
    via c().
    """
    quadratic = a * x**2
    quartic = b * x**4
    sextic = c(a, b) * x**6
    return 1.0 + quadratic + quartic + sextic
29 | 
def error(coefs, progress=True):
    """Return the worst absolute error of f against cos for these coefficients.

    coefs unpacks to (a, b). The error is sampled at the 8193 evenly spaced
    points k * pi / 4096 for k in -4096..4096, and the largest absolute
    deviation is returned as a Python float. When progress is true, the
    coefficients (including the derived c) and the error are printed.
    """
    a, b = coefs
    half_samples = mp.mpf(4096)
    worst = None
    for k in range(-4096, 4097):
        x = k * mp.pi / half_samples
        deviation = fabs(cos(x) - f(x, a, b))
        # Keep the first-seen largest value, exactly as max() would.
        if worst is None or deviation > worst:
            worst = deviation
    if progress:
        print('(a, b, c): ({}, {}, {})'.format(a, b, c(a, b)))
        print('evaluated error: ', worst)
        print()
    return float(worst)
39 | 
import sys

# Starting point for the search over the free coefficients (a, b); c is
# derived from them by c().
initial_guess = (-0.4960, 0.03926)
result = minimize(error, initial_guess)
if not result.success:
    # The objective is a maximum of absolute errors, so it is not smooth and
    # the solver may stop without converging. Report its own status on stderr
    # instead of silently trusting result.x; stdout stays unchanged.
    print('warning: minimize did not report convergence:', result.message,
          file=sys.stderr)
coefs = result.x
print('a:', coefs[0])
print('b:', coefs[1])
print('c:', c(*coefs))
# Re-evaluate once more without the per-step progress output.
print('max error:', error(coefs, progress=False))
46 | 
47 | # Output:
48 | #
49 | #     a: -0.496000299455
50 | #     b: 0.0392596924214
51 | #     c: -0.000966231179636657107
52 | #     max error: 0.0020164493561441203
53 | 


--------------------------------------------------------------------------------
/tools/approx_sin.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | # Convector -- An interactive CPU path tracer
 4 | # Copyright 2016 Ruud van Asseldonk
 5 | 
 6 | # This program is free software: you can redistribute it and/or modify
 7 | # it under the terms of the GNU General Public License version 3. A copy
 8 | # of the License is available in the root of the repository.
 9 | 
10 | # The goal is to approximate sin(x) with a polynomial f on the domain (-pi, pi),
11 | # such that the worst absolute error is minimal. That is, pick the function that
12 | # performs best in the worst case. Furthermore, I impose the following
13 | # restrictions:
14 | #
15 | #  * f(0)  = 0. This implies that the constant term is 0.
16 | #  * f(pi) = 0 and f(-pi) = 0. This implies that c = -(a*pi + b*pi^3) / pi^5.
17 | 
18 | from mpmath import mp, fabs, sin
19 | from scipy.optimize import minimize
20 | 
21 | mp.prec = 64
22 | 
def c(a, b):
    """Return the x**5 coefficient implied by a and b.

    The value is the c that satisfies a*pi + b*pi^3 + c*pi^5 = 0, i.e. the
    endpoint constraints f(pi) = f(-pi) = 0.
    """
    numerator = a * mp.pi + b * mp.pi**3
    return -numerator / mp.pi**5
25 | 
def f(x, a, b):
    """Evaluate the odd degree-5 polynomial approximation of sin at x.

    There is no constant term and the x**5 coefficient is derived from a and
    b via c().
    """
    linear = a * x
    cubic = b * x**3
    quintic = c(a, b) * x**5
    return linear + cubic + quintic
28 | 
def error(coefs, progress=True):
    """Return the worst absolute error of f against sin for these coefficients.

    coefs unpacks to (a, b). The error is sampled at the 8193 evenly spaced
    points k * pi / 4096 for k in -4096..4096, and the largest absolute
    deviation is returned as a Python float. When progress is true, the
    coefficients (including the derived c) and the error are printed.
    """
    a, b = coefs
    half_samples = mp.mpf(4096)
    worst = None
    for k in range(-4096, 4097):
        x = k * mp.pi / half_samples
        deviation = fabs(sin(x) - f(x, a, b))
        # Keep the first-seen largest value, exactly as max() would.
        if worst is None or deviation > worst:
            worst = deviation
    if progress:
        print('(a, b, c): ({}, {}, {})'.format(a, b, c(a, b)))
        print('evaluated error: ', worst)
        print()
    return float(worst)
38 | 
import sys

# Starting point for the search over the free coefficients (a, b); c is
# derived from them by c().
initial_guess = (0.9820, -0.1522)
result = minimize(error, initial_guess)
if not result.success:
    # The objective is a maximum of absolute errors, so it is not smooth and
    # the solver may stop without converging. Report its own status on stderr
    # instead of silently trusting result.x; stdout stays unchanged.
    print('warning: minimize did not report convergence:', result.message,
          file=sys.stderr)
coefs = result.x
print('a:', coefs[0])
print('b:', coefs[1])
print('c:', c(*coefs))
# Re-evaluate once more without the per-step progress output.
print('max error:', error(coefs, progress=False))
45 | 
46 | # Output:
47 | #
48 | #     a: 0.982012145975
49 | #     b: -0.152178468117
50 | #     c: 0.00533758325004438232
51 | #     max error: 0.008109495819698682
52 | 


--------------------------------------------------------------------------------