├── README.md
├── fsharp
    ├── .gitignore
    ├── Makefile
    ├── README.md
    ├── ray.fs
    ├── shell.nix
    └── trace.fsproj
├── futhark
    ├── .gitignore
    ├── Makefile
    ├── README.md
    ├── bvh.fut
    ├── futhark.pkg
    ├── lib
    │   └── github.com
    │   │   └── diku-dk
    │   │       ├── segmented
    │   │           ├── segmented.fut
    │   │           └── segmented_tests.fut
    │   │       └── sorts
    │   │           ├── .gitignore
    │   │           ├── bubble_sort.fut
    │   │           ├── bubble_sort_tests.fut
    │   │           ├── insertion_sort.fut
    │   │           ├── insertion_sort_tests.fut
    │   │           ├── merge_sort.fut
    │   │           ├── merge_sort_tests.fut
    │   │           ├── quick_sort.fut
    │   │           ├── quick_sort_test.fut
    │   │           ├── radix_sort.fut
    │   │           └── radix_sort_tests.fut
    ├── main.c
    ├── prim.fut
    ├── radixtree.fut
    ├── ray.fut
    └── shell.nix
├── haskell
    ├── .gitignore
    ├── BVH.hs
    ├── Image.hs
    ├── Makefile
    ├── README.md
    ├── Raytracing.hs
    ├── Scene.hs
    ├── Vec3.hs
    ├── bench-ray.hs
    ├── cabal.project
    ├── ray.cabal
    ├── ray.hs
    ├── shell.nix
    └── stack.yaml
├── irreg.png
├── ispc
    └── shell.nix
├── ocaml
    ├── .gitignore
    ├── .merlin
    ├── Makefile
    ├── README.md
    ├── _tags
    ├── ray.ml
    ├── shell.nix
    └── timing.c
├── rgbbox.png
├── rust
    ├── .gitignore
    ├── Cargo.lock
    ├── Cargo.toml
    ├── Makefile
    ├── README.md
    ├── benches
    │   └── ray.rs
    ├── shell.nix
    └── src
    │   ├── lib.rs
    │   ├── main.rs
    │   └── sample_scenes.rs
├── scala
    ├── .gitignore
    ├── Makefile
    ├── README.md
    ├── build.sbt
    ├── project
    │   ├── build.properties
    │   └── plugins.sbt
    ├── shell.nix
    └── src
    │   └── main
    │       └── scala
    │           └── raytracer
    │               ├── BVH.scala
    │               ├── Bench.scala
    │               ├── Image.scala
    │               ├── Main.scala
    │               ├── Raytracer.scala
    │               ├── Scene.scala
    │               └── Vec3.scala
└── sml
    ├── .gitignore
    ├── Makefile
    ├── ray.sml
    └── shell.nix


/README.md:
--------------------------------------------------------------------------------
  1 | # Performance comparison of parallel ray tracing in functional programming languages
  2 | 
  3 | This repository is an embryonic collection of ray tracers written with
  4 | parallel functional programming techniques.  The intent is to
  5 | investigate, on a rather small and simple problem, to which degree
  6 | functional programming lives up to the frequent promise of easy
  7 | parallelism, and whether the resulting code is actually fast in an
  8 | objective sense.  The benchmarking technique is mostly crude, so
  9 | assume only large relative differences are meaningful.  I welcome
 10 | contributions, as I have little confidence that any of my code is
 11 | optimal.  I am an expert in *at most* one of the languages on
 12 | exhibition here.  I also welcome new implementations in other
 13 | languages!
 14 | 
 15 | Note also that this is not a **good** ray tracer.  It does not
 16 | generate particularly pretty images.  It's chosen simply because it
 17 | expresses two interesting kinds of parallelism (see below), and
 18 | because even an ugly image is more interesting than just a number.
 19 | Two scenes are used.  The first is **rgbbox**:
 20 | 
 21 | ![rgbbox](rgbbox.png?stopcachingthis)
 22 | 
 23 | The second is **irreg**:
 24 | 
 25 | ![irreg](irreg.png?stopcachingthis)
 26 | 
 27 | This second scene is interesting because the load is unbalanced: all
 28 | objects are in the lower half of the pixels.
 29 | 
 30 | For each scene, two things are benchmarked:
 31 | 
 32 | 1. Constructing a
 33 |    [BVH](https://en.wikipedia.org/wiki/Bounding_volume_hierarchy) of
 34 |    the scene.  This is interesting because it is a divide-and-conquer
 35 |    task-parallel problem.
 36 | 
 37 | 2. Actually rendering the scene, accelerated by the BVH.  This is
 38 |    mostly straightforward data parallelism, but with a potentially
 39 |    beefy amount of work for each pixel.
 40 | 
 41 | ## Results
 42 | 
 43 | The following measurements are for 1000x1000 renderings.  I used a
 44 | Ryzen 1700X (8 cores, 16 threads) CPU and an MI100 GPU.  **Compare
 45 | numbers within the same column.**
 46 | 
 47 | Language | rgbbox (BVH) | rgbbox (render) | irreg (BVH) | irreg (render)
 48 | -------- | ------------ | --------------- | ----------- | --------------
 49 | [F#](fsharp/)             | 0.5ms |  816ms |  6.1ms |  437ms
 50 | [Futhark (GPU)](futhark/) | 1.1ms |   14ms |  1.4ms |    8ms
 51 | [Futhark (CPU)](futhark/) | 0.2ms |  179ms |  2.8ms |   62ms
 52 | [Haskell](haskell/)       | 0.3ms |  590ms | 12.2ms |  344ms
 53 | [MPL][mpl]                | 0.4ms |  341ms |  9.4ms |  112ms
 54 | [OCaml](ocaml/)           | 1.3ms |  723ms |   15ms |  240ms
 55 | [Rust](rust/)             | 0.1ms |  258ms |  0.8ms |  100ms
 56 | [Scala](scala/)           | 0.2ms |  306ms |  4.2ms |  126ms
 57 | 
 58 | [mpl]: https://github.com/MPLLang/mpl/tree/master/examples/src/ray
 59 | 
 60 | ## Commentary
 61 | 
 62 | The Haskell implementation uses the `Strict` language pragma to
 63 | disable laziness in the core modules.  This has about 1.5-2x impact on
 64 | the run-time.  The
 65 | [massiv](https://hackage.haskell.org/package/massiv-0.5.1.0) library
 66 | is used for parallel arrays and is the source of most of the
 67 | performance.
 68 | 
 69 | After a few false starts, F# runs quite fast when using .NET Core.
 70 | The main tricks appear to be [using inline functions and explicit
 71 | value types](https://github.com/athas/raytracers/pull/12).
 72 | 
 73 | MPL (which is a parallelism-oriented fork of
 74 | [MLton](http://mlton.org/) for Standard ML) is definitely the star
 75 | here.  The code is readable, written in a completely natural style,
 76 | and performance is excellent.
 77 | 
 78 | Multicore OCaml is also quite fast, and the code is likewise very
 79 | clean.
 80 | 
 81 | While the implementations are allowed to use single-precision floating
 82 | point if they wish, the Scala implementation is actually [much faster
 83 | when using double
 84 | precision](https://github.com/athas/raytracers/pull/24).
 85 | 
 86 | While Futhark is fast, the code is significantly longer and more
 87 | complex.  This is particularly because of the BVH construction.  In
 88 | all other implementations, the BVH is expressed as a straightforward
 89 | recursive divide-and-conquer function, which is also easy to
 90 | parallelise with fork-join techniques.  Since Futhark does not support
 91 | recursion, it instead uses a bottom-up technique presented by Tero
 92 | Karras in the paper [Maximizing Parallelism in the Construction of
 93 | BVHs, Octrees, and *k*-d
 94 | Trees](https://research.nvidia.com/sites/default/files/publications/karras2012hpg_paper.pdf).
 95 | This is actually a pretty fast technique (although not for the small
 96 | scenes used here), but it is about two hundred lines longer than the
 97 | recursive formulation.  The CPU timings use the `multicore` backend
 98 | and `clang` for compiling the C code.
 99 | 
100 | While Rust is not a functional language, it is included as an example
101 | of the performance of (relatively) low-level programming.
102 | Unsurprisingly, it is among the fastest CPU languages, as it has a
103 | mature compiler, and its default behaviour of unboxing everything is
104 | exactly what you need for this program.
105 | 
106 | What is not visible from the above table is that most of the
107 | implementations were significantly slower in their original
108 | formulation.  Only Futhark, MPL, and Rust are essentially unchanged
109 | from their first straightforward implementation.  For the others, most
110 | of the performance comes down to various low-level tweaks, in
111 | particular avoiding boxing and allocations.  This is not exactly
112 | unexpected, but I still find it sad that when it comes to performance
113 | in functional languages, we must think about the *compiler* more than
114 | we think about the *language*.
115 | 
116 | ## See also
117 | 
118 | Jon Harrop's [Ray tracer language
119 | comparison](https://www.ffconsultancy.com/languages/ray_tracer/index.html)
120 | is an inspiration for this page.  The main difference is that I focus
121 | on parallelism.  The ray tracer here also requires the construction of
122 | an explicit BVH from scene data, while Jon Harrop's ray tracer used a
123 | functional formulation to describe the recursive structure of his
124 | scene.
125 | 


--------------------------------------------------------------------------------
/fsharp/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | ##
  4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
  5 | 
  6 | # User-specific files
  7 | *.rsuser
  8 | *.suo
  9 | *.user
 10 | *.userosscache
 11 | *.sln.docstates
 12 | 
 13 | # User-specific files (MonoDevelop/Xamarin Studio)
 14 | *.userprefs
 15 | 
 16 | # Mono auto generated files
 17 | mono_crash.*
 18 | 
 19 | # Build results
 20 | [Dd]ebug/
 21 | [Dd]ebugPublic/
 22 | [Rr]elease/
 23 | [Rr]eleases/
 24 | x64/
 25 | x86/
 26 | [Aa][Rr][Mm]/
 27 | [Aa][Rr][Mm]64/
 28 | bld/
 29 | [Bb]in/
 30 | [Oo]bj/
 31 | [Ll]og/
 32 | 
 33 | # Visual Studio 2015/2017 cache/options directory
 34 | .vs/
 35 | # Uncomment if you have tasks that create the project's static files in wwwroot
 36 | #wwwroot/
 37 | 
 38 | # Visual Studio 2017 auto generated files
 39 | Generated\ Files/
 40 | 
 41 | # MSTest test Results
 42 | [Tt]est[Rr]esult*/
 43 | [Bb]uild[Ll]og.*
 44 | 
 45 | # NUNIT
 46 | *.VisualState.xml
 47 | TestResult.xml
 48 | 
 49 | # Build Results of an ATL Project
 50 | [Dd]ebugPS/
 51 | [Rr]eleasePS/
 52 | dlldata.c
 53 | 
 54 | # Benchmark Results
 55 | BenchmarkDotNet.Artifacts/
 56 | 
 57 | # .NET Core
 58 | project.lock.json
 59 | project.fragment.lock.json
 60 | artifacts/
 61 | 
 62 | # StyleCop
 63 | StyleCopReport.xml
 64 | 
 65 | # Files built by Visual Studio
 66 | *_i.c
 67 | *_p.c
 68 | *_h.h
 69 | *.ilk
 70 | *.meta
 71 | *.obj
 72 | *.iobj
 73 | *.pch
 74 | *.pdb
 75 | *.ipdb
 76 | *.pgc
 77 | *.pgd
 78 | *.rsp
 79 | *.sbr
 80 | *.tlb
 81 | *.tli
 82 | *.tlh
 83 | *.tmp
 84 | *.tmp_proj
 85 | *_wpftmp.csproj
 86 | *.log
 87 | *.vspscc
 88 | *.vssscc
 89 | .builds
 90 | *.pidb
 91 | *.svclog
 92 | *.scc
 93 | 
 94 | # Chutzpah Test files
 95 | _Chutzpah*
 96 | 
 97 | # Visual C++ cache files
 98 | ipch/
 99 | *.aps
100 | *.ncb
101 | *.opendb
102 | *.opensdf
103 | *.sdf
104 | *.cachefile
105 | *.VC.db
106 | *.VC.VC.opendb
107 | 
108 | # Visual Studio profiler
109 | *.psess
110 | *.vsp
111 | *.vspx
112 | *.sap
113 | 
114 | # Visual Studio Trace Files
115 | *.e2e
116 | 
117 | # TFS 2012 Local Workspace
118 | $tf/
119 | 
120 | # Guidance Automation Toolkit
121 | *.gpState
122 | 
123 | # ReSharper is a .NET coding add-in
124 | _ReSharper*/
125 | *.[Rr]e[Ss]harper
126 | *.DotSettings.user
127 | 
128 | # JustCode is a .NET coding add-in
129 | .JustCode
130 | 
131 | # TeamCity is a build add-in
132 | _TeamCity*
133 | 
134 | # DotCover is a Code Coverage Tool
135 | *.dotCover
136 | 
137 | # AxoCover is a Code Coverage Tool
138 | .axoCover/*
139 | !.axoCover/settings.json
140 | 
141 | # Visual Studio code coverage results
142 | *.coverage
143 | *.coveragexml
144 | 
145 | # NCrunch
146 | _NCrunch_*
147 | .*crunch*.local.xml
148 | nCrunchTemp_*
149 | 
150 | # MightyMoose
151 | *.mm.*
152 | AutoTest.Net/
153 | 
154 | # Web workbench (sass)
155 | .sass-cache/
156 | 
157 | # Installshield output folder
158 | [Ee]xpress/
159 | 
160 | # DocProject is a documentation generator add-in
161 | DocProject/buildhelp/
162 | DocProject/Help/*.HxT
163 | DocProject/Help/*.HxC
164 | DocProject/Help/*.hhc
165 | DocProject/Help/*.hhk
166 | DocProject/Help/*.hhp
167 | DocProject/Help/Html2
168 | DocProject/Help/html
169 | 
170 | # Click-Once directory
171 | publish/
172 | 
173 | # Publish Web Output
174 | *.[Pp]ublish.xml
175 | *.azurePubxml
176 | # Note: Comment the next line if you want to checkin your web deploy settings,
177 | # but database connection strings (with potential passwords) will be unencrypted
178 | *.pubxml
179 | *.publishproj
180 | 
181 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
182 | # checkin your Azure Web App publish settings, but sensitive information contained
183 | # in these scripts will be unencrypted
184 | PublishScripts/
185 | 
186 | # NuGet Packages
187 | *.nupkg
188 | # The packages folder can be ignored because of Package Restore
189 | **/[Pp]ackages/*
190 | # except build/, which is used as an MSBuild target.
191 | !**/[Pp]ackages/build/
192 | # Uncomment if necessary however generally it will be regenerated when needed
193 | #!**/[Pp]ackages/repositories.config
194 | # NuGet v3's project.json files produce more ignorable files
195 | *.nuget.props
196 | *.nuget.targets
197 | 
198 | # Microsoft Azure Build Output
199 | csx/
200 | *.build.csdef
201 | 
202 | # Microsoft Azure Emulator
203 | ecf/
204 | rcf/
205 | 
206 | # Windows Store app package directories and files
207 | AppPackages/
208 | BundleArtifacts/
209 | Package.StoreAssociation.xml
210 | _pkginfo.txt
211 | *.appx
212 | *.appxbundle
213 | *.appxupload
214 | 
215 | # Visual Studio cache files
216 | # files ending in .cache can be ignored
217 | *.[Cc]ache
218 | # but keep track of directories ending in .cache
219 | !?*.[Cc]ache/
220 | 
221 | # Others
222 | ClientBin/
223 | ~$*
224 | *~
225 | *.dbmdl
226 | *.dbproj.schemaview
227 | *.jfm
228 | *.pfx
229 | *.publishsettings
230 | orleans.codegen.cs
231 | 
232 | # Including strong name files can present a security risk
233 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
234 | #*.snk
235 | 
236 | # Since there are multiple workflows, uncomment next line to ignore bower_components
237 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
238 | #bower_components/
239 | 
240 | # RIA/Silverlight projects
241 | Generated_Code/
242 | 
243 | # Backup & report files from converting an old project file
244 | # to a newer Visual Studio version. Backup files are not needed,
245 | # because we have git ;-)
246 | _UpgradeReport_Files/
247 | Backup*/
248 | UpgradeLog*.XML
249 | UpgradeLog*.htm
250 | ServiceFabricBackup/
251 | *.rptproj.bak
252 | 
253 | # SQL Server files
254 | *.mdf
255 | *.ldf
256 | *.ndf
257 | 
258 | # Business Intelligence projects
259 | *.rdl.data
260 | *.bim.layout
261 | *.bim_*.settings
262 | *.rptproj.rsuser
263 | *- Backup*.rdl
264 | 
265 | # Microsoft Fakes
266 | FakesAssemblies/
267 | 
268 | # GhostDoc plugin setting file
269 | *.GhostDoc.xml
270 | 
271 | # Node.js Tools for Visual Studio
272 | .ntvs_analysis.dat
273 | node_modules/
274 | 
275 | # Visual Studio 6 build log
276 | *.plg
277 | 
278 | # Visual Studio 6 workspace options file
279 | *.opt
280 | 
281 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
282 | *.vbw
283 | 
284 | # Visual Studio LightSwitch build output
285 | **/*.HTMLClient/GeneratedArtifacts
286 | **/*.DesktopClient/GeneratedArtifacts
287 | **/*.DesktopClient/ModelManifest.xml
288 | **/*.Server/GeneratedArtifacts
289 | **/*.Server/ModelManifest.xml
290 | _Pvt_Extensions
291 | 
292 | # Paket dependency manager
293 | .paket/paket.exe
294 | paket-files/
295 | 
296 | # FAKE - F# Make
297 | .fake/
298 | 
299 | # CodeRush personal settings
300 | .cr/personal
301 | 
302 | # Python Tools for Visual Studio (PTVS)
303 | __pycache__/
304 | *.pyc
305 | 
306 | # Cake - Uncomment if you are using it
307 | # tools/**
308 | # !tools/packages.config
309 | 
310 | # Tabs Studio
311 | *.tss
312 | 
313 | # Telerik's JustMock configuration file
314 | *.jmconfig
315 | 
316 | # BizTalk build output
317 | *.btp.cs
318 | *.btm.cs
319 | *.odx.cs
320 | *.xsd.cs
321 | 
322 | # OpenCover UI analysis results
323 | OpenCover/
324 | 
325 | # Azure Stream Analytics local run output
326 | ASALocalRun/
327 | 
328 | # MSBuild Binary and Structured Log
329 | *.binlog
330 | 
331 | # NVidia Nsight GPU debugger configuration file
332 | *.nvuser
333 | 
334 | # MFractors (Xamarin productivity tool) working folder
335 | .mfractor/
336 | 
337 | # Local History for Visual Studio
338 | .localhistory/
339 | 
340 | # BeatPulse healthcheck temp database
341 | healthchecksdb
342 | 
343 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
344 | MigrationBackup/
345 | 
346 | ##
347 | ## Visual Studio for Mac
348 | ##
349 | 
350 | 
351 | # globs
352 | Makefile.in
353 | *.userprefs
354 | *.usertasks
355 | config.make
356 | config.status
357 | aclocal.m4
358 | install-sh
359 | autom4te.cache/
360 | *.tar.gz
361 | tarballs/
362 | test-results/
363 | 
364 | # Mac bundle stuff
365 | *.dmg
366 | *.app
367 | 
368 | # content below from: https://github.com/github/gitignore/blob/master/Global/macOS.gitignore
369 | # General
370 | .DS_Store
371 | .AppleDouble
372 | .LSOverride
373 | 
374 | # Icon must end with two \r
375 | Icon
376 | 
377 | 
378 | # Thumbnails
379 | ._*
380 | 
381 | # Files that might appear in the root of a volume
382 | .DocumentRevisions-V100
383 | .fseventsd
384 | .Spotlight-V100
385 | .TemporaryItems
386 | .Trashes
387 | .VolumeIcon.icns
388 | .com.apple.timemachine.donotpresent
389 | 
390 | # Directories potentially created on remote AFP share
391 | .AppleDB
392 | .AppleDesktop
393 | Network Trash Folder
394 | Temporary Items
395 | .apdisk
396 | 
397 | # content below from: https://github.com/github/gitignore/blob/master/Global/Windows.gitignore
398 | # Windows thumbnail cache files
399 | Thumbs.db
400 | ehthumbs.db
401 | ehthumbs_vista.db
402 | 
403 | # Dump file
404 | *.stackdump
405 | 
406 | # Folder config file
407 | [Dd]esktop.ini
408 | 
409 | # Recycle Bin used on file shares
410 | $RECYCLE.BIN/
411 | 
412 | # Windows Installer files
413 | *.cab
414 | *.msi
415 | *.msix
416 | *.msm
417 | *.msp
418 | 
419 | # Windows shortcuts
420 | *.lnk
421 | 
422 | # JetBrains Rider
423 | .idea/
424 | *.sln.iml
425 | 
426 | ##
427 | ## Visual Studio Code
428 | ##
429 | .vscode/*
430 | !.vscode/settings.json
431 | !.vscode/tasks.json
432 | !.vscode/launch.json
433 | !.vscode/extensions.json
434 | 
435 | **/.ionide/
436 | 
437 | *.ppm
438 | ray.exe
439 | ray.exe.so
440 | 


--------------------------------------------------------------------------------
/fsharp/Makefile:
--------------------------------------------------------------------------------
 1 | # Render both benchmark scenes at 1000x1000 via `dotnet run`.
 2 | all: rgbbox_1000.ppm irreg_1000.ppm
 3 | 
 4 | rgbbox_1000.ppm:
 5 | 	dotnet run -c release -- -f $@ -s rgbbox -n 1000 -m 1000
 6 | 
 7 | irreg_1000.ppm:
 8 | 	dotnet run -c release -- -f $@ -s irreg -n 1000 -m 1000
 9 | 
10 | # Neither `all` nor `clean` names a real file, so always run them.
11 | .PHONY: all clean
12 | clean:
13 | 	dotnet clean
14 | 	rm -f *.ppm


--------------------------------------------------------------------------------
/fsharp/README.md:
--------------------------------------------------------------------------------
 1 | # F# implementation
 2 | 
 3 | Just `dotnet run -c release`. The following options can be specified:
 4 | 
 5 | * `-m height`
 6 | * `-n width`
 7 | * `-f file.ppm`
 8 | * `-s <rgbbox|irreg>`
 9 | * `-r number of warmup runs`
10 | 
11 | Requires the [.NET Core 3.1 SDK](https://dotnet.microsoft.com/download).


--------------------------------------------------------------------------------
/fsharp/ray.fs:
--------------------------------------------------------------------------------
  1 | open System
  2 | open System.Diagnostics
  3 | open System.Text
  4 | open System.Threading.Tasks
  5 | 
  6 | // 3-component float32 vector.
  7 | // Marked [<Struct>], so it is a value type.
  8 | [<Struct>]
  9 | type Vec3 =
 10 |     { X: float32; Y: float32; Z: float32 }
 11 | 
 12 | // Component-wise sum of two vectors:
 13 | // (v1.X + v2.X, v1.Y + v2.Y, v1.Z + v2.Z).
 14 | let inline vecAdd v1 v2 =
 15 |     { X = v1.X + v2.X; Y = v1.Y + v2.Y; Z = v1.Z + v2.Z }
 16 | // Component-wise difference of two vectors:
 17 | // (v1.X - v2.X, v1.Y - v2.Y, v1.Z - v2.Z).
 18 | let inline vecSub v1 v2 =
 19 |     { X = v1.X - v2.X; Y = v1.Y - v2.Y; Z = v1.Z - v2.Z }
 20 | // Component-wise product of two vectors:
 21 | // (v1.X * v2.X, v1.Y * v2.Y, v1.Z * v2.Z).
 22 | let inline vecMul v1 v2 =
 23 |     { X = v1.X * v2.X; Y = v1.Y * v2.Y; Z = v1.Z * v2.Z }
 24 | // Component-wise quotient of two vectors:
 25 | // (v1.X / v2.X, v1.Y / v2.Y, v1.Z / v2.Z).
 26 | let inline vecDiv v1 v2 =
 27 |     { X = v1.X / v2.X; Y = v1.Y / v2.Y; Z = v1.Z / v2.Z }
 28 | 
 29 | // Scalar-vector product: each component of v is multiplied by s,
 30 | // with s as the left operand.
 31 | let inline scale s v =
 32 |     { X = s * v.X; Y = s * v.Y; Z = s * v.Z }
 33 | 
 34 | // Dot product: the X, Y and Z products summed in that order.
 35 | let inline dot v1 v2 =
 36 |     v1.X * v2.X + v1.Y * v2.Y + v1.Z * v2.Z
 37 | 
 38 | // Euclidean length of v: the square root of dot v v.
 39 | let inline norm v =
 40 |     sqrt (dot v v)
 41 | 
 42 | // Rescale v by the reciprocal of its norm (no zero-length check).
 43 | let inline normalise v = v |> scale (1.0f / norm v)
 44 | 
 45 | // Cross product of v1 and v2, in that operand order.
 46 | let inline cross v1 v2 =
 47 |     { X = v1.Y * v2.Z - v1.Z * v2.Y; Y = v1.Z * v2.X - v1.X * v2.Z;
 48 |       Z = v1.X * v2.Y - v1.Y * v2.X }
 49 | 
 50 | // Axis-aligned box given by its Min and Max corners (value type).
 51 | [<Struct>]
 52 | type AABB =
 53 |     { Min: Vec3; Max: Vec3 }
 54 | 
 55 | // Smallest box containing both arguments: component-wise minimum of
 56 | // the Min corners and component-wise maximum of the Max corners.
 57 | let inline enclosing box0 box1 =
 58 |     let lo0 = box0.Min
 59 |     let lo1 = box1.Min
 60 |     let hi0 = box0.Max
 61 |     let hi1 = box1.Max
 62 | 
 63 |     let lower = { X = min lo0.X lo1.X; Y = min lo0.Y lo1.Y; Z = min lo0.Z lo1.Z }
 64 |     let upper = { X = max hi0.X hi1.X; Y = max hi0.Y hi1.Y; Z = max hi0.Z hi1.Z }
 65 | 
 66 |     { Min = lower; Max = upper }
 67 | 
 68 | // Midpoint of the box, computed per axis as Min + 0.5 * (Max - Min).
 69 | let inline centre aabb =
 70 |     let inline mid (lo: float32) (hi: float32) = lo + 0.5f * (hi - lo)
 71 |     { X = mid aabb.Min.X aabb.Max.X; Y = mid aabb.Min.Y aabb.Max.Y; Z = mid aabb.Min.Z aabb.Max.Z }
 72 | 
 73 | type Bvh<'T> =                              // binary tree over 'T values
 74 |     | BvhLeaf of AABB * 'T                  // a box paired with one value
 75 |     | BvhSplit of AABB * Bvh<'T> * Bvh<'T>  // a box plus two subtrees
 76 | 
 77 | // Box stored at the root node of a tree, whether leaf or split.
 78 | let inline BvhAABB bvh =
 79 |     match bvh with
 80 |     | BvhLeaf (bounds, _) | BvhSplit (bounds, _, _) -> bounds
 81 | 
 82 | let inline split n xs =  // struct (first n elements, remaining elements)
 83 |     match xs with
 84 |     | _ when n = 0 -> struct ([||], xs)
 85 |     | [||] -> struct ([||], [||])
 86 |     | _ -> struct (xs.[..n-1], xs.[n..])
 87 | 
 88 | let inline axis d v =  // component of v chosen by d % 3: 0 -> X, 1 -> Y, else Z
 89 |     let which = d % 3
 90 |     if which = 0 then v.X
 91 |     elif which = 1 then v.Y
 92 |     else v.Z
 93 | 
 94 | // Build a BVH over allObjs, where f maps an object to its AABB.
 95 | // Each level sorts by box centre along axis (depth mod 3) and cuts
 96 | // the sorted array in half; an empty input raises via failwith.
 97 | let mkBvh f allObjs =
 98 |     let rec mk d n xs =
 99 |         match xs with
100 |         | [||] -> failwith "mk_bvh: no nodes"
101 |         | [| x |] -> BvhLeaf(f x, x)
102 |         | _ ->
103 |             let half = n / 2
104 |             let ordered = xs |> Array.sortBy (fun x -> axis d (centre (f x)))
105 |             let struct (lower, upper) = split half ordered
106 |             let doLeft () = mk (d+1) half lower
107 |             let doRight () = mk (d+1) (n - half) upper
108 |             let struct (left, right) =
109 |                 if n >= 100 then
110 |                     // Large subproblems: build both halves as tasks.
111 |                     let leftTask = Task.Factory.StartNew(doLeft, TaskCreationOptions.None)
112 |                     let rightTask = Task.Factory.StartNew(doRight, TaskCreationOptions.None)
113 |                     struct (leftTask.Result, rightTask.Result)
114 |                 else
115 |                     struct (doLeft (), doRight ())
116 |             let box = enclosing (BvhAABB left) (BvhAABB right)
117 |             BvhSplit (box, left, right)
118 |     mk 0 (Array.length allObjs) allObjs
119 | 
120 | type Pos = Vec3     // alias of Vec3 used where a point is meant
121 | type Dir = Vec3     // alias of Vec3 used where a direction is meant
122 | type Colour = Vec3  // alias of Vec3 used where a colour is meant
123 | 
124 | let black = { X=0.0f; Y=0.0f; Z=0.0f }  // all components 0
125 | let white = { X=1.0f; Y=1.0f; Z=1.0f }  // all components 1
126 | 
127 | [<Struct>]
128 | type Ray = { Origin: Pos; Dir: Dir }  // points on the ray are Origin + t * Dir
129 | 
130 | let pointAtParam ray t =  // the point ray.Origin + t * ray.Dir
131 |     scale t ray.Dir |> vecAdd ray.Origin
132 | 
133 | // Ray/sphere intersection record: ray parameter T, hit point P,
134 | // surface Normal at P and the Colour of the sphere that was hit.
135 | [<Struct>]
136 | type Hit =
137 |     { T: float32; P: Pos
138 |       Normal: Dir; Colour: Colour }
139 | 
140 | // Sphere primitive: centre Pos, surface Colour and Radius.
141 | [<Struct>]
142 | type Sphere =
143 |     { Pos: Pos; Colour: Colour
144 |       Radius: float32 }
145 | 
146 | let inline sphereAABB s =  // box from s.Pos - Radius to s.Pos + Radius on every axis
147 |     let r = { X = s.Radius; Y = s.Radius; Z = s.Radius }
148 |     { Min = vecSub s.Pos r; Max = vecAdd s.Pos r }
149 | 
150 | // Closest hit of ray r on sphere s with tMin < T < tMax, if any;
151 | // the nearer root of the quadratic is tried before the farther one.
152 | let inline sphereHit s r tMin tMax =
153 |     let oc = vecSub r.Origin s.Pos
154 |     let a = dot r.Dir r.Dir
155 |     let b = dot oc r.Dir
156 |     let c = dot oc oc - s.Radius*s.Radius
157 |     let discriminant = b*b - a*c
158 |     let inline hitAt t =
159 |         if t < tMax && t > tMin then
160 |             let p = pointAtParam r t
161 |             ValueSome { T = t; P = p
162 |                         Normal = scale (1.0f/s.Radius) (vecSub p s.Pos)
163 |                         Colour = s.Colour }
164 |         else ValueNone
165 |     if discriminant <= 0.0f then ValueNone
166 |     else
167 |         let root = sqrt discriminant
168 |         match hitAt ((-b - root)/a) with
169 |         | ValueNone -> hitAt ((-b + root)/a)
170 |         | nearHit -> nearHit
171 | 
172 | // Does ray r pass through aabb for some parameter in (tmin0, tmax0)?
173 | // The parameter interval is clipped against the X, Y and Z extents in
174 | // turn; the test stops as soon as the interval becomes empty.
175 | let inline aabbHit aabb r tmin0 tmax0 =
176 |     let inline clip lo hi origin dir tNear tFar =
177 |         let invD = 1.0f / dir
178 |         let tA = (lo - origin) * invD
179 |         let tB = (hi - origin) * invD
180 |         // With a negative direction the far plane is crossed first.
181 |         let struct (tIn, tOut) = if invD < 0.0f then struct (tB, tA) else struct (tA, tB)
182 |         struct (max tIn tNear, min tOut tFar)
183 |     let struct (tmin1, tmax1) =
184 |         clip aabb.Min.X aabb.Max.X r.Origin.X r.Dir.X tmin0 tmax0
185 |     if tmax1 <= tmin1 then false
186 |     else
187 |         let struct (tmin2, tmax2) =
188 |             clip aabb.Min.Y aabb.Max.Y r.Origin.Y r.Dir.Y tmin1 tmax1
189 |         if tmax2 <= tmin2 then false
190 |         else
191 |             let struct (tmin3, tmax3) =
192 |                 clip aabb.Min.Z aabb.Max.Z r.Origin.Z r.Dir.Z tmin2 tmax2
193 |             tmax3 > tmin3
194 | 
195 | // Closest hit of ray r among the spheres stored below bvh, with the
196 | // hit parameter restricted to (tMin, tMax).  A subtree is skipped when
197 | // r misses its box; a hit on the left shrinks the range for the right.
198 | let rec objsHit bvh r tMin tMax =
199 |     match bvh with
200 |     | BvhLeaf (_, sphere) -> sphereHit sphere r tMin tMax
201 |     | BvhSplit (box, _, _) when not (aabbHit box r tMin tMax) -> ValueNone
202 |     | BvhSplit (_, left, right) ->
203 |         match objsHit left r tMin tMax with
204 |         | ValueNone -> objsHit right r tMin tMax
205 |         | ValueSome nearer as leftHit ->
206 |             // Only a right-hand hit nearer than the left one replaces it.
207 |             match objsHit right r tMin nearer.T with
208 |             | ValueNone -> leftHit
209 |             | rightHit -> rightHit
210 | 
211 | // Viewing frame: eye Origin, image-plane corner LLC (the point
212 | // getRay reaches at s = t = 0) and the Horizontal / Vertical spans.
213 | [<Struct>]
214 | type Camera =
215 |     { Origin: Pos; LLC: Pos
216 |       Horizontal: Dir; Vertical: Dir }
217 | 
218 | // Build the camera at lookfrom aimed at lookat, with vup fixing the
219 | // roll.  vfov is the vertical field of view in degrees; aspect
220 | // scales the half-height into the half-width.
221 | let inline camera lookfrom lookat vup vfov aspect =
222 |     let halfHeight = tan (vfov * MathF.PI / 180.0f / 2.0f)
223 |     let halfWidth = aspect * halfHeight
224 |     let w = vecSub lookfrom lookat |> normalise
225 |     let u = cross vup w |> normalise
226 |     let v = cross w u
227 |     let leftEdge = vecSub lookfrom (scale halfWidth u)
228 |     let lowerLeft = vecSub leftEdge (scale halfHeight v)
229 |     { Origin = lookfrom
230 |       LLC = vecSub lowerLeft w
231 |       Horizontal = scale (2.0f*halfWidth) u
232 |       Vertical = scale (2.0f*halfHeight) v }
233 | 
234 | // Ray from the eye through image-plane point LLC + s*Horizontal + t*Vertical.
235 | let inline getRay cam s t =
236 |     let target = vecAdd (vecAdd cam.LLC (scale s cam.Horizontal)) (scale t cam.Vertical)
237 |     { Origin = cam.Origin; Dir = vecSub target cam.Origin }
238 | 
239 | let inline reflect v n =  // v - 2 * (v . n) * n
240 |     scale (2.0f * dot v n) n |> vecSub v
241 | 
242 | // Mirror-reflect r about the hit normal.  Yields the new ray and
243 | // the hit colour when the reflected direction has a positive dot
244 | // product with the normal, otherwise ValueNone.
245 | let inline scatter r hit =
246 |     let bounced = reflect (normalise r.Dir) hit.Normal
247 |     if dot bounced hit.Normal > 0.0f then
248 |         ValueSome struct ({ Origin = hit.P; Dir = bounced }, hit.Colour)
249 |     else ValueNone
250 | 
251 | // Colour carried back along ray r.  A hit that scatters multiplies the
252 | // sphere colour into the colour of the scattered ray while depth < 50;
253 | // a hit that does not scatter, or a deeper bounce, gives black.  A ray
254 | // that hits nothing gets a blend of white and (0.5, 0.7, 1.0) by its Y.
255 | let rec rayColour objs r depth =
256 |     match objsHit objs r 0.001f 1000000000.0f with
257 |     | ValueNone ->
258 |         let t = 0.5f * ((normalise r.Dir).Y + 1.0f)
259 |         let sky = { X=0.5f; Y=0.7f; Z=1.0f }
260 |         vecAdd (scale (1.0f-t) white) (scale t sky)
261 |     | ValueSome hit ->
262 |         match scatter r hit with
263 |         | ValueSome (scattered, attenuation) when depth < 50 ->
264 |             vecMul attenuation (rayColour objs scattered (depth+1))
265 |         | _ -> black
266 | 
267 | // Colour for pixel (row j, column i): shoots the camera ray at
268 | // image-plane coordinates (i / width, j / height).
269 | let inline traceRay objs width height cam j i =
270 |     let ray = getRay cam (float32 i / float32 width) (float32 j / float32 height)
271 |     rayColour objs ray 0
272 | 
273 | // Convert a colour to integer channels: each component is
274 | // multiplied by 255.99 and converted with `int` (no clamping).
275 | let colorToPixel p =
276 |     let inline channel (c: float32) = int (255.99f * c)
277 |     struct (channel p.X, channel p.Y, channel p.Z)
278 | 
279 | // Picture as a flat array of struct (r, g, b) pixels plus its size.
280 | [<Struct>]
281 | type Image =
282 |     { Pixels: struct(int * int * int) []
283 |       Height: int; Width: int }
284 | 
285 | // Render img as a plain-text PPM string: the "P3" header with width,
286 | // height and maximum value 255, then one "r g b" line per pixel.
287 | let inline image2ppm img =
288 |     let sb = StringBuilder()
289 |     sb.Append("P3\n")
290 |       .Append(string img.Width).Append(" ")
291 |       .Append(string img.Height).Append("\n")
292 |       .Append("255\n") |> ignore
293 |     for struct (r, g, b) in img.Pixels do
294 |         sb.Append(string r).Append(" ").Append(string g).Append(" ").Append(string b).Append("\n") |> ignore
295 |     sb.ToString()
296 | 
297 | // Trace all height*width pixels in parallel.  Linear index l maps
298 | // to column l % width and row height - l / width.
299 | let inline render objs width height cam =
300 |     let inline shade l =
301 |         traceRay objs width height cam (height - l / width) (l % width)
302 |         |> colorToPixel
303 | 
304 |     let pixels = Array.Parallel.init (height*width) shade
305 |     { Width = width
306 |       Height = height
307 |       Pixels = pixels }
308 | 
309 | // Scene description: camera placement (LookFrom, LookAt), field
310 | // of view FOV as passed to `camera`, and the spheres to render.
311 | [<Struct>]
312 | type Scene =
313 |     { LookFrom: Pos; LookAt: Pos
314 |       FOV: float32; Spheres: Sphere [] }
315 | 
316 | let inline fromScene width height scene =  // struct (BVH of the spheres, camera)
317 |     let up = { X=0.0f; Y=1.0f; Z=0.0f }
318 |     struct (mkBvh sphereAABB scene.Spheres, camera scene.LookFrom scene.LookAt up scene.FOV (float32 width/float32 height))
319 | 
320 | let inline tabulate2D m n f =  // f (j, i) for j in 0..m-1, i in 0..n-1, j-major
321 |     [| for j in 0 .. m-1 do for i in 0 .. n-1 -> f (j, i) |]
322 | 
323 | // Scene "rgbbox": four 10x10 grids of spheres of radius 3 -- a red
324 | // wall at x = -30, a yellow wall at z = -30, a blue wall at x = 30 and
325 | // a white floor at y = -30 -- seen from (0, 30, 30) towards (0, -1, -1).
326 | let rgbbox : Scene =
327 |     let n = 10
328 |     let k = 60.0f
329 | 
330 |     // Radius and grid coordinate shared by all four grids.
331 |     let radius = k/(float32 n*2.0f)
332 |     let coord (c: int) = -k/2.0f + (k/float32 n) * float32 c
333 | 
334 |     // n*n spheres of one colour; `place` turns grid indices into a centre.
335 |     let grid colour place =
336 |         tabulate2D n n (fun (a, b) ->
337 |                             { Pos = place a b
338 |                               Colour = colour
339 |                               Radius = radius })
340 | 
341 |     // Red, in the plane x = -k/2.
342 |     let leftwall =
343 |         grid {X=1.0f; Y=0.0f; Z=0.0f}
344 |              (fun y z -> {X=(-k/2.0f); Y=coord y; Z=coord z})
345 | 
346 |     // Yellow, in the plane z = -k/2.
347 |     let midwall =
348 |         grid {X=1.0f; Y=1.0f; Z=0.0f}
349 |              (fun x y -> {X=coord x; Y=coord y; Z=(-k/2.0f)})
350 | 
351 |     // Blue, in the plane x = k/2.
352 |     let rightwall =
353 |         grid {X=0.0f; Y=0.0f; Z=1.0f}
354 |              (fun y z -> {X=(k/2.0f); Y=coord y; Z=coord z})
355 | 
356 |     // White, in the plane y = -k/2.
357 |     let bottom =
358 |         grid {X=1.0f; Y=1.0f; Z=1.0f}
359 |              (fun x z -> {X=coord x; Y=(-k/2.0f); Z=coord z})
360 | 
361 |     { Spheres = Array.concat [ leftwall; midwall; rightwall; bottom ]
362 |       LookFrom = {X=0.0f; Y=30.0f; Z=30.0f}
363 |       LookAt = {X=0.0f; Y= -1.0f; Z= -1.0f}
364 |       FOV = 75.0f }
365 | 
366 | // Scene "irreg": one n-by-n sheet of white spheres lying in the Y=0
367 | // plane, with the camera placed just above it.
368 | let irreg : Scene =
369 |     let n = 100
370 |     let k = 600.0f
371 |     let radius = k/(float32 n * 2.0f)
372 |     let coord (t: int) = -k/2.0f + (k/float32 n) * float32 t
373 |     let bottom =
374 |         tabulate2D n n (fun (x,z) ->
375 |             { Pos = {X=coord x; Y=0.0f; Z=coord z}; Colour = white; Radius = radius })
376 |     { Spheres = bottom
377 |       LookFrom = { X=0.0f; Y=12.0f; Z=30.0f }
378 |       LookAt = { X=0.0f; Y=10.0f; Z= -1.0f }
379 |       FOV = 75.0f }
380 | 
381 | // Scan argv left to right; on the first `needle` that has a following item, return f of that item, else def.
382 | let rec getopt needle argv f def =
383 |     match argv with
384 |     | opt :: value :: _ when opt = needle -> f value
385 |     | _ :: (_ :: _ as rest) -> getopt needle rest f def
386 |     | _ -> def
387 | 
388 | // Call a function 'n' times and return the last result.  Let's hope the
389 | // compiler doesn't optimise away the repeated applications.  A count
390 | // below 1 (e.g. "-r 0") still makes exactly one call instead of recursing
391 | // until the counter wraps around.
392 | let rec repeat n f =
393 |     if n <= 1 then f () else (f () |> ignore; repeat (n-1) f)
394 | 
395 | [<EntryPoint>]
396 | let main argv =
397 |     // Options: -m height, -n width, -f output file, -s scene name, -r warmup runs.
398 |     let args = Array.toList argv
399 |     let arg flag parse fallback = getopt flag args parse fallback
400 |     let height = arg "-m" int 200
401 |     let width = arg "-n" int 200
402 |     let imgfile = arg "-f" Some None
403 |     let sceneName = arg "-s" id "rgbbox"
404 |     let runs = arg "-r" int 10
405 | 
406 |     let scene =
407 |         match sceneName with
408 |         | "rgbbox" -> rgbbox
409 |         | "irreg" -> irreg
410 |         | other -> failwith ("No such scene: " + other)
411 |     printfn "Using scene '%s' (-s to switch)." sceneName
412 |     printfn "Using %d warmup runs before benchmarking (-r to change)." runs
413 | 
414 |     let timer = Stopwatch()
415 | 
416 |     // Warm up, then time one BVH construction.
417 |     ignore (repeat runs (fun () -> fromScene width height scene))
418 |     timer.Restart()
419 |     let struct (objs, cam) = fromScene width height scene
420 |     timer.Stop()
421 |     printfn "Scene BVH construction in %fs." timer.Elapsed.TotalSeconds
422 | 
423 |     // Warm up, then time one render.
424 |     ignore (repeat runs (fun () -> render objs width height cam))
425 |     timer.Restart()
426 |     let result = render objs width height cam
427 |     timer.Stop()
428 |     printfn "Rendering in %fs." timer.Elapsed.TotalSeconds
429 | 
430 |     match imgfile with
431 |     | Some path ->
432 |         printfn "Writing image to %s." path
433 |         System.IO.File.WriteAllText(path, image2ppm result)
434 |     | None ->
435 |         printfn "-f not passed, so not writing image to file."
436 |     0
437 | 


--------------------------------------------------------------------------------
/fsharp/shell.nix:
--------------------------------------------------------------------------------
1 | with import (builtins.fetchTarball { # nixpkgs pinned to one commit, so the shell does not depend on the host's channel
2 |   url = "https://github.com/NixOS/nixpkgs/archive/820177eded91f3908cfc72dfee00e831ea3d0060.zip";
3 |   sha256 = "1yqx5zy154f8057inwjp2ranizgilvpblqq31cy7nryrwj2apics"; # expected hash of the fetched nixpkgs archive
4 | }) {};
5 | stdenv.mkDerivation {
6 |     name = "ray";
7 |     buildInputs = [ dotnet-sdk_3 gnumake ]; # .NET Core 3 SDK (trace.fsproj targets netcoreapp3.1) and make
8 | }
9 | 


--------------------------------------------------------------------------------
/fsharp/trace.fsproj:
--------------------------------------------------------------------------------
 1 | ﻿<Project Sdk="Microsoft.NET.Sdk">
 2 | 
 3 |   <PropertyGroup>
 4 |     <OutputType>Exe</OutputType> <!-- build an executable rather than a library -->
 5 |     <TargetFramework>netcoreapp3.1</TargetFramework> <!-- pairs with dotnet-sdk_3 in shell.nix -->
 6 |     <ServerGarbageCollection>true</ServerGarbageCollection> <!-- run with the server garbage collector -->
 7 |   </PropertyGroup>
 8 | 
 9 |   <ItemGroup>
10 |     <Compile Include="ray.fs" />
11 |   </ItemGroup>
12 | 
13 | </Project>
14 | 


--------------------------------------------------------------------------------
/futhark/.gitignore:
--------------------------------------------------------------------------------
1 | ray.c
2 | ray.h
3 | main
4 | *.ppm
5 | 


--------------------------------------------------------------------------------
/futhark/Makefile:
--------------------------------------------------------------------------------
 1 | CC?=clang
 2 | FUTHARK_BACKEND?=multicore
 3 | 
 4 | # CFLAGS: compile options only.  LDFLAGS: the libraries each backend needs;
 5 | # they go after the objects on the link line (linkers resolve left to right).
 6 | CFLAGS=-O3 -std=c99
 7 | ifeq ($(FUTHARK_BACKEND),opencl)
 8 | LDFLAGS=-lm -lOpenCL
 9 | else ifeq ($(FUTHARK_BACKEND),multicore)
10 | LDFLAGS=-lm -lpthread
11 | else ifeq ($(FUTHARK_BACKEND),cuda)
12 | LDFLAGS=-lm -lcuda -lnvrtc
13 | else
14 | LDFLAGS=-lm
15 | endif
16 | .PHONY: all clean
17 | 
18 | all: rgbbox_1000.ppm irreg_1000.ppm
19 | 
20 | main: main.c ray.o
21 | 	$(CC) -o main $(CFLAGS) -Wall -Wextra -pedantic $^ $(LDFLAGS)
22 | 
23 | ray.o: ray.c
24 | 	$(CC) -c $(CFLAGS) $^
25 | 
26 | ray.c: *.fut
27 | 	futhark $(FUTHARK_BACKEND) --library ray.fut
28 | 
29 | rgbbox_1000.ppm: main
30 | 	./main -f $@ -s rgbbox -n 1000 -m 1000
31 | 
32 | irreg_1000.ppm: main
33 | 	./main -f $@ -s irreg -n 1000 -m 1000
34 | 
35 | clean:
36 | 	rm -f ray.h ray.c ray.o main
37 | 


--------------------------------------------------------------------------------
/futhark/README.md:
--------------------------------------------------------------------------------
 1 | # Futhark implementation
 2 | 
 3 | ## TL;DR
 4 | 
 5 | ```
 6 | $ make
 7 | ```
 8 | 
 9 | or
10 | 
11 | ```
12 | $ make FUTHARK_BACKEND=multicore CC=clang
13 | ```
14 | 
15 | ## Details
16 | 
17 | Because we want to benchmark the rendering time independently of the
18 | BVH construction and scene creation, we don't use the normal `futhark
19 | bench` tool.  Instead we have a C program, `main.c` that calls the
20 | Futhark program as a library.  Run `make` to build and then benchmark
21 | with e.g.
22 | 
23 | ```
24 | $ ./main -n 1000 -m 1000 -s rgbbox -f rgbbox.ppm
25 | ```
26 | 
27 | This will also create an output image.
28 | 
29 | Use the environment variable `FUTHARK_BACKEND` to select a different
30 | Futhark backend (requires a recompile).
31 | 


--------------------------------------------------------------------------------
/futhark/bvh.fut:
--------------------------------------------------------------------------------
 1 | import "lib/github.com/diku-dk/sorts/radix_sort"
 2 | 
 3 | import "radixtree"
 4 | import "prim"
 5 | 
 6 | -- | Spreads the low 10 bits of `v` over 30 bits, leaving two zero
 7 | -- bits between consecutive input bits.
 8 | let expand_bits (v: u32) : u32 =
 9 |   let spread (mul: u32) (mask: u32) (w: u32) : u32 = (w * mul) & mask
10 |   in v |> spread 0x00010001 0xFF0000FF
11 |        |> spread 0x00000101 0x0F00F00F
12 |        |> spread 0x00000011 0xC30C30C3
13 |        |> spread 0x00000005 0x49249249
14 | 
15 | -- Morton code of a point: each coordinate is scaled by 1024, clamped
16 | -- to [0, 1023], bit-expanded, and the three results are combined with
17 | -- weights 4 (x), 2 (y) and 1 (z).
18 | let morton_3D {x,y,z} : u32 =
19 |   let quantise (c: f32) : u32 =
20 |     let clamped = f32.min (f32.max (c * 1024) 0) 1023
21 |     in expand_bits (u32.f32 clamped)
22 |   in quantise x * 4 + quantise y * 2 + quantise z
23 | 
24 | type ptr = #leaf i32 | #inner i32 -- child reference: an index into the leaf array (L) or the inner-node array (I)
25 | 
26 | type inner = {aabb: aabb, left:ptr, right:ptr, parent:i32} -- inner node; parent is an index into I (bvh_fold stops when it reaches -1)
27 | 
28 | type~ bvh [n] 't = {L: [n]t, I: []inner} -- L: the n leaf elements (sorted by Morton code in bvh_mk); I: the inner nodes
29 | 
30 | let bvh_mk [n] 't (bbf: t -> aabb) (ts: [n]t) : bvh [n] t = -- build a BVH over ts; bbf gives each element's bounding box
31 |   let centers = map (bbf >-> centre) ts
32 |   let x_max = f32.maximum (map (.x) centers)
33 |   let y_max = f32.maximum (map (.y) centers)
34 |   let z_max = f32.maximum (map (.z) centers)
35 |   let x_min = f32.minimum (map (.x) centers)
36 |   let y_min = f32.minimum (map (.y) centers)
37 |   let z_min = f32.minimum (map (.z) centers)
38 |   let normalise {x,y,z} = {x=(x-x_min)/(x_max-x_min), -- rescale a centre by the per-axis range of all centres
39 |                            y=(y-y_min)/(y_max-y_min), -- NOTE(review): an axis on which all centres coincide gives 0/0 here;
40 |                            z=(z-z_min)/(z_max-z_min)} -- presumably absorbed by the clamp in morton_3D - confirm
41 |   let morton = bbf >-> centre >-> normalise >-> morton_3D
42 | 
43 |   let ts = radix_sort_by_key morton u32.num_bits u32.get_bit ts -- order the elements by Morton code
44 |   let empty_aabb = {min = vec(0,0,0), max = vec(0,0,0)}
45 |   let empty_aabb {left, right, parent} = {aabb=empty_aabb, left, right, parent} -- shadows the value above: attaches a placeholder box to a radix-tree node
46 |   let inners = map empty_aabb (mk_radix_tree (map morton ts))
47 |   let depth = t32 (f32.log2 (f32.i64 n)) + 2 -- number of refinement rounds below; NOTE(review): assumes log2 n + 2 rounds reach the root - confirm for skewed trees
48 |   let get_aabb inners ptr =
49 |     match ptr
50 |     case #leaf i -> bbf (#[unsafe] ts[i])
51 |     case #inner i -> #[unsafe] inners[i].aabb
52 |   let update inners {aabb=_, left, right, parent} =
53 |     {aabb = enclosing (get_aabb inners left) (get_aabb inners right),
54 |      left,
55 |      right,
56 |      parent}
57 |   let inners = loop inners for _i < depth do -- each round recomputes every inner box from its children's current boxes
58 |                map (update inners) inners
59 |   in {L = ts, I = inners}
60 | 
61 | let bvh_fold [n] 'a 'b (contains: aabb -> bool) (op: b -> i32 -> a -> b) (init: b) (t: bvh [n] a) : b =
62 |   (.0) <| -- keep only the accumulator from the final loop state
63 |   loop (acc, cur, prev) = (init, 0, #inner (-1)) -- start at inner node 0; NOTE(review): assumes t.I is non-empty - confirm
64 |   while cur != -1 do -- cur becomes -1 via node.parent; presumably only the root has parent -1 - confirm in radixtree
65 |   let node = #[unsafe] t.I[cur]
66 |   let from_left = prev == node.left
67 |   let from_right = prev == node.right
68 |   let rec_child : #rec ptr | #norec = -- which child to visit next, if any
69 |     -- Did we return from left node?
70 |     if from_left
71 |     then #rec node.right
72 |     -- First encounter and in this BB?
73 |     else if !from_right
74 |     then if contains node.aabb
75 |          then #rec node.left
76 |          else #norec
77 |     else #norec -- returned from the right child: this subtree is finished
78 |   in match rec_child
79 |      case #norec ->
80 |        (acc, node.parent, #inner cur) -- go back up, recording this node as the one we came from
81 |      case #rec ptr ->
82 |        match ptr
83 |        case #inner i -> (acc, i, #inner cur) -- descend into an inner child
84 |        case #leaf i -> (op acc i (#[unsafe] t.L[i]), cur, ptr) -- apply op to the leaf and stay on cur; the leaf's own box is not tested here
85 | 


--------------------------------------------------------------------------------
/futhark/futhark.pkg:
--------------------------------------------------------------------------------
1 | require {
2 |   github.com/diku-dk/sorts 0.4.0 #94b33a2ba7b4c6669da70cb688fd6d1e5b31aecb
3 | }
4 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/segmented/segmented.fut:
--------------------------------------------------------------------------------
  1 | -- | Irregular segmented operations, like scans and reductions.
  2 | 
  3 | -- | Segmented inclusive scan.  `op` must be associative with neutral
  4 | -- element `ne`.  Every `true` in `flags` begins a new segment, so the
  5 | -- running result restarts there; `false` continues the current segment.
  6 | -- The result has one element per element of `as`.
  7 | let segmented_scan [n] 't (op: t -> t -> t) (ne: t)
  8 |                           (flags: [n]bool) (as: [n]t): [n]t =
  9 |   -- Lift `op` to (flag, value) pairs: a flagged right operand wins outright.
 10 |   let lifted (f1, v1) (f2, v2) =
 11 |     (f1 || f2, if f2 then v2 else v1 `op` v2)
 12 |   let (_, scanned) = unzip (scan lifted (false, ne) (zip flags as))
 13 |   in scanned
 14 | 
 15 | -- | Segmented reduction. Given a binary associative operator ``op``
 16 | -- with neutral element ``ne``, computes the reduction of the segments
 17 | -- of ``as`` specified by the ``flags`` array, where `true` starts a
 18 | -- segment and `false` continues a segment.  One value is returned per
 19 | -- segment.
 20 | let segmented_reduce [n] 't (op: t -> t -> t) (ne: t)
 21 |                             (flags: [n]bool) (as: [n]t) =
 22 |   -- Compute segmented scan.  Then we just have to fish out the end of
 23 |   -- each segment.
 24 |   let as' = segmented_scan op ne flags as
 25 |   -- Find the segment ends.
 26 |   let segment_ends = rotate 1 flags
 27 |   -- Find the offset for each segment end.
 28 |   let segment_end_offsets = segment_ends |> map i64.bool |> scan (+) 0
 29 |   let num_segments = if n > 0 then last segment_end_offsets else 0
 30 |   -- Make room for the final result.  The specific value we write here
 31 |   -- does not matter; they will all be overwritten by the segment
 32 |   -- ends.
 33 |   let scratch = replicate num_segments ne
 34 |   -- Compute where to write each element of as'.  Only segment ends
 35 |   -- are written.
 36 |   let index i f = if f then i-1 else -1
 37 |   in scatter scratch (map2 index segment_end_offsets segment_ends) as'
 38 | 
 39 | -- | Replicated iota. Given a repetition array, the function returns
 40 | -- an array with each index (starting from 0) repeated according to
 41 | -- the repetition array. As an example, replicated_iota [2,3,1]
 42 | -- returns the array [0,0,1,1,1,2].
 43 | 
 44 | let replicated_iota [n] (reps:[n]i64) : []i64 =
 45 |   let s1 = scan (+) 0 reps
 46 |   let s2 = map2 (\i x -> if i==0 then 0 else x)
 47 |                 (iota n) (rotate (-1) s1)
 48 |   let tmp = reduce_by_index (replicate (reduce (+) 0 reps) 0) i64.max 0 s2 (iota n)
 49 |   let flags = map (>0) tmp
 50 |   in segmented_scan (+) 0 flags tmp
 51 | 
 52 | -- | Segmented iota. Given a flags array, the function returns an
 53 | -- array of index sequences, each of which is reset according to the
 54 | -- flags array. As an example, segmented_iota
 55 | -- [false,false,false,true,false,false,false] returns the array
 56 | -- [0,1,2,0,1,2,3].
 57 | 
 58 | let segmented_iota [n] (flags:[n]bool) : [n]i64 =
 59 |   let iotas = segmented_scan (+) 0 flags (replicate n 1) -- 1-based position of each element within its segment
 60 |   in map (\x -> x-1) iotas -- shift to 0-based
 61 | 
 62 | -- | Generic expansion function. The function expands a source array
 63 | -- into a target array given (1) a function that determines, for each
 64 | -- source element, how many target elements it expands to and (2) a
 65 | -- function that computes a particular target element based on a
 66 | -- source element and the target element number associated with the
 67 | -- source. As an example, the expression expand (\x->x) (*) [2,3,1]
 68 | -- returns the array [0,2,0,3,6,0].
 69 | 
 70 | let expand 'a 'b (sz: a -> i64) (get: a -> i64 -> b) (arr:[]a) : []b =
 71 |   let szs = map sz arr
 72 |   let idxs = replicated_iota szs
 73 |   let iotas = segmented_iota (map2 (!=) idxs (rotate (i64.negate 1) idxs))
 74 |   in map2 (\i j -> get arr[i] j) idxs iotas
 75 | 
 76 | -- | Expansion function equivalent to performing a segmented reduction
 77 | -- to the result of a general expansion with a flags vector expressing
 78 | -- the beginning of the expanded segments. The function makes use of
 79 | -- the intermediate flags vector generated as part of the expansion
 80 | -- and the `expand_reduce` function is therefore more efficient than
 81 | -- if a segmented reduction (with an appropriate flags vector) is
 82 | -- explicitly followed by a call to expand.
 83 | 
 84 | let expand_reduce 'a 'b (sz: a -> i64) (get: a -> i64 -> b)
 85 |                         (op: b -> b -> b) (ne:b) (arr:[]a) : []b =
 86 |   let szs = map sz arr
 87 |   let idxs = replicated_iota szs
 88 |   let flags = map2 (!=) idxs (rotate (i64.negate 1) idxs)
 89 |   let iotas = segmented_iota flags
 90 |   let vs = map2 (\i j -> get arr[i] j) idxs iotas
 91 |   in segmented_reduce op ne flags vs
 92 | 
 93 | -- | Expansion followed by an ''outer segmented reduce'' that ensures
 94 | -- that each element in the result array corresponds to expanding and
 95 | -- reducing the corresponding element in the source array.
 96 | 
 97 | let expand_outer_reduce 'a 'b [n] (sz: a -> i64) (get: a -> i64 -> b)
 98 |                                   (op: b -> b -> b) (ne: b)
 99 |                                   (arr: [n]a) : [n]b =
100 |   let sz' x = let s = sz x
101 |               in if s == 0 then 1 else s
102 |   let get' x i = if sz x == 0 then ne else get x i
103 |   in expand_reduce sz' get' op ne arr :> [n]b
104 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/segmented/segmented_tests.fut:
--------------------------------------------------------------------------------
 1 | -- | ignore
 2 | 
 3 | import "segmented"
 4 | 
 5 | -- ==
 6 | -- entry: test_segmented_scan
 7 | -- input { [true,false,false,true,false,false,true,false,false,false]
 8 | --         [1i64,2i64,3i64,4i64,5i64,6i64,7i64,8i64,9i64,10i64] }
 9 | -- output { [1i64,3i64,6i64,4i64,9i64,15i64,7i64,15i64,24i64,34i64] }
10 | -- input { [true] [1i64] }
11 | -- output { [1i64] }
12 | -- input { empty([0]bool) empty([0]i64) }
13 | -- output { empty([0]i64) }
14 | 
15 | entry test_segmented_scan (flags: []bool) (as: []i64) =
16 |   segmented_scan (+) 0 flags as
17 | 
18 | -- ==
19 | -- entry: test_segmented_reduce
20 | -- input { [true,false,false,true,false,false,true,false,false,false]
21 | --         [1i64,2i64,3i64,4i64,5i64,6i64,7i64,8i64,9i64,10i64] }
22 | -- output { [6i64,15i64,34i64] }
23 | -- input { [true] [1i64] }
24 | -- output { [1i64] }
25 | 
26 | entry test_segmented_reduce (flags: []bool) (as: []i64) =
27 |   segmented_reduce (+) 0 flags as
28 | 
29 | -- ==
30 | -- entry: test_replicated_iota
31 | -- input { [2i64,3i64,1i64] } output { [0i64,0i64,1i64,1i64,1i64,2i64] }
32 | -- input { [3i64] } output { [0i64,0i64,0i64] }
33 | -- input { [2i64,0i64,1i64] } output { [0i64,0i64,2i64] }
34 | -- input { empty([0]i64) } output { empty([0]i64) }
35 | -- input { [0i64] } output { empty([0]i64) }
36 | -- input { [0i64,0i64] } output { empty([0]i64) }
37 | 
38 | entry test_replicated_iota (repl:[]i64) : []i64 =
39 |   replicated_iota repl
40 | 
41 | -- ==
42 | -- entry: test_segmented_iota
43 | -- input { [false,false,false,true,false,false,false] }
44 | -- output { [0i64,1i64,2i64,0i64,1i64,2i64,3] }
45 | -- input { [false] } output { [0i64] }
46 | -- input { [true] } output { [0i64] }
47 | -- input { empty([0]bool) } output { empty([0]i64) }
48 | 
49 | entry test_segmented_iota (flags:[]bool) : []i64 =
50 |   segmented_iota flags
51 | 
52 | -- ==
53 | -- entry: test_expand
54 | -- input { [2i64,3i64,1i64] }
55 | -- output { [0i64,2i64,0i64,3i64,6i64,0i64] }
56 | 
57 | entry test_expand (arr:[]i64) : []i64 =
58 |   expand (\ x -> x) (\x i -> x*i) arr
59 | 
60 | -- ==
61 | -- entry: test_expand_reduce
62 | -- input { [2i64,0i64,3i64,1i64] }
63 | -- output { [2i64,9i64,0i64] }
64 | 
65 | entry test_expand_reduce (arr:[]i64) : []i64 =
66 |   expand_reduce (\ x -> x) (\x i -> x*i) (+) 0 arr
67 | 
68 | -- ==
69 | -- entry: test_expand_outer_reduce
70 | -- input { [2i64,0i64,3i64,1i64] }
71 | -- output { [2i64,0i64,9i64,0i64] }
72 | 
73 | entry test_expand_outer_reduce (arr:[]i64) : []i64 =
74 |   expand_outer_reduce (\ x -> x) (\x i -> x*i) (+) 0 arr
75 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/sorts/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !*.fut
3 | !.gitignore
4 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/sorts/bubble_sort.fut:
--------------------------------------------------------------------------------
 1 | -- | Parallel bubble sort.
 2 | --
 3 | -- This may be useful if you have almost-sorted data that you want to
 4 | -- make fully-sorted in parallel.  Obviously *very* slow for
 5 | -- non-sorted data.
 6 | 
 7 | -- | Parallel bubble sort.  Runs with *O(n^2)* work and *O(n^2)* depth.
 8 | let bubble_sort [n] 't ((<=): t -> t -> bool) (xs: [n]t): [n]t =
 9 |   let f b xs i =
10 |     let dir = if i%2 == 0 then b else -b
11 |     let j = i + dir
12 |     let cmp x y = if dir == 1 then x <= y
13 |                   else ! (x <= y)
14 |     in if j >= 0 && j < n && (xs[j] `cmp` xs[i])
15 |        then (true, xs[j]) else (false, xs[i])
16 |   let iter xs b =
17 |     let (changed, xs) = tabulate n (f b xs) |> unzip
18 |     in (xs, -b, or changed)
19 |   in (loop (xs, b, continue) = (xs, 1, true) while continue do iter xs b).0
20 | 
21 | -- | Like `bubble_sort`@term, but sort based on key function.
22 | let bubble_sort_by_key [n] 't 'k (key: t -> k) ((<=): k -> k -> bool) (xs: [n]t): [n]t =
23 |   zip (map key xs) (iota n)
24 |   |> bubble_sort (\(x, _) (y, _) -> x <= y)
25 |   |> map (\(_, i) -> xs[i])
26 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/sorts/bubble_sort_tests.fut:
--------------------------------------------------------------------------------
 1 | -- | ignore
 2 | 
 3 | import "bubble_sort"
 4 | 
 5 | -- ==
 6 | -- entry: sort_i32
 7 | -- input { empty([0]i32) }
 8 | -- output { empty([0]i32) }
 9 | -- input { [5,4,3,2,1] }
10 | -- output { [1,2,3,4,5] }
11 | -- input { [5,4,3,3,2,1] }
12 | -- output { [1,2,3,3,4,5] }
13 | 
14 | entry sort_i32 (xs: []i32) = bubble_sort (i32.<=) xs
15 | 
16 | -- ==
17 | -- entry: sort_u16
18 | -- input { [5u16,4u16,3u16,2u16,1u16] }
19 | -- output { [1u16,2u16,3u16,4u16,5u16] }
20 | 
21 | entry sort_u16 (xs: []u16) = bubble_sort (u16.<=) xs
22 | 
23 | -- ==
24 | -- entry: sort_f32
25 | -- input { [5f32,4f32,3f32,2f32,1f32] }
26 | -- output { [1f32,2f32,3f32,4f32,5f32] }
27 | 
28 | entry sort_f32 (xs: []f32) = bubble_sort (f32.<=) xs
29 | 
30 | -- ==
31 | -- entry: sort_perm_i32
32 | -- input { [5,4,3,2,1,0,-1,-2] }
33 | -- output { [7, 6, 5, 4, 3, 2, 1, 0] }
34 | 
35 | entry sort_perm_i32 [n] (xs: [n]i32) =
36 |   zip xs (iota n)
37 |   |> bubble_sort_by_key (.0) (<=)
38 |   |> map ((.1) >-> i32.i64)
39 | 
40 | -- ==
41 | -- entry: sort_perm_f32
42 | -- input { [5f32,4f32,3f32,2f32,1f32,0f32,-1f32,-2f32] }
43 | -- output { [7, 6, 5, 4, 3, 2, 1, 0] }
44 | 
45 | entry sort_perm_f32 [n] (xs: [n]f32) =
46 |   zip xs (iota n)
47 |   |> bubble_sort_by_key (.0) (<=)
48 |   |> map ((.1) >-> i32.i64)
49 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/sorts/insertion_sort.fut:
--------------------------------------------------------------------------------
 1 | -- | A sequential implementation of insertion sort.
 2 | 
 3 | 
 4 | local
 5 | let swap 't [n] (i: i64) (j: i64) (xs: *[n]t): *[n]t =
 6 |   -- Need copies to prevent the uniqueness checker from getting
 7 |   -- cranky.
 8 |   let xi = copy xs[i]
 9 |   let xs[i] = copy xs[j]
10 |   let xs[j] = xi
11 |   in xs
12 | 
13 | -- | Insertion sort.  Runs with *O(n^2)* work and *O(n^2)* depth.
14 | let insertion_sort [n] 't ((<=): t -> t -> bool) (xs: [n]t): *[n]t =
15 |   -- Make a copy of the array so we can operate in-place.
16 |   loop xs = copy xs for i in 1..< i64.max n 1 do -- the upper bound is at least 1, so the range is empty when n is 0 or 1
17 |     -- Construct our own greater-than function out of <=.
18 |     let gt x y = !(x <= y)
19 |     let (_, xs') = loop (j, xs) = (i, xs) while j > 0 && (xs[j-1] `gt` xs[j]) do -- move element i left past every strictly greater neighbour
20 |                      (j-1, swap j (j-1) xs)
21 |     in xs'
22 | 
23 | 
24 | -- | Like `insertion_sort`, but sort based on key function.
25 | let insertion_sort_by_key [n] 't 'k (key: t -> k) ((<=): k -> k -> bool) (xs: [n]t): [n]t =
26 |   zip (map key xs) (iota n)
27 |   |> insertion_sort (\(x, _) (y, _) -> x <= y)
28 |   |> map (\(_, i) -> xs[i])
29 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/sorts/insertion_sort_tests.fut:
--------------------------------------------------------------------------------
 1 | -- | ignore
 2 | 
 3 | import "insertion_sort"
 4 | 
 5 | -- ==
 6 | -- entry: sort_i32
 7 | -- input { empty([0]i32) }
 8 | -- output { empty([0]i32) }
 9 | -- input { [5,4,3,2,1] }
10 | -- output { [1,2,3,4,5] }
11 | -- input { [5,4,3,3,2,1] }
12 | -- output { [1,2,3,3,4,5] }
13 | 
14 | entry sort_i32 (xs: []i32) = insertion_sort (i32.<=) xs
15 | 
16 | -- ==
17 | -- entry: sort_u16
18 | -- input { [5u16,4u16,3u16,2u16,1u16] }
19 | -- output { [1u16,2u16,3u16,4u16,5u16] }
20 | 
21 | entry sort_u16 (xs: []u16) = insertion_sort (u16.<=) xs
22 | 
23 | -- ==
24 | -- entry: sort_f32
25 | -- input { [5f32,4f32,3f32,2f32,1f32] }
26 | -- output { [1f32,2f32,3f32,4f32,5f32] }
27 | 
28 | entry sort_f32 (xs: []f32) = insertion_sort (f32.<=) xs
29 | 
30 | -- ==
31 | -- entry: sort_perm_i32
32 | -- input { [5,4,3,2,1,0,-1,-2] }
33 | -- output { [7, 6, 5, 4, 3, 2, 1, 0] }
34 | 
35 | entry sort_perm_i32 [n] (xs: [n]i32) =
36 |   zip xs (iota n)
37 |   |> insertion_sort_by_key (.0) (<=)
38 |   |> map ((.1) >-> i32.i64)
39 | 
40 | -- ==
41 | -- entry: sort_perm_f32
42 | -- input { [5f32,4f32,3f32,2f32,1f32,0f32,-1f32,-2f32] }
43 | -- output { [7, 6, 5, 4, 3, 2, 1, 0] }
44 | 
45 | entry sort_perm_f32 [n] (xs: [n]f32) =
46 |   zip xs (iota n)
47 |   |> insertion_sort_by_key (.0) (<=)
48 |   |> map ((.1) >-> i32.i64)
49 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/sorts/merge_sort.fut:
--------------------------------------------------------------------------------
 1 | -- | Bitonic merge sort.
 2 | --
 3 | -- Runs in *O(n log²(n))* work and *O(log²(n))* span.  Internally pads
 4 | -- the array to the next power of two, so a poor fit for some array
 5 | -- sizes.
 6 | 
 7 | -- Number of times `n` can be halved (integer division) before it
 8 | -- drops to 1 or below; 0 when n <= 1.
 9 | local let log2 (n: i64) : i64 =
10 |   let (steps, _) =
11 |     loop (steps, rest) = (0, n) while rest > 1 do
12 |       (steps + 1, rest / 2)
13 |   in steps
14 | 
15 | local let ensure_pow_2 [n] 't ((<=): t -> t -> bool) (xs: [n]t): (*[]t, i64) =
16 |   if n == 0 then (copy xs, 0) else
17 |   let d = log2 n
18 |   in if n == 2**d
19 |      then (copy xs, d)
20 |      else let largest = reduce (\x y -> if x <= y then y else x) xs[0] xs
21 |           in (concat xs (replicate (2**(d+1) - n) largest),
22 |               d+1)
23 | 
24 | local let kernel_par [n] 't ((<=): t -> t -> bool) (a: *[n]t) (p: i64) (q: i64) : *[n]t =
25 |   let d = 1 << (p-q) in
26 |   tabulate n (\i -> let a_i = a[i]
27 |                     let up1 = ((i >> p) & 2) == 0
28 |                     in
29 |                     if (i & d) == 0
30 |                     then let a_iord = a[i | d] in
31 |                          if a_iord <= a_i == up1
32 |                          then a_iord else a_i
33 |                     else let a_ixord = a[i ^ d] in
34 |                          if a_i <= a_ixord == up1
35 |                          then a_ixord else a_i)
36 | 
37 | -- | Sort an array in increasing order.
38 | let merge_sort [n] 't ((<=): t -> t -> bool) (xs: [n]t): *[n]t =
39 |   -- We need to pad the array so that its size is a power of 2.  We do
40 |   -- this by first finding the largest element in the input, and then
41 |   -- using that for the padding.  Then we know that the padding will
42 |   -- all be at the end, so we can easily cut it off.
43 |   let (xs, d) = ensure_pow_2 (<=) xs
44 |   in (loop xs for i < d do
45 |         loop xs for j < i+1 do kernel_par (<=) xs i j)[:n]
46 | 
47 | -- | Like `merge_sort`, but sort based on key function.
48 | let merge_sort_by_key [n] 't 'k (key: t -> k) ((<=): k -> k -> bool) (xs: [n]t): [n]t =
49 |   zip (map key xs) (iota n)
50 |   |> merge_sort (\(x, _) (y, _) -> x <= y)
51 |   |> map (\(_, i) -> xs[i])
52 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/sorts/merge_sort_tests.fut:
--------------------------------------------------------------------------------
 1 | -- | ignore
 2 | 
 3 | import "merge_sort"
 4 | 
 5 | -- ==
 6 | -- entry: sort_i32
 7 | -- input { empty([0]i32) }
 8 | -- output { empty([0]i32) }
 9 | -- input { [5,4,3,2,1] }
10 | -- output { [1,2,3,4,5] }
11 | -- input { [5,4,3,3,2,1] }
12 | -- output { [1,2,3,3,4,5] }
13 | 
14 | entry sort_i32 (xs: []i32) = merge_sort (i32.<=) xs
15 | 
16 | -- ==
17 | -- entry: sort_u16
18 | -- input { [5u16,4u16,3u16,2u16,1u16] }
19 | -- output { [1u16,2u16,3u16,4u16,5u16] }
20 | 
21 | entry sort_u16 (xs: []u16) = merge_sort (u16.<=) xs
22 | 
23 | -- ==
24 | -- entry: sort_f32
25 | -- input { [5f32,4f32,3f32,2f32,1f32] }
26 | -- output { [1f32,2f32,3f32,4f32,5f32] }
27 | 
28 | entry sort_f32 (xs: []f32) = merge_sort (f32.<=) xs
29 | 
30 | -- ==
31 | -- entry: sort_perm_i32
32 | -- input { [5,4,3,2,1,0,-1,-2] }
33 | -- output { [7, 6, 5, 4, 3, 2, 1, 0] }
34 | 
35 | entry sort_perm_i32 [n] (xs: [n]i32) =
36 |   zip xs (iota n)
37 |   |> merge_sort_by_key (.0) (<=)
38 |   |> map ((.1) >-> i32.i64)
39 | 
40 | -- ==
41 | -- entry: sort_perm_f32
42 | -- input { [5f32,4f32,3f32,2f32,1f32,0f32,-1f32,-2f32] }
43 | -- output { [7, 6, 5, 4, 3, 2, 1, 0] }
44 | 
45 | entry sort_perm_f32 [n] (xs: [n]f32) =
46 |   zip xs (iota n)
47 |   |> merge_sort_by_key (.0) (<=)
48 |   |> map ((.1) >-> i32.i64)
49 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/sorts/quick_sort.fut:
--------------------------------------------------------------------------------
 1 | -- | Data-parallel implementation of quicksort.  Note that this
 2 | -- quicksort, while parallel, is quite slow.  In almost all cases you
 3 | -- should use radix- or merge sort instead.
 4 | 
 5 | local import "../segmented/segmented"
 6 | 
 7 | local let segmented_replicate [n] 't (reps:[n]i64) (vs:[n]t) : []t =
 8 |   let idxs = replicated_iota reps
 9 |   in map (\i -> vs[i]) idxs
10 | 
11 | -- Three-way comparison derived from `<=`: 1 unless x <= y, else 0 if also y <= x, else -1.
12 | local let info 't ((<=): t -> t -> bool) (x:t) (y:t) : i64 =
13 |   if !(x <= y) then 1
14 |   else if y <= x then 0 else -1
15 | 
16 | local let tripit (x: i64): (i64,i64,i64) = -- one-hot (negative, zero, positive) from the sign of x
17 |   let b2i (b: bool) : i64 = i64.bool b
18 |   in (b2i (x < 0), b2i (x == 0), b2i (x > 0))
19 | 
20 | local let tripadd (a1:i64,e1:i64,b1:i64) (a2,e2,b2) =
21 |   (a1+a2,e1+e2,b1+b2)
22 | 
23 | local type sgm = {start:i64,sz:i64}  -- segment
24 | 
25 | local let step [n][k] 't ((<=): t -> t -> bool) (xs:*[n]t) (sgms:[k]sgm) : (*[n]t,[]sgm) =
26 |   -- One round: partition every segment around a pivot and return the sub-segments that still hold more than one element.
27 | 
28 |   -- find a pivot for each segment (the element at its middle index)
29 |   let pivots : []t = map (\sgm -> xs[sgm.start + sgm.sz/2]) sgms
30 |   let sgms_szs : []i64 = map (\sgm -> sgm.sz) sgms
31 |   let idxs = replicated_iota sgms_szs
32 |   let m = length idxs
33 |   let idxs = idxs :> [m]i64
34 | 
35 |   -- find the indexes into values in segments; after a value equal to
36 |   -- a pivot has moved, it will no longer be part of a segment (it
37 |   -- need not be moved again).
38 |   let is =
39 |     let is1 = segmented_replicate sgms_szs (map (\x -> x.start) sgms)
40 |               :> [m]i64
41 |     let fs = map2 (!=) is1 (rotate (i64.negate 1) is1)
42 |     let is2 = segmented_iota fs
43 |     in map2 (+) is1 is2
44 | 
45 |   -- for each such value, how does it compare to the pivot associated
46 |   -- with the segment?
47 |   let infos : []i64 = map2 (\idx i -> info (<=) xs[i] pivots[idx])
48 |                            idxs is
49 |   let orders : [](i64,i64,i64) = map tripit infos
50 | 
51 |   -- compute segment descriptor: a flag is set wherever the segment index changes, and always at position 0
52 |   let flags =
53 |     let flags = map2 (!=) idxs (rotate (i64.negate 1) idxs)
54 |     in flags with [0] = true
55 | 
56 |   -- compute partition sizes for each segment (one triple per segment)
57 |   let pszs = segmented_reduce tripadd (0,0,0) flags orders :> [k](i64,i64,i64)
58 | 
59 |   -- compute the new segments: sizes a and b of each triple become two candidate segments; those with sz <= 1 are dropped
60 |   let sgms' =
61 |     map2 (\(sgm:sgm) (a,e,b) -> [{start=sgm.start,sz=a},
62 |                                  {start=sgm.start+a+e,sz=b}]) sgms pszs
63 |     |> flatten
64 |     |> filter (\sgm -> sgm.sz > 1)
65 | 
66 |   -- compute the new positions of the values in the present segments
67 |   let newpos : []i64 =
68 |     let where : [](i64,i64,i64) = segmented_scan tripadd (0,0,0) flags orders
69 |     in map3 (\i (a,e,b) info ->
70 |              let (x,y,_) = pszs[i]
71 |              let s = sgms[i].start
72 |              in if info < 0 then s+a-1          -- placed in the first part of the segment
73 |                 else if info > 0 then s+b-1+x+y -- placed after the x+y values of the other two parts
74 |                 else s+e-1+x)                   -- placed after the x values of the first part
75 |             idxs where infos
76 | 
77 |   let vs = map (\i -> xs[i]) is    -- gather the values that take part in this round
78 |   let xs' = scatter xs newpos vs   -- write each gathered value to its new position
79 |   in (xs',sgms')
80 | 
81 | -- | Quicksort. Given a comparison function (<=) and an array of
82 | -- elements, `qsort (<=) xs` returns an array with the elements in
83 | -- `xs` sorted according to `<=`. The algorithm has best case work
84 | -- complexity *O(n)* (when all elements are identical), worst case
85 | -- work complexity *O(n^2)*, and an average case work complexity of
86 | -- *O(n log n)*. It has best depth complexity *O(1)*, worst depth
87 | -- complexity *O(n)* and average depth complexity *O(log n)*.
88 | 
89 | let qsort [n] 't ((<=): t -> t -> bool) (xs:[n]t) : [n]t =
90 |   -- Fewer than two elements: nothing to sort. Otherwise apply step until no segments remain.
91 |   if n >= 2 then (loop (ys,pending) = (copy xs,[{start=0,sz=n}]) while length pending > 0 do step (<=) ys pending).0
92 |   else xs
93 | 
94 | -- | Variant of `qsort`@term that orders the elements by the keys `key` extracts from them.
95 | let qsort_by_key [n] 't 'k (key: t -> k) ((<=): k -> k -> bool) (xs: [n]t): [n]t =
96 |   let keyed = zip (map key xs) (iota n)
97 |   let sorted = qsort (\(x, _) (y, _) -> x <= y) keyed
98 |   in map (\(_, i) -> xs[i]) sorted
99 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/sorts/quick_sort_test.fut:
--------------------------------------------------------------------------------
 1 | -- | ignore
 2 | 
 3 | import "quick_sort"
 4 | 
 5 | -- ==
 6 | -- entry: sort_i32
 7 | -- input { empty([0]i32) }
 8 | -- output { empty([0]i32) }
 9 | -- input { [5,4,3,2,1] }
10 | -- output { [1,2,3,4,5] }
11 | -- input { [5,4,3,3,2,1] }
12 | -- output { [1,2,3,3,4,5] }
13 | 
14 | entry sort_i32 (xs: []i32) = qsort (i32.<=) xs
15 | 
16 | -- ==
17 | -- entry: sort_u16
18 | -- input { [5u16,4u16,3u16,2u16,1u16] }
19 | -- output { [1u16,2u16,3u16,4u16,5u16] }
20 | 
21 | entry sort_u16 (xs: []u16) = qsort (u16.<=) xs
22 | 
23 | -- ==
24 | -- entry: sort_f32
25 | -- input { [5f32,4f32,3f32,2f32,1f32] }
26 | -- output { [1f32,2f32,3f32,4f32,5f32] }
27 | 
28 | entry sort_f32 (xs: []f32) = qsort (f32.<=) xs
29 | 
30 | -- ==
31 | -- entry: sort_perm_i32
32 | -- input { [5,4,3,2,1,0,-1,-2] }
33 | -- output { [7, 6, 5, 4, 3, 2, 1, 0] }
34 | 
35 | entry sort_perm_i32 [n] (xs: [n]i32) =
36 |   zip xs (iota n)
37 |   |> qsort_by_key (.0) (<=)
38 |   |> map ((.1) >-> i32.i64)
39 | 
40 | -- ==
41 | -- entry: sort_perm_f32
42 | -- input { [5f32,4f32,3f32,2f32,1f32,0f32,-1f32,-2f32] }
43 | -- output { [7, 6, 5, 4, 3, 2, 1, 0] }
44 | 
45 | entry sort_perm_f32 [n] (xs: [n]f32) =
46 |   zip xs (iota n)
47 |   |> qsort_by_key (.0) (<=)
48 |   |> map ((.1) >-> i32.i64)
49 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/sorts/radix_sort.fut:
--------------------------------------------------------------------------------
  1 | -- | A non-comparison-based sort that sorts an array in *O(k n)* work
  2 | -- and *O(k log(n))* span, where *k* is the number of bits in each element.
  3 | --
  4 | -- Generally, this is the sorting function we recommend for Futhark
  5 | -- programs, but be careful about negative integers (use
  6 | -- `radix_sort_int`@term) and floating-point numbers (use
  7 | -- `radix_sort_float`@term).  If you need a comparison-based sort,
  8 | -- consider `merge_sort`@term@"merge_sort".
  9 | --
 10 | -- ## See Also
 11 | --
 12 | -- * `merge_sort`@term@"merge_sort"
 13 | 
 14 | local let radix_sort_step [n] 't (xs: [n]t) (get_bit: i32 -> t -> i32)
 15 |                                  (digit_n: i32): [n]t =
 16 |   let num x = get_bit (digit_n+1) x * 2 + get_bit digit_n x  -- two-bit digit from bits digit_n and digit_n+1 (0..3 assuming get_bit yields 0 or 1)
 17 |   let pairwise op (a1,b1,c1,d1) (a2,b2,c2,d2) =
 18 |     (a1 `op` a2, b1 `op` b2, c1 `op` c2, d1 `op` d2)
 19 |   let bins = xs |> map num
 20 |   let flags = bins |> map (\x -> if x == 0 then (1,0,0,0)  -- one counter per digit value
 21 |                                  else if x == 1 then (0,1,0,0)
 22 |                                  else if x == 2 then (0,0,1,0)
 23 |                                  else (0,0,0,1))
 24 |   let offsets = scan (pairwise (+)) (0,0,0,0) flags  -- inclusive running counts of each digit value
 25 |   let (na,nb,nc,_nd) = last offsets  -- totals for digits 0, 1 and 2; radix_sort runs no step when n == 0
 26 |   let f bin (a,b,c,d) = match bin  -- destination: running count within the bin, offset by the totals of lower bins
 27 |                         case 0 -> a-1
 28 |                         case 1 -> na+b-1
 29 |                         case 2 -> na+nb+c-1
 30 |                         case _ -> na+nb+nc+d-1
 31 |   let is = map2 f bins offsets
 32 |   in scatter (copy xs) is xs  -- write every element to its destination in a fresh copy
 33 | 
 34 | -- | The `num_bits` and `get_bit` arguments can be taken from one of
 35 | -- the numeric modules of module type `integral`@mtype@"/futlib/math"
 36 | -- or `float`@mtype@"/futlib/math", such as `i32`@term@"/futlib/math"
 37 | -- or `f64`@term@"/futlib/math".  However, if you know that
 38 | -- the input array only uses lower-order bits (say, if all integers
 39 | -- are less than 100), then you can profitably pass a smaller
 40 | -- `num_bits` value to reduce the number of sequential iterations.
 41 | --
 42 | -- **Warning:** while radix sort can be used with numbers, the bitwise
 43 | -- representation of both integers and floats means that negative
 44 | -- numbers are sorted as *greater* than non-negative.  Negative floats
 45 | -- are further sorted according to their absolute value.  For example,
 46 | -- radix-sorting `[-2.0, -1.0, 0.0, 1.0, 2.0]` will produce `[0.0,
 47 | -- 1.0, 2.0, -1.0, -2.0]`.  Use `radix_sort_int`@term and
 48 | -- `radix_sort_float`@term in the (likely) cases that this is not what
 49 | -- you want.
 50 | let radix_sort [n] 't (num_bits: i32) (get_bit: i32 -> t -> i32)
 51 |                       (xs: [n]t): [n]t =
 52 |   let rounds = if n != 0 then (num_bits+1)/2 else 0  -- each round handles two bits
 53 |   in loop ys = xs for r < rounds do radix_sort_step ys get_bit (2*r)
 54 | 
 55 | let with_indices [n] 'a (xs: [n]a) : [n](a, i64) =
 56 |   map2 (\x i -> (x, i)) xs (iota n)  -- pair every element with its position
 57 | 
 58 | local let by_key_wrapper [n] 't sorter key num_bits get_bit (xs: [n]t) : [n]t =
 59 |   map key xs
 60 |   |> with_indices
 61 |   |> sorter num_bits (\i (k, _) -> get_bit i k)
 62 |   |> map (\(_, i : i64) -> xs[i]) -- OK because '0<=i<n'.
 63 | 
 64 | -- | Like `radix_sort`, but sort based on key function.
 65 | let radix_sort_by_key [n] 't 'k
 66 |     (key: t -> k)
 67 |     (num_bits: i32) (get_bit: i32 -> k -> i32) (xs: [n]t): [n]t =
 68 |   by_key_wrapper radix_sort key num_bits get_bit xs
 69 | 
 70 | -- | A thin wrapper around `radix_sort`@term that ensures negative
 71 | -- integers are sorted as expected.  Simply pass the usual `num_bits`
 72 | -- and `get_bit` definitions from e.g. `i32`@term@"/futlib/math".
 73 | let radix_sort_int [n] 't (num_bits: i32) (get_bit: i32 -> t -> i32)
 74 |                           (xs: [n]t): [n]t =
 75 |   let top = num_bits - 1
 76 |   -- Invert only the most significant bit; every other bit passes through.
 77 |   let flipped_top pos v =
 78 |     if pos != top then get_bit pos v else get_bit pos v ^ 1
 79 |   in radix_sort num_bits flipped_top xs
 80 | 
 81 | -- | Like `radix_sort_int`, but sort based on key function.
 82 | let radix_sort_int_by_key [n] 't 'k
 83 |     (key: t -> k)
 84 |     (num_bits: i32) (get_bit: i32 -> k -> i32) (xs: [n]t): [n]t =
 85 |   by_key_wrapper radix_sort_int key num_bits get_bit xs
 86 | 
 87 | -- | A thin wrapper around `radix_sort`@term that ensures floats are
 88 | -- sorted as expected.  Simply pass the usual `num_bits` and `get_bit`
 89 | -- definitions from `f32`@term@"/futlib/math" and
 90 | -- `f64`@term@"/futlib/math".
 91 | let radix_sort_float [n] 't (num_bits: i32) (get_bit: i32 -> t -> i32)
 92 |                             (xs: [n]t): [n]t =
 93 |   let sign_pos = num_bits - 1
 94 |   -- The requested bit is inverted in two situations:
 95 |   --
 96 |   -- * the value's most significant bit is set, so that more negative
 97 |   --   numbers come before less negative ones;
 98 |   --
 99 |   -- * the most significant bit itself is requested, so that negative
100 |   --   numbers come before positive ones.
101 |   let ordered_bit pos v =
102 |     let invert = get_bit sign_pos v == 1 || pos == sign_pos
103 |     in if invert then get_bit pos v ^ 1 else get_bit pos v
104 |   in radix_sort num_bits ordered_bit xs
105 | 
106 | -- | Like `radix_sort_float`, but sort based on key function.
107 | let radix_sort_float_by_key [n] 't 'k
108 |     (key: t -> k)
109 |     (num_bits: i32) (get_bit: i32 -> k -> i32) (xs: [n]t): [n]t =
110 |   by_key_wrapper radix_sort_float key num_bits get_bit xs
111 | 


--------------------------------------------------------------------------------
/futhark/lib/github.com/diku-dk/sorts/radix_sort_tests.fut:
--------------------------------------------------------------------------------
 1 | -- | ignore
 2 | 
 3 | import "radix_sort"
 4 | 
 5 | -- ==
 6 | -- entry: sort_i32
 7 | -- input { [5,4,3,2,1,0,-1,-2] }
 8 | -- output { [-2,-1,0,1,2,3,4,5] }
 9 | -- input { [5,4,3,3,2,1,0,-1,-2,-1] }
10 | -- output { [-2,-1,-1,0,1,2,3,3,4,5] }
11 | 
12 | entry sort_i32 = radix_sort_int i32.num_bits i32.get_bit
13 | 
14 | -- ==
15 | -- entry: sort_u16
16 | -- input { [5u16,4u16,3u16,2u16,1u16,-1u16] }
17 | -- output { [-1u16,1u16,2u16,3u16,4u16,5u16] }
18 | 
19 | entry sort_u16 = radix_sort_int u16.num_bits u16.get_bit
20 | 
21 | -- ==
22 | -- entry: sort_f32
23 | -- input { [5f32,4f32,3f32,2f32,-1f32,-2f32] }
24 | -- output { [-2f32,-1f32,2f32,3f32,4f32,5f32] }
25 | 
26 | entry sort_f32 = radix_sort_float f32.num_bits f32.get_bit
27 | 
28 | -- ==
29 | -- entry: sort_perm_i32
30 | -- input { [5,4,3,2,1,0,-1,-2] }
31 | -- output { [7, 6, 5, 4, 3, 2, 1, 0] }
32 | 
33 | entry sort_perm_i32 [n] (xs: [n]i32) =
34 |   zip xs (iota n)
35 |   |> radix_sort_int_by_key (.0) i32.num_bits i32.get_bit
36 |   |> map ((.1) >-> i32.i64)
37 | 
38 | -- ==
39 | -- entry: sort_perm_f32
40 | -- input { [5f32,4f32,3f32,2f32,1f32,0f32,-1f32,-2f32] }
41 | -- output { [7, 6, 5, 4, 3, 2, 1, 0] }
42 | 
43 | entry sort_perm_f32 [n] (xs: [n]f32) =
44 |   zip xs (iota n)
45 |   |> radix_sort_float_by_key (.0) f32.num_bits f32.get_bit
46 |   |> map ((.1) >-> i32.i64)
47 | 


--------------------------------------------------------------------------------
/futhark/main.c:
--------------------------------------------------------------------------------
  1 | #include <getopt.h>
  2 | #include <stdio.h>
  3 | #include <assert.h>
  4 | #include <string.h>
  5 | #include <stdlib.h>
  6 | #include <sys/time.h>
  7 | 
  8 | #include "ray.h"
  9 | 
 10 | void ppm_to_file(const char *filename, int *pixels, int height, int width) {
 11 |   /* Writes height*width pixels, each packed as 0xRRGGBB in an int, as a plain-text P3 PPM. */
 12 |   FILE *file = fopen(filename, "w");
 13 |   if (file == NULL) { perror(filename); exit(EXIT_FAILURE); } /* must not vanish under NDEBUG */
 14 |   fprintf(file, "P3\n%d %d\n255\n", width, height);
 15 | 
 16 |   for (int i = 0; i < height*width; i++) {
 17 |     fprintf(file, "%d %d %d\n",
 18 |             (pixels[i]>>16)&0xFF,
 19 |             (pixels[i]>>8)&0xFF,
 20 |             (pixels[i])&0xFF);
 21 |   }
 22 |   /* fclose flushes buffered output, so a failure here means the image is incomplete. */
 23 |   if (fclose(file) != 0) { perror(filename); exit(EXIT_FAILURE); }
 24 | }
 25 | 
 26 | int main(int argc, char** argv) {
 27 |   int height = 200;             /* image height in pixels (-n) */
 28 |   int width = 200;              /* image width in pixels (-m) */
 29 |   char *imgfile = NULL;         /* output PPM path (-f); NULL means no image is written */
 30 |   char *scene_name = "rgbbox";  /* scene to render (-s) */
 31 |   int runs = 10;                /* repetitions of each timed phase (-r) */
 32 | 
 33 |   int opt;
 34 |   while ((opt = getopt(argc, argv, "m:n:f:s:r:")) != -1) {
 35 |     switch (opt) {
 36 |     case 'n':
 37 |       height = atoi(optarg);
 38 |       break;
 39 |     case 'm':
 40 |       width = atoi(optarg);
 41 |       break;
 42 |     case 'f':
 43 |       imgfile = optarg;
 44 |       break;
 45 |     case 's':
 46 |       scene_name = optarg;
 47 |       break;
 48 |     case 'r':
 49 |       runs = atoi(optarg);
 50 |       break;
 51 |     default: /* '?' */
 52 |       fprintf(stderr,
 53 |               "Usage: %s [-n height] [-m width] [-s scene] [-r runs] [-f FILE.ppm]\n",
 54 |               argv[0]);
 55 |       exit(EXIT_FAILURE);
 56 |     }
 57 |   }
 58 | 
 59 |   /* runs divides the timings and sizes the render loop; height*width sizes the image buffer. */
 60 |   if (height < 1 || width < 1 || runs < 1) {
 61 |     fprintf(stderr, "-n, -m and -r must be positive integers\n");
 62 |     exit(EXIT_FAILURE);
 63 |   }
 64 | 
 65 |   struct futhark_context_config *cfg = futhark_context_config_new();
 66 |   assert(cfg != NULL);
 67 |   struct futhark_context *ctx = futhark_context_new(cfg);
 68 |   assert(ctx != NULL);
 69 |   assert(futhark_context_get_error(ctx) == NULL);
 70 | 
 71 |   int ret;
 72 |   struct futhark_opaque_scene *scene = NULL;
 73 |   struct futhark_opaque_prepared_scene *prepared_scene = NULL;
 74 |   struct futhark_i32_2d *img = NULL;
 75 |   struct timeval t_start, t_end;
 76 | 
 77 |   if (strcmp(scene_name, "rgbbox") == 0) {
 78 |     ret = futhark_entry_rgbbox(ctx, &scene);
 79 |     assert(ret == 0);
 80 |   } else if (strcmp(scene_name, "irreg") == 0) {
 81 |     ret = futhark_entry_irreg(ctx, &scene);
 82 |     assert(ret == 0);
 83 |   } else {
 84 |     fprintf(stderr, "Unknown scene: %s\n", scene_name);
 85 |     fprintf(stderr, "Known scenes: rgbbox, irreg\n");
 86 |     exit(1);
 87 |   }
 88 |   assert(scene != NULL);
 89 | 
 90 |   printf("Using scene %s (-s to switch).\n", scene_name);
 91 |   printf("Timing over average of %d runs (-r to change).\n", runs);
 92 |   /* Phase 1: time scene preparation; only the result of the last run is kept. */
 93 |   gettimeofday(&t_start, NULL);
 94 |   for (int i = 0; i < runs; i++) {
 95 |     if (prepared_scene != NULL) {
 96 |       futhark_free_opaque_prepared_scene(ctx, prepared_scene);
 97 |     }
 98 |     ret = futhark_entry_prepare_scene(ctx, &prepared_scene, height, width, scene);
 99 |     assert(ret == 0);
100 |     ret = futhark_context_sync(ctx);
101 |     assert(ret == 0);
102 |   }
103 |   gettimeofday(&t_end, NULL);
104 | 
105 |   printf("Scene BVH construction in %fs.\n",
106 |          ((t_end.tv_sec+t_end.tv_usec/1000000.0) -
107 |           (t_start.tv_sec+t_start.tv_usec/1000000.0))/runs);
108 |   /* Phase 2: time rendering; only the image of the last run is kept. */
109 |   gettimeofday(&t_start, NULL);
110 |   for (int i = 0; i < runs; i++) {
111 |     if (img != NULL) {
112 |       futhark_free_i32_2d(ctx, img);
113 |     }
114 |     ret = futhark_entry_render(ctx, &img, height, width, prepared_scene);
115 |     assert(ret == 0);
116 |     ret = futhark_context_sync(ctx);
117 |     assert(ret == 0);
118 |   }
119 |   gettimeofday(&t_end, NULL);
120 | 
121 |   printf("Rendering in %fs.\n",
122 |          ((t_end.tv_sec+t_end.tv_usec/1000000.0) -
123 |           (t_start.tv_sec+t_start.tv_usec/1000000.0))/runs);
124 | 
125 |   if (imgfile == NULL) {
126 |     printf("-f not passed, so not writing image to file\n");
127 |   } else {
128 |     int *img_host = malloc(sizeof(int) * height * width);
129 |     assert(img_host != NULL);
130 |     ret = futhark_values_i32_2d(ctx, img, img_host);
131 |     assert(ret == 0);
132 |     printf("Writing image to %s.\n", imgfile);
133 |     ppm_to_file(imgfile, img_host, height, width);
134 |     free(img_host);
135 |   }
136 | 
137 |   futhark_free_i32_2d(ctx, img);
138 |   futhark_free_opaque_prepared_scene(ctx, prepared_scene);
139 |   futhark_free_opaque_scene(ctx, scene);
140 |   futhark_context_free(ctx);
141 |   futhark_context_config_free(cfg);
142 | }
143 | 


--------------------------------------------------------------------------------
/futhark/prim.fut:
--------------------------------------------------------------------------------
 1 | type vec3 = {x: f32, y: f32, z: f32 }
 2 | 
 3 | -- A convenient alias so we don't have to indicate the fields all the
 4 | -- time.
 5 | let vec (x, y, z) : vec3 = {x,y,z}
 6 | 
 7 | let vf f (v1: vec3) (v2: vec3) =
 8 |   {x= f v1.x v2.x,
 9 |    y= f v1.y v2.y,
10 |    z= f v1.z v2.z}
11 | 
12 | let vec_add = vf (+)
13 | let vec_sub = vf (-)
14 | let vec_mul = vf (*)
15 | let vec_div = vf (/)
16 | 
17 | let scale s (v: vec3) =
18 |   { x=s*v.x
19 |   , y=s*v.y
20 |   , z=s*v.z }
21 | 
22 | let dot (v1: vec3) (v2: vec3) =
23 |   -- sum of the componentwise products, added in x, y, z order
24 |   v1.x * v2.x + v1.y * v2.y + v1.z * v2.z
25 | 
26 | let norm v = f32.sqrt (dot v v)
27 | 
28 | let normalise v = scale (1.0 / norm v) v
29 | 
30 | let cross (v1: vec3) (v2: vec3) =
31 |   { x=v1.y*v2.z-v1.z*v2.y
32 |   , y=v1.z*v2.x-v1.x*v2.z
33 |   , z=v1.x*v2.y-v1.y*v2.x }
34 | 
35 | -- | Axis-aligned bounding box.
36 | type aabb = { min: vec3, max: vec3 }
37 | 
38 | let enclosing (box0: aabb) (box1: aabb) : aabb =
39 |   -- The box spanning both arguments: the componentwise minimum of
40 |   -- the two `min` corners paired with the componentwise maximum of
41 |   -- the two `max` corners.
42 |   let lower = vf f32.min box0.min box1.min
43 |   let upper = vf f32.max box0.max box1.max
44 |   in { min = lower
45 |      , max = upper }
46 | 
47 | let centre ({min, max}: aabb) =
48 |   -- midpoint of the box: the `min` corner plus half of (max - min)
49 |   let half_diagonal = scale 0.5 (max `vec_sub` min)
50 |   in min `vec_add` half_diagonal
51 | 


--------------------------------------------------------------------------------
/futhark/radixtree.fut:
--------------------------------------------------------------------------------
 1 | -- Based on "Maximizing Parallelism in the Construction of BVHs,
 2 | -- Octrees, and k-d Trees" by Tero Karras.
 3 | 
 4 | local let div_rounding_up x y : i32 = (x + y - 1) / y
 5 | 
 6 | type ptr = #leaf i32 | #inner i32
 7 | 
 8 | type inner = {left:ptr, right:ptr, parent: i32}
 9 | 
10 | -- | `L` must be sorted.
11 | let mk_radix_tree [n] (L: [n]u32) : []inner =
12 |   -- delta(i, j): leading-zero count of L[i] xor L[j] (number of leading bits the keys share), or -1 when j is out of range.
13 |   let delta (i, j) = if j >= 0 && j < i32.i64 n
14 |                      then let Li = #[unsafe] L[i]
15 |                           let Lj = #[unsafe] L[j]
16 |                           -- Handle duplicates by using index as
17 |                           -- tiebreaker if necessary.
18 |                           in if Li == Lj
19 |                              then 32 + u32.clz (u32.i32 i ^ u32.i32 j)
20 |                              else u32.clz (Li ^ Lj)
21 |                      else -1
22 | 
23 |   let node (i: i64) =  -- builds inner node i and the parent links for its two children
24 |     let i = i32.i64 i
25 | 
26 |     -- Determine direction of range.
27 |     let d = i32.sgn (delta(i,i+1) - delta(i,i-1))
28 | 
29 |     -- Compute upper bound for the length of the range.
30 |     let delta_min = delta(i,i-d)
31 |     let l_max = loop l_max = 2
32 |                 while delta(i, i+l_max*d) > delta_min do
33 |                   l_max * 2
34 | 
35 |     -- Find the other end using binary search.
36 |     let (l, _) = loop (l, t) = (0, l_max/2)
37 |                  while t > 0 do
38 |                    if delta(i, i+(l+t)*d) > delta_min
39 |                    then (l + t, t/2)
40 |                    else (l, t/2)
41 |     let j = i + l * d
42 | 
43 |     -- Find the split position using binary search.
44 |     let delta_node = delta(i, j)
45 |     let (s, _) = loop (s, q) = (0, 1)
46 |                  while q <= l do
47 |                  let t = l `div_rounding_up` (q*2)
48 |                  in if delta(i, i+(s+t)*d) > delta_node
49 |                     then (s+t, q*2)
50 |                     else (s, q*2)
51 |     let gamma = i + s*d + i32.min d 0
52 | 
53 |     -- Output child pointers
54 |     let (left, set_left_parent) =  -- second component: index of the inner child, or -1 for a leaf
55 |       if i32.min i j == gamma
56 |       then (#leaf gamma, -1)
57 |       else (#inner gamma, gamma)
58 | 
59 |     let (right, set_right_parent) =  -- same convention as for the left child
60 |       if i32.max i j == gamma + 1
61 |       then (#leaf (gamma+1), -1)
62 |       else (#inner (gamma+1), gamma+1)
63 | 
64 |     in ({left, right}, (i64.i32 set_left_parent, i), (i64.i32 set_right_parent, i))  -- node plus two (child index, parent i) pairs
65 | 
66 |   let (inners, parents_a, parents_b) = tabulate (n-1) node |> unzip3
67 |   let k = (n-1)*2
68 |   let parents = scatter (replicate (n-1) (-1))  -- entries that are never written keep -1
69 |                         (map (.0) parents_a ++ map (.0) parents_b :> [k]i64)  -- targets; NOTE(review): the -1 targets rely on scatter skipping out-of-bounds indices
70 |                         (map (.1) parents_a ++ map (.1) parents_b :> [k]i32)  -- values: the parent index i
71 | 
72 |   in map2 (\{left, right} parent -> {left, right, parent}) inners parents
73 | 


--------------------------------------------------------------------------------
/futhark/ray.fut:
--------------------------------------------------------------------------------
  1 | import "prim"
  2 | 
  3 | let scene_epsilon:f32 = 0.1
  4 | type pos = vec3
  5 | type dir = vec3
  6 | type colour = vec3
  7 | 
  8 | let black : vec3 = {x=0.0, y=0.0, z=0.0}
  9 | let white : vec3 = {x=1.0, y=1.0, z=1.0}
 10 | 
 11 | type ray = {origin: vec3,
 12 |             dir: vec3}
 13 | 
 14 | let point_at_param (ray: ray) t =
 15 |   ray.origin `vec_add` scale t ray.dir
 16 | 
 17 | type hit = { t: f32
 18 |            , p: pos
 19 |            , normal: dir
 20 |            , colour: colour }
 21 | 
 22 | type sphere = { pos: pos
 23 |               , colour: colour
 24 |               , radius: f32 }
 25 | 
 26 | type opt 'a = #some a | #none
 27 | 
 28 | let sphere_aabb (s: sphere) : aabb =
 29 |   { min = s.pos `vec_sub` {x=s.radius, y=s.radius, z=s.radius}
 30 |   , max = s.pos `vec_add` {x=s.radius, y=s.radius, z=s.radius}}
 31 | 
 32 | let sphere_hit (s: sphere) r t_min t_max : opt hit =
 33 |   -- Solves the ray/sphere quadratic. The (-b - sqrt)/a root is tried
 34 |   -- first; the (-b + sqrt)/a root is used only when the first one
 35 |   -- lies outside the open interval (t_min, t_max).
 36 |   let oc = r.origin `vec_sub` s.pos
 37 |   let a = dot r.dir r.dir
 38 |   let b = dot oc r.dir
 39 |   let c = dot oc oc - s.radius*s.radius
 40 |   let discriminant = b*b - a*c
 41 |   let root = f32.sqrt discriminant
 42 |   let hit_at t : opt hit =
 43 |     if t > t_min && t < t_max
 44 |     then let p = point_at_param r t
 45 |          in #some { t, p, colour = s.colour
 46 |                   , normal = scale (1.0/s.radius) (p `vec_sub` s.pos) }
 47 |     else #none
 48 |   in if discriminant <= 0.0 then #none
 49 |      else match hit_at ((-b - root)/a)
 50 |           case #some near -> #some near
 51 |           case #none -> hit_at ((-b + root)/a)
 52 | 
 53 | let aabb_hit (aabb: aabb) (r: ray) tmin0 tmax0 =
 54 |   let iter min' max' origin' dir' tmin' tmax' =
 55 |     let invD = 1.0 / dir'
 56 |     let t0 = (min' - origin') * invD
 57 |     let t1 = (max' - origin') * invD
 58 |     let (t0', t1') = if invD < 0.0 then (t1, t0) else (t0, t1)
 59 |     let tmin'' = f32.max t0' tmin'
 60 |     let tmax'' = f32.min t1' tmax'
 61 |     in (tmin'', tmax'')
 62 |   let (tmin1, tmax1) =
 63 |     iter aabb.min.x aabb.max.x r.origin.x r.dir.x tmin0 tmax0
 64 |   in if tmax1 <= tmin1 then false
 65 |      else let (tmin2, tmax2) =
 66 |             iter aabb.min.y aabb.max.y r.origin.y r.dir.y tmin1 tmax1
 67 |           in if tmax2 <= tmin2 then false
 68 |              else let (tmin3, tmax3) =
 69 |                     iter aabb.min.z aabb.max.z r.origin.z r.dir.z tmin2 tmax2
 70 |                   in !(tmax3 <= tmin3)
 71 | 
 72 | import "bvh"
 73 | 
 74 | type~ objs = bvh [] sphere
 75 | 
 76 | let objs_hit (bvh: objs) (r: ray) (t_min: f32) (t_max: f32) : opt hit =
 77 |   let contains aabb = aabb_hit aabb r t_min t_max
 78 |   let closest_hit (j, t_max) i s =
 79 |     match sphere_hit s r scene_epsilon t_max
 80 |     case #none -> (j, t_max)
 81 |     case #some h -> (i, h.t)
 82 |   let (j, t_max) = bvh_fold contains closest_hit (-1, t_max) bvh
 83 |   in if j >= 0
 84 |      then let s = #[unsafe] bvh.L[j]
 85 |           in sphere_hit s r t_min (t_max+1)
 86 |      else #none
 87 | 
 88 | type camera = { origin: pos
 89 |               , llc: pos
 90 |               , horizontal: dir
 91 |               , vertical: dir }
 92 | 
 93 | let camera lookfrom lookat vup vfov aspect =
 94 |   -- Half-extents of the view plane, from the vertical field of view
 95 |   -- (converted from degrees to radians) and the aspect ratio.
 96 |   let half_height = f32.tan ((vfov * f32.pi / 180.0) / 2.0)
 97 |   let half_width = aspect * half_height
 98 |   -- Camera basis built from the viewing direction and vup.
 99 |   let w = normalise (vec_sub lookfrom lookat)
100 |   let u = normalise (cross vup w)
101 |   let v = cross w u
102 |   let across = scale half_width u
103 |   let upward = scale half_height v
104 |   in { origin = lookfrom
105 |      , llc = vec_sub (vec_sub (vec_sub lookfrom across) upward) w
106 |      , horizontal = scale (2.0*half_width) u
107 |      , vertical = scale (2.0*half_height) v }
108 | 
109 | let get_ray (cam: camera) s t : ray =
110 |   { origin = cam.origin
111 |   , dir = cam.llc `vec_add` scale s cam.horizontal
112 |                   `vec_add` scale t cam.vertical
113 |                   `vec_sub` cam.origin
114 |   }
115 | 
116 | let reflect v n =
117 |   v `vec_sub` scale (2.0 * dot v n) n
118 | 
119 | let scatter (r: ray) (hit: hit) : opt (ray, colour) =
120 |   let reflected = reflect (normalise r.dir) hit.normal
121 |   let scattered = {origin = hit.p, dir = reflected}
122 |   in if dot scattered.dir hit.normal > 0.0
123 |      then #some (scattered, hit.colour)
124 |      else #none
125 | 
126 | let ray_colour objs r (max_depth: i32) =
127 |   (.3) <|
128 |   loop (r, depth, light, colour) = (r, 0, vec(1,1,1), vec(0,0,0))
129 |   while depth < max_depth do
130 |     match objs_hit objs r 0.000 1000000000.0
131 |     case #some hit ->
132 |       (match scatter r hit
133 |        case #some (scattered, attenuation) ->
134 |          (scattered, depth + 1,
135 |           light `vec_mul` attenuation,
136 |           light `vec_mul` colour)
137 |        case #none ->
138 |          (r, max_depth,
139 |           light,
140 |           light `vec_mul` colour))
141 |     case #none ->
142 |       let unit_dir = normalise r.dir
143 |       let t = 0.5 * (unit_dir.y + 1.0)
144 |       let bg = {x=0.5, y=0.7, z=1.0}
145 |       in (r, max_depth,
146 |           light,
147 |           light `vec_mul`
148 |           (scale (1.0-t) white `vec_add` scale t bg))
149 | 
150 | let trace_ray objs width height cam j i : colour =
151 |   let u = f32.i64 i / f32.i64 width
152 |   let v = f32.i64 j / f32.i64 height
153 |   let ray = get_ray cam u v
154 |   in ray_colour objs ray 50
155 | 
156 | type pixel = i32
157 | 
158 | let colour_to_pixel (p: colour) : pixel =
159 |   -- scale each channel by 255.99, convert it to i32, and pack the three as 0xRRGGBB
160 |   let channel (c: f32) : i32 = i32.f32 (255.99 * c)
161 |   let (red, green, blue) = (channel p.x, channel p.y, channel p.z)
162 |   in (red << 16) | (green << 8) | blue
163 | 
164 | type image [h][w] = [h][w]pixel
165 | 
166 | let render_image objs width height cam : image [height][width] =
167 |   let pixel j i =
168 |     colour_to_pixel (trace_ray objs width height cam (height-j) i)
169 |   in tabulate_2d height width pixel
170 | 
171 | type~ scene = { look_from: pos
172 |               , look_at: pos
173 |               , fov: f32
174 |               , spheres: []sphere }
175 | 
176 | entry rgbbox : scene =
177 |   let n = 10
178 |   let k = 60.0
179 | 
180 |   let leftwall =
181 |     flatten <|
182 |     tabulate_2d n n (\y z ->
183 |                        { pos={x=(-k/2.0),
184 |                               y=(-k/2.0 + (k/f32.i64 n) * f32.i64 y),
185 |                               z=(-k/2.0 + (k/f32.i64 n) * f32.i64 z)}
186 |                        , colour={x=1.0, y=0.0, z=0.0}
187 |                        , radius = (k/(f32.i64 n*2.0))})
188 | 
189 |   let midwall =
190 |     flatten <|
191 |     tabulate_2d n n (\x y ->
192 |                        { pos={x=(-k/2.0 + (k/f32.i64 n) * f32.i64 x),
193 |                               y=(-k/2.0 + (k/f32.i64 n) * f32.i64 y),
194 |                               z=(-k/2.0)}
195 |                        , colour={x=1.0, y=1.0, z=0.0}
196 |                        , radius = (k/(f32.i64 n*2.0))})
197 | 
198 |   let rightwall =
199 |     flatten <|
200 |     tabulate_2d n n (\y z ->
201 |                        { pos={x=(k/2.0),
202 |                               y=(-k/2.0 + (k/f32.i64 n) * f32.i64 y),
203 |                               z=(-k/2.0 + (k/f32.i64 n) * f32.i64 z)}
204 |                        , colour={x=0.0, y=0.0, z=1.0}
205 |                        , radius = (k/(f32.i64 n*2.0))})
206 | 
207 | 
208 |   let bottom =
209 |     flatten <|
210 |     tabulate_2d n n (\x z ->
211 |                        { pos={x=(-k/2.0 + (k/f32.i64 n) * f32.i64 x),
212 |                               y=(-k/2.0),
213 |                               z=(-k/2.0 + (k/f32.i64 n) * f32.i64 z)}
214 |                        , colour={x=1.0, y=1.0, z=1.0}
215 |                        , radius = (k/(f32.i64 n*2.0))})
216 | 
217 | 
218 |   in { spheres = leftwall ++ midwall ++ rightwall ++ bottom
219 |      , look_from = {x=0.0, y=30.0, z=30.0}
220 |      , look_at = {x=0.0, y= -1.0, z= -1.0}
221 |      , fov = 75.0 }
222 | 
223 | entry irreg : scene =
224 |     let n = 100
225 |     let k = 600.0
226 |     let bottom =
227 |       flatten <|
228 |       tabulate_2d n n (\x z ->
229 |                          { pos={x=(-k/2.0 + (k/f32.i64 n) * f32.i64 x),
230 |                                 y=0.0,
231 |                                 z=(-k/2.0 + (k/f32.i64 n) * f32.i64 z)}
232 |                          , colour = white
233 |                          , radius = k/(f32.i64 n * 2.0)})
234 |     in { spheres = bottom
235 |        , look_from = {x=0.0, y=12.0, z=30.0}
236 |        , look_at = {x=0.0, y=10.0, z= -1.0}
237 |        , fov = 75.0 }
238 | 
239 | type~ prepared_scene = {objs:objs, cam:camera}
240 | 
241 | entry prepare_scene h w (scene: scene) : prepared_scene =
242 |   {objs=bvh_mk sphere_aabb scene.spheres,
243 |    cam=camera scene.look_from scene.look_at {x=0.0, y=1.0, z=0.0}
244 |               scene.fov (f32.i64 w/f32.i64 h)}
245 | 
246 | entry render h w ({objs, cam}: prepared_scene) =
247 |   render_image objs w h cam
248 | 


--------------------------------------------------------------------------------
/futhark/shell.nix:
--------------------------------------------------------------------------------
 1 | with import (builtins.fetchTarball {
 2 |   url = "https://github.com/NixOS/nixpkgs/archive/820177eded91f3908cfc72dfee00e831ea3d0060.zip";
 3 |   sha256 = "1yqx5zy154f8057inwjp2ranizgilvpblqq31cy7nryrwj2apics";
 4 | }) {};
 5 | stdenvNoCC.mkDerivation {
 6 |   name = "ray";
 7 |   CC = "clang";
 8 |   buildInputs = [ clang futhark ];
 9 | }
10 | 


--------------------------------------------------------------------------------
/haskell/.gitignore:
--------------------------------------------------------------------------------
1 | dist-newstyle
2 | 


--------------------------------------------------------------------------------
/haskell/BVH.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE StrictData #-}
 2 | module BVH
 3 |   ( AABB(..)
 4 |   , BVH(..)
 5 |   , mkBVH
 6 |   )
 7 | where
 8 | 
 9 | import Control.DeepSeq
10 | import Control.Monad.Par
11 | import Data.Function
12 | import Data.List
13 | import Vec3
14 | 
15 | data AABB = AABB { aabbMin :: Vec3
16 |                  , aabbMax :: Vec3
17 |                  }
18 | 
19 | instance NFData AABB where
20 |   rnf (AABB minv maxv) = minv `deepseq` maxv `deepseq` ()
21 | 
22 | surroundingBox :: AABB -> AABB -> AABB
23 | surroundingBox box0 box1 =
24 |   -- Lower corner: componentwise minimum of the two lower corners;
25 |   -- upper corner: componentwise maximum of the two upper corners.
26 |   AABB (merge min aabbMin) (merge max aabbMax)
27 |   where
28 |     merge pick corner =
29 |       Vec3
30 |         (vecX (corner box0) `pick` vecX (corner box1))
31 |         (vecY (corner box0) `pick` vecY (corner box1))
32 |         (vecZ (corner box0) `pick` vecZ (corner box1))
33 | 
34 | aabbCentre :: AABB -> Vec3
35 | aabbCentre aabb = Vec3 (mid vecX) (mid vecY) (mid vecZ)
36 |   where
37 |     -- Midpoint along one axis: the lower bound plus half the extent.
38 |     mid axis = lower + 0.5 * (axis (aabbMax aabb) - lower)
39 |       where lower = axis (aabbMin aabb)
40 | 
41 | data BVH a = BVHLeaf AABB a
42 |            | BVHSplit AABB (BVH a) (BVH a)
43 | 
44 | instance NFData (BVH a) where
45 |   rnf (BVHLeaf box _) = box `deepseq` ()
46 |   rnf (BVHSplit box x y) = box `deepseq` x `deepseq` y `deepseq` ()
47 | 
48 | bvhAABB :: BVH a -> AABB
49 | bvhAABB node = case node of  -- both constructors carry their box as the first field
50 |   BVHLeaf box _ -> box; BVHSplit box _ _ -> box
51 | 
52 | mkBVH :: (a -> AABB) -> [a] -> BVH a
53 | mkBVH f all_objs = runPar $ mkBVH' (0::Int) (length all_objs) all_objs  -- start at depth 0 with the full object count
54 |   where mkBVH' _ _ [] = error "mkBVH: empty no nodes"  -- an empty object list cannot form a tree
55 |         mkBVH' _ _ [x] = return $ BVHLeaf (f x) x  -- a single object becomes a leaf with its own box
56 |         mkBVH' d n xs = do  -- d: current depth, n: number of objects in xs (as passed by the caller)
57 |           let n2 = n `div` 2
58 |               d1 = d + 1
59 |               (xs_left, xs_right) =  -- sort by box centre along the current axis, then cut after n2 objects
60 |                 splitAt n2 $ sortBy (compare `on` comparison) xs
61 |               left = mkBVH' d1 n2 xs_left
62 |               right = mkBVH' d1 (n - n2) xs_right
63 |           (left', right') <-
64 |             if n < 100  -- below this size both halves are built in the current computation instead of being spawned
65 |             then (,) <$> left <*> right
66 |             else do left' <- spawn left
67 |                     right' <- spawn right
68 |                     (,) <$> get left' <*> get right'
69 |           let box = bvhAABB left' `surroundingBox` bvhAABB right'  -- the parent box encloses both child boxes
70 |           return $ BVHSplit box left' right'
71 |           where axis = case d `mod` 3 of  -- the split axis cycles X, Y, Z with depth
72 |                          0 -> vecX
73 |                          1 -> vecY
74 |                          _ -> vecZ
75 |                 comparison = axis . aabbCentre . f  -- sort key: centre coordinate of the object's box
76 | 


--------------------------------------------------------------------------------
/haskell/Image.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DeriveGeneric, DeriveAnyClass, DataKinds #-}
 2 | module Image (mkImage, writeImage) where
 3 | 
 4 | import Data.Massiv.Array as A
 5 | import Data.Massiv.Array.IO
 6 | 
 7 | mkImage :: Int -> Int -> (Int -> Int -> Pixel (SRGB 'NonLinear) Word8) -> Image S (SRGB 'NonLinear) Word8
 8 | mkImage height width pixel =
 9 |   makeArrayR S Par (Sz2 height width) (\(i :. j) -> pixel i j)
10 | 


--------------------------------------------------------------------------------
/haskell/Makefile:
--------------------------------------------------------------------------------
 1 | all: rgbbox_1000.ppm irreg_1000.ppm
 2 | 
 3 | rgbbox_1000.ppm:
 4 | 	cabal run ray -- rgbbox 1000 1000
 5 | 	mv rgbbox.png $@
 6 | 
 7 | irreg_1000.ppm:
 8 | 	cabal run ray -- irreg 1000 1000
 9 | 	mv irreg.png $@
10 | 
11 | bench:
12 | 	cabal bench
13 | 
14 | .PHONY: clean
15 | 
16 | clean:
17 | 	cabal clean
18 | 	rm -f *.ppm
19 | 


--------------------------------------------------------------------------------
/haskell/README.md:
--------------------------------------------------------------------------------
 1 | # Haskell implementation
 2 | 
 3 | ## TL;DR
 4 | 
 5 | ```
 6 | $ cabal bench --benchmark-options="+RTS -N8"
 7 | ```
 8 | 
 9 | Replace `8` with the number of threads you want to use.
10 | 
11 | ## Details
12 | 
13 | This program makes use of the cabal-install build tool (the command
14 | line program is just called `cabal`).  You will need at least `cabal`
15 | 3.0.0.0.
16 | 
17 | There are two programs here: `ray`, which is used for generating
18 | images, and `bench-ray`, which is a benchmarking harness that makes
19 | use of the excellent `criterion` library.  First you must run the
20 | following command:
21 | 
22 | ```
23 | cabal update
24 | ```
25 | 
26 | Then you can build both by running
27 | 
28 | ```
29 | cabal build --enable-benchmarks
30 | ```
31 | 
32 | For running the programs, use `cabal run`.  For example, to generate
33 | a 400x400 image of the "rgbbox" scene as `rgbbox.png`:
34 | 
35 | ```
36 | cabal run -v0 ray -- rgbbox 400 400
37 | ```
38 | 
39 | For running the benchmarks:
40 | 
41 | ```
42 | cabal run -v0 bench-ray --
43 | ```
44 | 
45 | You can also specify only a subset of benchmarks to run; for example
46 | to run only the BVH calculation benchmarks:
47 | 
48 | ```
49 | cabal run -v0 bench-ray bvh --
50 | ```
51 | 
52 | ## Running with multiple threads
53 | 
54 | To run with multiple threads, place `+RTS -Nx` at the end of the
55 | command line, where *x* is the number of threads to use, e.g.:
56 | 
57 | ```
58 | cabal run -v0 bench-ray bvh -- +RTS -N4
59 | ```
60 | 
61 | It is **crucial** that the `RTS` options go after `--`, as otherwise
62 | they will be consumed by the `cabal` executable itself.
63 | 


--------------------------------------------------------------------------------
/haskell/Raytracing.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE Strict #-}
  2 | {-# LANGUAGE StrictData #-}
  3 | {-# LANGUAGE DataKinds #-}
  4 | module Raytracing
  5 |   ( Pos, Dir, Camera, mkCamera
  6 |   , Sphere(..), sphereAABB
  7 |   , Objs
  8 |   , Colour, black, white
  9 |   , render
 10 |   )
 11 | where
 12 | 
 13 | import Data.Maybe (fromMaybe)
 14 | import BVH
 15 | import Image
 16 | import Vec3
 17 | import Data.Massiv.Array (S)
 18 | import Data.Massiv.Array.IO
 19 | 
 20 | type Pos = Vec3
 21 | type Dir = Vec3
 22 | 
 23 | type Colour = Vec3
 24 | black, white :: Colour
 25 | black = 0
 26 | white = 1
 27 | 
-- | A ray: a starting point and a direction.  The direction is not
-- required to be unit length ('sphereHit' divides by its squared norm).
data Ray = Ray { rayOrigin :: Pos
               , rayDir :: Dir }
           deriving (Show)
 31 | 
 32 | pointAtParam :: Ray -> Float -> Pos
 33 | pointAtParam ray t = rayOrigin ray + (t `scale` rayDir ray)
 34 | 
-- | Result of a ray/sphere intersection as produced by 'sphereHit':
-- the ray parameter, the intersection point, the surface normal at
-- that point, and the colour of the sphere that was hit.
data Hit = Hit { hitT :: Float
               , hitP :: Pos
               , hitNormal :: Dir
               , hitColour :: Colour
               }
 40 | 
-- | A sphere with a centre position, a colour and a radius.
data Sphere = Sphere { spherePos :: Pos
                     , sphereColour :: Colour
                     , sphereRadius :: Float }
            deriving (Show, Read)
 45 | 
 46 | sphereAABB :: Sphere -> AABB
 47 | sphereAABB (Sphere centre _ radius) =
 48 |   AABB
 49 |   (centre - Vec3 radius radius radius)
 50 |   (centre + Vec3 radius radius radius)
 51 | 
 52 | sphereHit :: Sphere -> Ray -> Float -> Float -> Maybe Hit
 53 | sphereHit (Sphere center colour radius) r t_min t_max =
 54 |   let oc = rayOrigin r - center
 55 |       a = dot (rayDir r) (rayDir r)
 56 |       b = dot oc (rayDir r)
 57 |       c = dot oc oc - radius*radius
 58 |       discriminant = b*b - a*c
 59 |       sqrtDisc = sqrt discriminant
 60 |       tryHit temp =
 61 |         if temp < t_max && temp > t_min
 62 |         then Just $ Hit
 63 |              { hitT = temp
 64 |              , hitP = pointAtParam r temp
 65 |              , hitNormal = (1/radius) `scale` (pointAtParam r temp - center)
 66 |              , hitColour = colour
 67 |              }
 68 |         else Nothing
 69 |   in if discriminant <= 0
 70 |      then Nothing
 71 |      else case tryHit ((-b - sqrtDisc)/a) of
 72 |             Just hit -> Just hit
 73 |             Nothing -> tryHit ((-b + sqrtDisc)/a)
 74 | 
 75 | type Objs = BVH Sphere
 76 | 
 77 | aabbHit :: AABB -> Ray -> Float -> Float -> Bool
 78 | aabbHit aabb (Ray origin direction) tmin0 tmax0 =
 79 |   let iter min' max' origin' dir' tmin' tmax' =
 80 |         let invD = 1 / dir'
 81 |             t0 = (min' - origin') * invD
 82 |             t1 = (max' - origin') * invD
 83 |             (t0', t1') = if invD < 0 then (t1, t0) else (t0, t1)
 84 |             tmin'' = max t0' tmin'
 85 |             tmax'' = min t1' tmax'
 86 |         in (tmin'', tmax'')
 87 |       (tmin1, tmax1) =
 88 |         iter
 89 |         (vecX (aabbMin aabb)) (vecX (aabbMax aabb))
 90 |         (vecX origin) (vecX direction)
 91 |         tmin0 tmax0
 92 |   in not $
 93 |      tmax1 <= tmin1 ||
 94 |      let (tmin2, tmax2) =
 95 |            iter (vecY (aabbMin aabb)) (vecY (aabbMax aabb))
 96 |            (vecY origin) (vecY direction)
 97 |            tmin1 tmax1
 98 |      in tmax2 <= tmin2 ||
 99 |         let (tmin3, tmax3) =
100 |               iter (vecZ (aabbMin aabb)) (vecZ (aabbMax aabb))
101 |               (vecZ origin) (vecZ direction)
102 |               tmin2 tmax2
103 |         in tmax3 <= tmin3
104 | 
-- | Find a hit in the BVH for parameters between @t_min@ and @t_max@.
-- A split node is skipped entirely when the ray misses its box.
-- Otherwise the left subtree is searched first, and the right subtree
-- is then searched with the upper bound tightened to the left hit's
-- parameter, so a right-hand hit replaces it only if it is nearer.
objsHit :: Objs -> Ray -> Float -> Float -> Maybe Hit
objsHit bvh r t_min t_max =
  case bvh of
    BVHLeaf _ sphere -> sphereHit sphere r t_min t_max
    BVHSplit box left right
      | aabbHit box r t_min t_max ->
          case objsHit left r t_min t_max of
            Just near ->
              Just (fromMaybe near (objsHit right r t_min (hitT near)))
            Nothing -> objsHit right r t_min t_max
      | otherwise -> Nothing
116 | 
-- | Camera as produced by 'mkCamera': the eye position, a corner point
-- of the view plane ('camLLC'), and the two vectors spanning the view
-- plane from that corner.  'getRay' combines them as
-- @camLLC + s * camHorizontal + t * camVertical@.
data Camera = Camera { camOrigin :: Pos
                     , camLLC :: Pos
                     , camHorizontal :: Dir
                     , camVertical :: Dir
                     }
              deriving (Show, Read)
123 | 
-- | Build a camera from an eye position, a look-at point, an up
-- vector, a vertical field of view in degrees (it is converted with
-- @pi / 180@) and an aspect ratio that scales the half-width relative
-- to the half-height.
mkCamera :: Pos -> Pos -> Dir -> Float -> Float -> Camera
mkCamera lookfrom lookat vup vfov aspect =
  Camera { camOrigin = lookfrom
         , camLLC = lookfrom
                    - scale half_width u
                    - scale half_height v
                    - w
         , camHorizontal = scale (2*half_width) u
         , camVertical = scale (2*half_height) v
         }
  where
    theta = vfov * pi / 180
    half_height = tan (theta / 2)
    half_width = aspect * half_height
    -- Basis built from the viewing direction and the up vector.
    w = normalise (lookfrom - lookat)
    u = normalise (cross vup w)
    v = cross w u
141 | 
-- | Ray from the camera origin through the view-plane point
-- @camLLC + s * camHorizontal + t * camVertical@.
getRay :: Camera -> Float -> Float -> Ray
getRay cam s t = Ray origin (target - origin)
  where
    origin = camOrigin cam
    target = camLLC cam
             + scale s (camHorizontal cam)
             + scale t (camVertical cam)
150 | 
-- | Mirror @v@ about the normal @n@: @v - 2 (v . n) n@.
reflect :: Vec3 -> Vec3 -> Vec3
reflect v n = v - scale (2 * dot v n) n
153 | 
-- | Reflect the incoming ray at the hit point.  Yields the reflected
-- ray together with the hit colour when the reflected direction has a
-- positive dot product with the surface normal, and 'Nothing' otherwise.
scatter :: Ray -> Hit -> Maybe (Ray, Vec3)
scatter r hit
  | dot reflected normal > 0 = Just (Ray (hitP hit) reflected, hitColour hit)
  | otherwise = Nothing
  where
    normal = hitNormal hit
    reflected = reflect (normalise (rayDir r)) normal
162 | 
-- | Colour seen along a ray.  On a hit that scatters, the result is
-- the hit's colour multiplied by the colour of the scattered ray, for
-- at most 50 bounces; a hit that does not scatter (or exceeds the
-- bounce limit) is black.  A miss yields a gradient between white and
-- (0.5, 0.7, 1) based on the y component of the normalised direction.
rayColour :: Objs -> Ray -> Int -> Colour
rayColour objs r depth =
  case objsHit objs r 0.001 (1/0) of
    Nothing ->
      let t = 0.5 * (vecY (normalise (rayDir r)) + 1)
      in scale (1-t) (Vec3 1 1 1) + scale t (Vec3 0.5 0.7 1)
    Just hit
      | depth < 50
      , Just (scattered, attenuation) <- scatter r hit ->
          attenuation * rayColour objs scattered (depth+1)
      | otherwise -> black
176 | 
-- | Colour for pixel row @j@, column @i@: the indices are divided by
-- the image height and width respectively to obtain view-plane
-- coordinates, and the resulting camera ray is traced from depth 0.
traceRay :: Objs -> Int -> Int -> Camera -> Int -> Int -> Colour
traceRay objs width height cam j i =
  rayColour objs (getRay cam u v) 0
  where
    u = fromIntegral i / fromIntegral width
    v = fromIntegral j / fromIntegral height
183 | 
-- | Convert a colour (x, y, z used as r, g, b) to an 8-bit sRGB pixel.
colourToPixel :: Colour -> Pixel (SRGB 'NonLinear) Word8
colourToPixel colour =
  toPixel8 (PixelSRGB (vecX colour) (vecY colour) (vecZ colour))
186 | 
-- | Render the scene into an image that is @width@ pixels wide and
-- @height@ pixels tall.
--
-- The two 'Int' arguments are taken as width first, then height.  That
-- is the order both callers in this project use (@render objs width
-- height cam@ in @ray.hs@ and @bench-ray.hs@), and it matches the
-- order given to 'Scene.fromScene', which derives the camera aspect
-- ratio from them.  The parameters were previously bound in the
-- opposite order, which transposed non-square images.
--
-- Image row @j@ is traced as view-plane row @height - j@.
render :: Objs -> Int -> Int -> Camera -> Image S (SRGB 'NonLinear) Word8
render objs width height cam =
  mkImage height width $ \j i ->
  colourToPixel $ traceRay objs width height cam (height-j) i
191 | 


--------------------------------------------------------------------------------
/haskell/Scene.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE StrictData #-}
 2 | module Scene (Scene(..), fromScene,
 3 |               rgbbox, irreg) where
 4 | 
 5 | import BVH
 6 | import Raytracing
 7 | import Vec3
 8 | 
-- | A scene description: where the camera is and what it looks at,
-- its field of view (passed to 'mkCamera' as @vfov@), and the spheres
-- to render.
data Scene = Scene { sceneCamLookFrom :: Pos
                   , sceneCamLookAt :: Pos
                   , sceneCamFov :: Float
                   , sceneSpheres :: [Sphere]
                   }
           deriving (Show, Read)
15 | 
-- | Turn a scene into the BVH of its spheres and a camera whose
-- aspect ratio is @width / height@ and whose up vector is the y axis.
fromScene :: Int -> Int -> Scene -> (Objs, Camera)
fromScene width height scene = (objs, cam)
  where
    objs = mkBVH sphereAABB (sceneSpheres scene)
    aspect = fromIntegral width / fromIntegral height
    cam = mkCamera (sceneCamLookFrom scene) (sceneCamLookAt scene)
                   (Vec3 0 1 0) (sceneCamFov scene) aspect
21 | 
-- | Four 10x10 grids of spheres: a red wall at x = -30, a green wall
-- at z = -30, a blue wall at x = 30 and a white floor at y = -30.
rgbbox :: Scene
rgbbox = Scene { sceneSpheres = leftwall ++ midwall ++ rightwall ++ bottom
               , sceneCamLookFrom = Vec3 0 30 30
               , sceneCamLookAt = Vec3 0 (-1) (-1)
               , sceneCamFov = 75 }
  where n = 10
        k = 60
        k2 = -k / 2
        kn = k / n
        kn2 = kn / 2
        -- Grid indices and the coordinate each index maps to.
        steps = [0..n-1]
        at c = k2 + kn * c
        -- One wall: a sphere of radius kn2 at every grid position, with
        -- the first index varying slowest.
        wall colour place =
          [ Sphere (place a b) colour kn2 | a <- steps, b <- steps ]
        leftwall = wall (Vec3 1 0 0) (\y z -> Vec3 k2 (at y) (at z))
        midwall = wall (Vec3 0 1 0) (\x y -> Vec3 (at x) (at y) k2)
        rightwall = wall (Vec3 0 0 1) (\y z -> Vec3 (-k2) (at y) (at z))
        bottom = wall (Vec3 1 1 1) (\x z -> Vec3 (at x) k2 (at z))
64 | 
-- | A single 100x100 grid of white spheres in the plane y = 0.
irreg :: Scene
irreg = Scene { sceneSpheres = bottom
              , sceneCamLookFrom = Vec3 0 12 30
              , sceneCamLookAt = Vec3 0 10 (-1)
              , sceneCamFov = 75 }
  where n = 100
        k = 600
        k2 = -k / 2
        kn = k / n
        kn2 = kn / 2
        steps = [0..n-1]
        at c = k2 + kn * c
        bottom =
          [ Sphere (Vec3 (at x) 0 (at z)) (Vec3 1 1 1) kn2
          | x <- steps, z <- steps
          ]
83 | 


--------------------------------------------------------------------------------
/haskell/Vec3.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE Strict #-}
 2 | {-# LANGUAGE StrictData #-}
 3 | module Vec3 (Vec3(..), scale, norm, normalise, cross, dot) where
 4 | 
 5 | import Control.DeepSeq
 6 | 
-- | Three-component vector of 'Float's.  The same type is used for
-- positions, directions and colours elsewhere in the project.
data Vec3 = Vec3 {vecX :: Float, vecY :: Float, vecZ :: Float}
  deriving (Eq, Ord, Show, Read)
 9 | 
-- | A vector is fully evaluated once its three components are.
instance NFData Vec3 where
  rnf (Vec3 x y z) = rnf x `seq` rnf y `seq` rnf z
12 | 
-- | Component-wise arithmetic.  Note that '*' is the component-wise
-- product, and an integer literal denotes the vector with that value
-- in all three components.
instance Num Vec3 where
  a + b = Vec3 (vecX a + vecX b) (vecY a + vecY b) (vecZ a + vecZ b)
  a - b = Vec3 (vecX a - vecX b) (vecY a - vecY b) (vecZ a - vecZ b)
  a * b = Vec3 (vecX a * vecX b) (vecY a * vecY b) (vecZ a * vecZ b)
  negate v = Vec3 (negate (vecX v)) (negate (vecY v)) (negate (vecZ v))
  abs v = Vec3 (abs (vecX v)) (abs (vecY v)) (abs (vecZ v))
  signum v = Vec3 (signum (vecX v)) (signum (vecY v)) (signum (vecZ v))
  fromInteger n = let c = fromInteger n in Vec3 c c c
21 | 
-- | Component-wise reciprocal; a rational literal denotes the vector
-- with that value in all three components.
instance Fractional Vec3 where
  recip v = Vec3 (recip (vecX v)) (recip (vecY v)) (recip (vecZ v))
  fromRational q = let c = fromRational q in Vec3 c c c
25 | 
-- | Multiply every component of a vector by a scalar.
scale :: Float -> Vec3 -> Vec3
scale a v = Vec3 (a * vecX v) (a * vecY v) (a * vecZ v)
28 | 
-- | Dot product of two vectors.
dot :: Vec3 -> Vec3 -> Float
dot a b =
  vecX a * vecX b + vecY a * vecY b + vecZ a * vecZ b
32 | 
-- | Euclidean length of a vector.
norm :: Vec3 -> Float
norm v = sqrt lengthSquared
  where lengthSquared = dot v v
35 | 
-- | Scale a vector by the reciprocal of its length.
normalise :: Vec3 -> Vec3
normalise v = scale invLength v
  where invLength = 1 / norm v
38 | 
-- | Cross product of two vectors.
cross :: Vec3 -> Vec3 -> Vec3
cross a b = Vec3 cx cy cz
  where
    cx = vecY a * vecZ b - vecZ a * vecY b
    cy = vecZ a * vecX b - vecX a * vecZ b
    cz = vecX a * vecY b - vecY a * vecX b
42 | 


--------------------------------------------------------------------------------
/haskell/bench-ray.hs:
--------------------------------------------------------------------------------
 1 | module Main (main) where
 2 | 
 3 | import Criterion.Main
 4 | import Raytracing
 5 | import Scene
 6 | 
 7 | main :: IO ()
 8 | main = defaultMain
 9 |   [ bgroup "bvh"
10 |     [ bench "rgbbox" $ bvhbench rgbbox
11 |     , bench "irreg" $ bvhbench irreg
12 |     ]
13 | 
14 |   , bgroup "rendering"
15 |     [bgroup "rgbbox" $
16 |      [ bench "10x10" $ renderbench rgbbox 10 10
17 |      , bench "100x100" $ renderbench rgbbox 100 100
18 |      , bench "200x200" $ renderbench rgbbox 200 200
19 |      , bench "1000x1000" $ renderbench rgbbox 1000 1000
20 |      ]
21 |     , bgroup "irreg" $
22 |       [ bench "10x10" $ renderbench irreg 10 10
23 |       , bench "100x100" $ renderbench irreg 100 100
24 |       , bench "200x200" $ renderbench irreg 200 200
25 |       , bench "1000x1000" $ renderbench irreg 1000 1000
26 |       ]
27 |     ]
28 |   ]
29 |   where bvhbench scene =
30 |           nf (fst . fromScene 1 1) scene
31 | 
32 |         renderbench scene width height =
33 |           let (objs, cam) = fromScene width height scene
34 |           in nf (render objs width height) cam
35 | 


--------------------------------------------------------------------------------
/haskell/cabal.project:
--------------------------------------------------------------------------------
1 | packages: .
2 | index-state: 2022-05-15T08:27:37Z
3 | 


--------------------------------------------------------------------------------
/haskell/ray.cabal:
--------------------------------------------------------------------------------
 1 | cabal-version: 2.2
 2 | name: ray
 3 | version: 1.0.0
 4 | build-type: Simple
 5 | 
 6 | common shared
 7 |   ghc-options: -Wall -rtsopts -threaded -with-rtsopts=-N -funbox-strict-fields
 8 |   default-language: Haskell2010
 9 |   build-depends: base, parallel, massiv, massiv-io, deepseq, monad-par
10 |   other-modules: Raytracing, Vec3, Scene, Image, BVH
11 | 
12 | executable ray
13 |   import: shared
14 |   main-is: ray.hs
15 | 
16 | benchmark bench-ray
17 |   import: shared
18 |   type: exitcode-stdio-1.0
19 |   build-depends: criterion
20 |   main-is: bench-ray.hs
21 | 


--------------------------------------------------------------------------------
/haskell/ray.hs:
--------------------------------------------------------------------------------
 1 | module Main (main) where
 2 | 
 3 | import System.Environment (getArgs)
 4 | import Text.Read
 5 | import Image
 6 | import Raytracing
 7 | import Scene
 8 | 
 9 | scenes :: [(String, Scene)]
10 | scenes = [ ("rgbbox", rgbbox)
11 |          , ("irreg", irreg)
12 |          ]
13 | 
-- | Command-line entry point: @ray <scene> <width> <height>@.
--
-- Renders the named scene at the given size and writes it to
-- @<scene>.png@.  Fails with 'error' when the scene name is unknown,
-- or when the arguments are not exactly a scene name followed by two
-- values that parse with 'readMaybe'.
main :: IO ()
main = do
  args <- getArgs
  case args of
    [scene, width, height]
      | Just width' <- readMaybe width,
        Just height' <- readMaybe height ->
          case lookup scene scenes of
            Nothing -> error $ "Invalid scene.  Known scenes:\n" ++ unlines (map fst scenes)
            Just scene' -> do
              let (objs, cam) = fromScene width' height' scene'
              writeImage (scene ++ ".png") $ render objs width' height' cam
    _ ->
      -- The executable is called `ray` (see ray.cabal), not `render`.
      error "Usage: ray <scene> <width> <height>"
28 | 


--------------------------------------------------------------------------------
/haskell/shell.nix:
--------------------------------------------------------------------------------
1 | with import (builtins.fetchTarball {
2 |   url = "https://github.com/NixOS/nixpkgs/archive/820177eded91f3908cfc72dfee00e831ea3d0060.zip";
3 |   sha256 = "1yqx5zy154f8057inwjp2ranizgilvpblqq31cy7nryrwj2apics";
4 | }) {};
5 | stdenv.mkDerivation {
6 |   name = "ray";
7 |   buildInputs = [ zlib zlib.out pkgconfig ghc cabal-install ];
8 | }
9 | 


--------------------------------------------------------------------------------
/haskell/stack.yaml:
--------------------------------------------------------------------------------
1 | resolver: nightly-2020-04-13
2 | packages:
3 | - .
4 | 


--------------------------------------------------------------------------------
/irreg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/athas/raytracers/888ac8f126499649b40b0ea8095db9235254ee9b/irreg.png


--------------------------------------------------------------------------------
/ispc/shell.nix:
--------------------------------------------------------------------------------
1 | with import (builtins.fetchTarball {
2 |   url = "https://github.com/NixOS/nixpkgs/archive/820177eded91f3908cfc72dfee00e831ea3d0060.zip";
3 |   sha256 = "1yqx5zy154f8057inwjp2ranizgilvpblqq31cy7nryrwj2apics";
4 | }) {};
5 | stdenv.mkDerivation {
6 |   name = "ray";
7 |   buildInputs = [ ispc ];
8 | }
9 | 


--------------------------------------------------------------------------------
/ocaml/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 | ray.native
3 | 


--------------------------------------------------------------------------------
/ocaml/.merlin:
--------------------------------------------------------------------------------
1 | S .
2 | B _build
3 | 
4 | 
5 | 


--------------------------------------------------------------------------------
/ocaml/Makefile:
--------------------------------------------------------------------------------
 1 | all: rgbbox_1000.ppm irreg_1000.ppm
 2 | 
 3 | ray.native: ray.ml timing.o
 4 | 	ocamlbuild -use-ocamlfind ray.native -lflags timing.o
 5 | 
 6 | timing.o: timing.c
 7 | 	ocamlbuild timing.o
 8 | 
 9 | rgbbox_1000.ppm: ray.native
10 | 	./ray.native -f $@ -s rgbbox -n 1000 -m 1000
11 | 
12 | irreg_1000.ppm: ray.native
13 | 	./ray.native -f $@ -s irreg -n 1000 -m 1000
14 | 
15 | .PHONY: clean
16 | 
17 | clean:
18 | 	rm -rf ray.native *.ppm _build
19 | 


--------------------------------------------------------------------------------
/ocaml/README.md:
--------------------------------------------------------------------------------
 1 | # Multicore OCaml implementation 
 2 | 
 3 | Multicore OCaml is still in development while this code is being written, so bad performance should 
 4 | be taken lightly.
 5 | 
 6 | ## Requisites
 7 | 
 8 | Install Opam and make the development Multicore OCaml `switch`:
 9 | 
10 | * Opam is the package manager of OCaml: https://opam.ocaml.org/doc/Install.html
11 |   * Multicore OCaml can be gotten in its development state using Opam: https://github.com/ocaml-multicore/multicore-opam
12 | 
13 | Also:
14 | ```opam install ocamlbuild domainslib```
15 | 
16 | ## Running
17 | 
18 | Just `make ray.native`. The generated `ray.native` takes the following options,
19 | all of which are optional:
20 | 
21 | * `-m height`
22 | * `-n width`
23 | * `-f file.ppm`
24 | * `-s <rgbbox|irreg>`
25 | * `--cores number-of-cores`
26 | * `--chunk-size-render chunk-size-of-pixels`
27 | 


--------------------------------------------------------------------------------
/ocaml/_tags:
--------------------------------------------------------------------------------
1 | #true: thread
2 | 
3 | true: bin_annot, debug
4 | true: package(domainslib)
5 | 


--------------------------------------------------------------------------------
/ocaml/ray.ml:
--------------------------------------------------------------------------------
  1 | module Task = Domainslib.Task
  2 | 
  3 | type vec3 = {
  4 |   x : float;
  5 |   y : float;
  6 |   z : float;
  7 | }
  8 | 
  9 | let vec_add (v1: vec3) (v2: vec3) = { x = v1.x +. v2.x; y = v1.y +. v2.y; z = v1.z +. v2.z }
 10 | let vec_sub (v1: vec3) (v2: vec3) = { x = v1.x -. v2.x; y = v1.y -. v2.y; z = v1.z -. v2.z }
 11 | let vec_mul (v1: vec3) (v2: vec3) = { x = v1.x *. v2.x; y = v1.y *. v2.y; z = v1.z *. v2.z }
 12 | let vec_div (v1: vec3) (v2: vec3) = { x = v1.x /. v2.x; y = v1.y /. v2.y; z = v1.z /. v2.z }
 13 | 
 14 | let scale s v : vec3 = {
 15 |   x = s *. v.x;
 16 |   y = s *. v.y;
 17 |   z = s *. v.z;
 18 | }
 19 | 
 20 | let dot (v1: vec3) (v2: vec3) =
 21 |   let v3 = vec_mul v1 v2
 22 |   in v3.x +. v3.y +. v3.z
 23 | 
 24 | let norm v = sqrt (dot v v)
 25 | 
 26 | let normalise v = scale (1.0 /. norm v) v
 27 | 
 28 | let cross v1 v2 : vec3 = {
 29 |   x = v1.y *. v2.z -. v1.z *. v2.y;
 30 |   y = v1.z *. v2.x -. v1.x *. v2.z;
 31 |   z = v1.x *. v2.y -. v1.y *. v2.x;
 32 | }
 33 | 
 34 | type aabb = {
 35 |   min : vec3;
 36 |   max : vec3
 37 | }
 38 | 
 39 | let min x y : float =
 40 |   if x < y then x else y
 41 | 
 42 | let max x y : float =
 43 |   if x < y then y else x
 44 | 
 45 | let enclosing (box0: aabb) (box1: aabb) =
 46 |   let small = {
 47 |     x = min box0.min.x box1.min.x;
 48 |     y = min box0.min.y box1.min.y;
 49 |     z = min box0.min.z box1.min.z;
 50 |   }
 51 |   and big = {
 52 |     x = max box0.max.x box1.max.x;
 53 |     y = max box0.max.y box1.max.y;
 54 |     z = max box0.max.z box1.max.z;
 55 |   }
 56 |   in { min = small; max = big }
 57 | 
 58 | let centre (aabb: aabb) = {
 59 |   x = aabb.min.x +. 0.5 *. (aabb.max.x -. aabb.min.x);
 60 |   y = aabb.min.y +. 0.5 *. (aabb.max.y -. aabb.min.y);
 61 |   z = aabb.min.z +. 0.5 *. (aabb.max.z -. aabb.min.z);
 62 | }
 63 | 
(* Binary bounding volume hierarchy.  A leaf holds one value with its
   box; a split holds a box and two subtrees. *)
type 'a bvh =
  | Bvh_leaf of aabb * 'a
  | Bvh_split of aabb * 'a bvh * 'a bvh
 67 | 
 68 | let bvh_aabb bvh =
 69 |   match bvh with
 70 |   | (Bvh_leaf (box, _)) -> box
 71 |   | (Bvh_split (box, _, _)) -> box
 72 | 
 73 | let rec split n xs =
 74 |   match (n, xs) with
 75 |   | (0, _) -> ([], xs)
 76 |   | (_, []) -> ([], [])
 77 |   | (_, x::xs') ->
 78 |     let (left, right) = split (n-1) xs'
 79 |     in (x::left, right)
 80 | 
 81 | let sp = Printf.sprintf
 82 | 
 83 | let log ?id s =
 84 |   let id_str = match id with None -> "" | Some id -> sp "Worker-%d: " id in
 85 |   Printf.printf "%s%s\n%!" id_str s
 86 | 
 87 | let axis d (aabb: aabb) =
 88 |   let p = centre aabb in
 89 |   match d mod 3 with
 90 |   | 0 -> p.x
 91 |   | 1 -> p.y
 92 |   | 2 -> p.z
 93 |   | _ -> assert false
 94 | 
 95 | let mk_bvh ~pool f all_objs =
 96 |   let rec mk d n xs =
 97 |     match xs with
 98 |     | [] -> failwith "mk_bvh: no nodes"
 99 |     | [x] -> Bvh_leaf(f x, x)
100 |     | _ ->
101 |       let key x = axis d (f x) in
102 |       let sort_by_keys x y = compare (key x) (key y) in
103 |       let xs_sorted = List.sort sort_by_keys xs in
104 |       let (xs_left, xs_right) = split (n/2) xs_sorted in
105 |       let do_left () = mk (d+1) (n/2) xs_left in
106 |       let do_right () = mk (d+1) (n-(n/2)) xs_right in
107 |       let (left, right) =
108 |         if n < 100
109 |         then (do_left(), do_right())
110 |         else
111 |           let l = Task.async pool do_left in
112 |           let r = Task.async pool do_right in
113 |           (Task.await pool l, Task.await pool r)
114 |       in
115 |       let box = enclosing (bvh_aabb left) (bvh_aabb right)
116 |       in Bvh_split (box, left, right)
117 |   in
118 |   mk 0 (List.length all_objs) all_objs
119 | 
120 | type pos = vec3
121 | type dir = vec3
122 | type colour = vec3
123 | 
124 | let black : vec3 = {x=0.0; y=0.0; z=0.0}
125 | let white : vec3 = {x=1.0; y=1.0; z=1.0}
126 | 
127 | type ray = {origin: pos; dir: dir}
128 | 
(* The point [origin + t * dir] along a ray. *)
let point_at_param (ray: ray) t =
  let offset = scale t ray.dir in
  vec_add ray.origin offset
131 | 
132 | type hit = {
133 |   t: float;
134 |   p: pos;
135 |   normal: dir;
136 |   colour: colour;
137 | }
138 | 
139 | type sphere = {
140 |   pos: pos;
141 |   colour: colour;
142 |   radius: float;
143 | }
144 | 
(* Bounding box of a sphere: its centre offset by the radius on every
   axis. *)
let sphere_aabb (s: sphere) : aabb =
  let extent = {x = s.radius; y = s.radius; z = s.radius} in
  { min = vec_sub s.pos extent; max = vec_add s.pos extent }
148 | 
(* Intersect ray [r] with sphere [s], accepting only parameters
   strictly between [t_min] and [t_max].  The smaller root is tried
   first, then the larger one.  Returns [None] when the discriminant is
   not positive or neither root is in range. *)
let sphere_hit s r t_min t_max : hit option =
  let oc = vec_sub r.origin s.pos in
  let a = dot r.dir r.dir in
  let b = dot oc r.dir in
  let c = dot oc oc -. s.radius *. s.radius in
  let discriminant = b *. b -. a *. c in
  (* Build the hit record for parameter [temp] if it is in range. *)
  let hit_at temp =
    if temp < t_max && temp > t_min then
      let p = point_at_param r temp in
      Some { t = temp
           ; p
           ; normal = scale (1.0 /. s.radius) (vec_sub p s.pos)
           ; colour = s.colour }
    else None
  in
  if discriminant <= 0.0 then None
  else
    let root = sqrt discriminant in
    match hit_at ((-.b -. root) /. a) with
    | None -> hit_at ((-.b +. root) /. a)
    | nearer -> nearer
171 | 
(* Slab test: does ray [r] cross the box for some parameter in the
   interval (tmin0, tmax0)?  The interval is narrowed one axis at a
   time (x, y, z); the test fails as soon as the upper bound is less
   than or equal to the lower bound. *)
let aabb_hit aabb (r: ray) tmin0 tmax0 =
  (* Clip the interval [tlo, thi] against one pair of planes. *)
  let slab lo hi origin dir tlo thi =
    let inv = 1.0 /. dir in
    let ta = (lo -. origin) *. inv in
    let tb = (hi -. origin) *. inv in
    let tlo' = max (if inv < 0.0 then tb else ta) tlo in
    let thi' = min (if inv < 0.0 then ta else tb) thi in
    (tlo', thi')
    [@@inline]
  in
  let (tmin1, tmax1) =
    slab aabb.min.x aabb.max.x r.origin.x r.dir.x tmin0 tmax0
  in
  not (tmax1 <= tmin1) &&
  (let (tmin2, tmax2) =
     slab aabb.min.y aabb.max.y r.origin.y r.dir.y tmin1 tmax1
   in
   not (tmax2 <= tmin2) &&
   (let (tmin3, tmax3) =
      slab aabb.min.z aabb.max.z r.origin.z r.dir.z tmin2 tmax2
    in
    not (tmax3 <= tmin3)))
195 | 
196 | type objs = sphere bvh
197 | 
(* Find a hit in the BVH for parameters between [t_min] and [t_max].
   A split node is skipped when the ray misses its box.  Otherwise the
   left subtree is searched first, and the right subtree is searched
   with the upper bound tightened to the left hit's parameter, so a
   right-hand hit replaces it only if it is nearer. *)
let rec objs_hit bvh r t_min t_max =
  match bvh with
  | Bvh_leaf (_, s) -> sphere_hit s r t_min t_max
  | Bvh_split (box, _, _) when not (aabb_hit box r t_min t_max) -> None
  | Bvh_split (_, left, right) ->
    (match objs_hit left r t_min t_max with
     | None -> objs_hit right r t_min t_max
     | Some near as found ->
       (match objs_hit right r t_min near.t with
        | None -> found
        | closer -> closer))
210 | 
211 | type camera = {
212 |   origin: pos;
213 |   llc: pos;
214 |   horizontal: dir;
215 |   vertical: dir;
216 | }
217 | 
218 | let pi = 3.14159265358979312
219 | 
(* Build a camera from an eye position, a look-at point, an up vector,
   a vertical field of view in degrees (converted with pi / 180) and an
   aspect ratio that scales the half-width relative to the half-height. *)
let camera lookfrom lookat vup vfov aspect =
  let theta = vfov *. pi /. 180. in
  let half_height = tan (theta /. 2.) in
  let half_width = aspect *. half_height in
  (* Basis built from the viewing direction and the up vector. *)
  let w = normalise (vec_sub lookfrom lookat) in
  let u = normalise (cross vup w) in
  let v = cross w u in
  let corner = vec_sub lookfrom (scale half_width u) in
  let corner = vec_sub corner (scale half_height v) in
  { origin = lookfrom;
    llc = vec_sub corner w;
    horizontal = scale (2. *. half_width) u;
    vertical = scale (2. *. half_height) v;
  }
236 | 
(* Ray from the camera origin through the view-plane point
   [llc + s * horizontal + t * vertical]. *)
let get_ray (cam: camera) s t : ray =
  let across = vec_add cam.llc (scale s cam.horizontal) in
  let target = vec_add across (scale t cam.vertical) in
  { origin = cam.origin; dir = vec_sub target cam.origin }
246 | 
(* Mirror [v] about the normal [n]: [v - 2 (v . n) n]. *)
let reflect v n =
  let twice_projection = 2. *. dot v n in
  vec_sub v (scale twice_projection n)
249 | 
(* Reflect the incoming ray at the hit point.  Yields the reflected ray
   with the hit colour when the reflected direction has a positive dot
   product with the surface normal, and [None] otherwise. *)
let scatter (r: ray) (hit: hit) =
  let bounced = reflect (normalise r.dir) hit.normal in
  match dot bounced hit.normal > 0.0 with
  | true -> Some ({origin = hit.p; dir = bounced}, hit.colour)
  | false -> None
257 | 
(* Colour seen along ray [r].  On a hit that scatters, the result is
   the hit colour multiplied by the colour of the scattered ray, for at
   most 50 bounces; otherwise a hit is black.  A miss yields a gradient
   between white and (0.5, 0.7, 1.0) based on the y component of the
   normalised direction. *)
let rec ray_colour objs r depth =
  match objs_hit objs r 0.001 1000000000.0 with
  | None ->
    let t = 0.5 *. ((normalise r.dir).y +. 1.0) in
    let bg = { x = 0.5; y = 0.7; z = 1.0} in
    vec_add (scale (1.0 -. t) white) (scale t bg)
  | Some hit ->
    (match scatter r hit with
     | Some (scattered, attenuation) when depth < 50 ->
       vec_mul attenuation (ray_colour objs scattered (depth+1))
     | _ -> black)
272 | 
(* Colour for pixel row [j], column [i]: the indices are divided by the
   image height and width to obtain view-plane coordinates, and the
   resulting camera ray is traced from depth 0. *)
let trace_ray objs width height cam j i : colour =
  let u = float i /. float width
  and v = float j /. float height in
  ray_colour objs (get_ray cam u v) 0
278 | 
(* An RGB pixel as three integer channels. *)
type pixel = int * int * int

(* [colour_to_pixel p] scales each of the x/y/z components of [p] by 255.99
   and truncates to an integer, yielding an (r, g, b) triple. *)
let colour_to_pixel p =
  let channel c = int_of_float (255.99 *. c) in
  (channel p.x, channel p.y, channel p.z)
286 | 
(* A rendered image: a flat pixel array plus its dimensions. *)
type image = {
  pixels: pixel array;
  height: int;
  width: int;
}

(* Short alias for [Printf.sprintf]. *)
let sp = Printf.sprintf

(* [image2ppm image] serialises [image] as a plain-text PPM ("P3") string:
   a header with the magic number, "width height" and the maximum channel
   value 255, followed by one "r g b" line per pixel in array order. *)
let image2ppm : image -> string = fun image ->
  let buf = Buffer.create 4096 in
  Buffer.add_string buf "P3\n";
  Buffer.add_string buf (sp "%d %d\n" image.width image.height);
  Buffer.add_string buf "255\n";
  Array.iter
    (fun (r, g, b) -> Buffer.add_string buf (sp "%d %d %d\n" r g b))
    image.pixels;
  Buffer.contents buf
304 | 
(* [render ~objs ~width ~height ~cam ~pool ~chunk_size] traces one ray per
   pixel and returns the resulting image.
   Pixels are stored in a flat array of [height * width] entries; entry [l]
   has column [l mod width] and row value [height - l / width], so the row
   value passed to [trace_ray] runs from [height] (first array row) down
   to 1 (last array row).
   The per-pixel work is distributed with [Task.parallel_for] over the
   index range 0 .. n-1 using the given [pool] and [chunk_size]; each
   iteration writes only its own array slot. *)
let render ~objs ~width ~height ~cam ~pool ~chunk_size =
  let total = height * width in
  let pixels = Array.make total (0, 0, 0) in
  let shade idx =
    let col = idx mod width
    and row = height - idx / width in
    pixels.(idx) <- colour_to_pixel (trace_ray objs width height cam row col)
  in
  Task.parallel_for pool ~chunk_size ~start:0 ~finish:(total - 1) ~body:shade;
  { pixels; height; width }
319 | 
(* Description of a scene: where the camera sits, what it looks at, its
   field of view (passed on to [camera] as [vfov]) and the spheres. *)
type scene = {
  look_from : pos;
  look_at : pos;
  fov : float;
  spheres : sphere list;
}

(* [from_scene ~pool width height scene] prepares a scene for rendering:
   it builds the BVH over the scene's spheres (using [pool]) and a camera
   whose up vector is +y and whose aspect ratio is [width / height]. *)
let from_scene ~pool width height (scene: scene) : objs * camera =
  let world_up = {x=0.0; y=1.0; z=0.0} in
  let aspect = float width /. float height in
  let cam = camera scene.look_from scene.look_at world_up scene.fov aspect in
  let bvh = mk_bvh ~pool sphere_aabb scene.spheres in
  (bvh, cam)
331 | 
(* Minimal lazy-sequence module, taken from a later OCaml version. *)
module Seq = struct

  (* A sequence is a thunk that yields either the end marker or one
     element together with the rest of the sequence. *)
  type +'a node =
    | Nil
    | Cons of 'a * 'a t

  and 'a t = unit -> 'a node

  (* The sequence with no elements. *)
  let empty () = Nil

  (* [map f seq] lazily applies [f] to every element of [seq]. *)
  let rec map f seq () =
    match seq () with
    | Nil -> Nil
    | Cons (x, rest) -> Cons (f x, map f rest)

  (* [flat_map f seq] lazily concatenates the sequences [f x] for each
     element [x] of [seq]. *)
  let rec flat_map f seq () =
    match seq () with
    | Nil -> Nil
    | Cons (x, rest) -> flat_map_app f (f x) rest ()

  (* [flat_map_app f inner outer] is [append inner (flat_map f outer)]. *)
  and flat_map_app f inner outer () =
    match inner () with
    | Nil -> flat_map f outer ()
    | Cons (x, inner_rest) -> Cons (x, flat_map_app f inner_rest outer)

  (* [fold_left f acc seq] folds [f] over [seq] from the left, forcing the
     whole sequence; written as a tail call. *)
  let rec fold_left f acc seq =
    match seq () with
    | Nil -> acc
    | Cons (x, rest) -> fold_left f (f acc x) rest

end
368 | 
(* [seq_range x y] is the lazy sequence x, x+1, ..., y (inclusive on both
   ends); it is empty when [x > y]. *)
let seq_range x y : int Seq.t =
  let rec from k () =
    if k <= y then Seq.Cons (k, from (k + 1)) else Seq.Nil
  in
  from x
375 | 
(* Taken from a later OCaml version.
   [list_of_seq seq] forces [seq] into a list, preserving order.  The first
   500 elements are consed directly (non-tail recursion); anything beyond
   that is collected with a fold followed by [List.rev], which keeps stack
   usage bounded for long sequences. *)
let list_of_seq seq =
  let rec take_direct budget s : _ list =
    if budget = 0 then
      List.rev (Seq.fold_left (fun acc x -> x :: acc) [] s)
    else
      match s () with
      | Seq.Nil -> []
      | Seq.Cons (x, rest) -> x :: take_direct (budget - 1) rest
  in
  take_direct 500 seq
388 | 
(* [tabulate_2d m n f] returns the list of [f (j, i)] for every
   j in 0..m-1 and i in 0..n-1, with [j] as the outer (slower) index. *)
let tabulate_2d m n f =
  let row j = Seq.map (fun i -> f (j, i)) (seq_range 0 (n-1)) in
  list_of_seq (Seq.flat_map row (seq_range 0 (m-1)))
396 | 
(* The "rgbbox" scene: four n-by-n grids of spheres (n = 10) forming an
   open box of side k = 60 centred on the origin:
   - left wall   at x = -k/2, red;
   - middle wall at z = -k/2, yellow;
   - right wall  at x = +k/2, blue;
   - bottom      at y = -k/2, white.
   Sphere centres are spaced k/n apart and every sphere has radius k/(2n). *)
let rgbbox : scene =
  let n = 10 in
  let k = 60.0 in
  let lo = -.k/.2.0 in
  let hi = k/.2.0 in
  (* Coordinate of grid index [i] along one axis. *)
  let at i = lo +. (k/.float n) *. float i in
  let radius = k/.(float n*.2.0) in
  (* An n-by-n grid of spheres of one colour; [place] maps the two grid
     indices to a sphere centre. *)
  let wall colour place =
    tabulate_2d n n (fun (a, b) -> { pos = place a b; colour; radius })
  in
  let leftwall =
    wall {x=1.0; y=0.0; z=0.0} (fun row col -> {x=lo; y=at row; z=at col}) in
  let midwall =
    wall {x=1.0; y=1.0; z=0.0} (fun row col -> {x=at row; y=at col; z=lo}) in
  let rightwall =
    wall {x=0.0; y=0.0; z=1.0} (fun row col -> {x=hi; y=at row; z=at col}) in
  let bottom =
    wall {x=1.0; y=1.0; z=1.0} (fun row col -> {x=at row; y=lo; z=at col}) in
  {
    spheres = leftwall @ midwall @ rightwall @ bottom;
    look_from = {x=0.0; y=30.0; z=30.0};
    look_at = {x=0.0; y= -.1.0; z= -.1.0};
    fov = 75.0
  }
439 | 
(* The "irreg" scene: a single n-by-n grid (n = 100) of white spheres in
   the plane y = 0, spanning k = 600 units in x and z, each with radius
   k/(2n). *)
let irreg : scene =
  let n = 100 in
  let k = 600.0 in
  (* Coordinate of grid index [i] along one axis. *)
  let at i = -.k/.2.0 +. (k/.float n) *. float i in
  let radius = k/.(float n *. 2.0) in
  let bottom =
    tabulate_2d n n (fun (x,z) ->
      { pos = {x = at x; y = 0.0; z = at z}
      ; colour = white
      ; radius })
  in
  { spheres = bottom
  ; look_from = {x=0.0; y=12.0; z=30.0}
  ; look_at = {x=0.0; y=10.0; z= -.1.0}
  ; fov = 75.0 }
454 | 
(* [getopt needle argv f def] scans [argv] for the first element equal to
   [needle] that is followed by another element, and returns [f] applied
   to that following element.  Returns [def] when no such pair exists
   (including when [needle] is the last element). *)
let rec getopt needle argv f def =
  match argv with
  | opt :: value :: _ when opt = needle -> f value
  | _ :: (_ :: _ as rest) -> getopt needle rest f def
  | _ -> def
461 | 
(* [some v] wraps [v] in [Some]; handy as a first-class function. *)
let some value = Some value
(* The identity function. *)
let id = fun value -> value
464 | 
(* C stub named "useconds"; its result is treated as a microsecond count. *)
external useconds : unit -> int = "useconds"

(* [seconds ()] converts the microsecond count from [useconds] to seconds. *)
let seconds () =
  let micros = useconds () in
  float_of_int micros /. 1000000.0
468 | 
(* Program entry point.
   Command-line options (each followed by a value):
     --cores N              value from which the pool size is derived
                            (default 8)
     --chunk-size-render N  chunk size for the parallel render (default 256)
     -m N                   image height (default 200)
     -n N                   image width (default 200)
     -f FILE                write the image as PPM to FILE (default: none)
     -s NAME                scene, "rgbbox" or "irreg" (default "rgbbox")
   Builds the scene's BVH and camera, renders the image, logs the time
   taken by each phase and, if -f was given, writes the PPM file.
   Fails with [Failure] on an unknown scene name. *)
let () =
  let argv = Sys.argv |> Array.to_list in

  let num_domains = getopt "--cores" argv int_of_string 8 in
  let chunk_size_render = getopt "--chunk-size-render" argv int_of_string 256 in

  let height = getopt "-m" argv int_of_string 200 in
  let width = getopt "-n" argv int_of_string 200 in
  let imgfile = getopt "-f" argv some None in
  let scene_name = getopt "-s" argv id "rgbbox" in
  let scene =
    match scene_name with
    | "rgbbox" -> rgbbox
    | "irreg" -> irreg
    | s -> failwith ("No such scene: " ^ s) in
  log @@ sp "Using scene '%s' (-s to switch).\n" scene_name;
  (* Note: timing uses [seconds] because the Unix module was not
     implemented in Multicore OCaml, and Sys.time may report time
     accumulated over all threads. *)

  (* The pool is set up with [num_domains - 1]. *)
  let pool = Task.setup_pool (num_domains - 1) in
  log "BVH construction";
  let t = seconds() in
  let (objs, cam) = from_scene ~pool width height scene in
  let t' = seconds() in
  log @@ sp "Scene BVH construction in %fs.\n" (t' -. t);

  log "rendering";
  let t = seconds() in
  let result =
    render
      ~pool ~chunk_size:chunk_size_render
      ~objs ~width ~height ~cam
  in
  let t' = seconds() in
  log @@ sp "Rendering in %fs.\n" (t' -. t);
  Task.teardown_pool pool;

  match imgfile with
  | None ->
    log "-f not passed, so not writing image to file.\n"
  | Some imgfile' ->
    log @@ sp "Writing image to %s.\n" imgfile';
    let out_channel = open_out imgfile' in
    (* Release the channel even if serialising or writing fails. *)
    (try output_string out_channel (image2ppm result)
     with e -> close_out_noerr out_channel; raise e);
    (* [close_out] flushes buffered data before closing the descriptor. *)
    close_out out_channel;
    exit 0
515 | 
516 | 
517 | 
518 | 


--------------------------------------------------------------------------------
/ocaml/shell.nix:
--------------------------------------------------------------------------------
 1 | with import (builtins.fetchTarball {
 2 |   url = "https://github.com/NixOS/nixpkgs/archive/820177eded91f3908cfc72dfee00e831ea3d0060.zip";
 3 |   sha256 = "1yqx5zy154f8057inwjp2ranizgilvpblqq31cy7nryrwj2apics";
 4 | }) {};
 5 | stdenv.mkDerivation {
 6 |   name = "ray";
 7 |   # FIXME: this is not enough.  See README.md.
 8 |   buildInputs = [ opam ];
 9 | }
10 | 


--------------------------------------------------------------------------------
/ocaml/timing.c:
--------------------------------------------------------------------------------
 1 | #include <caml/mlvalues.h>
 2 | #include <sys/time.h>
 3 | #include <stddef.h>
 4 | 
 5 | CAMLprim value useconds() {
 6 |   struct timeval t;
 7 |   gettimeofday(&t, NULL);
 8 |   return Val_int(t.tv_sec*1000000+t.tv_usec);
 9 | }
10 | 


--------------------------------------------------------------------------------
/rgbbox.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/athas/raytracers/888ac8f126499649b40b0ea8095db9235254ee9b/rgbbox.png


--------------------------------------------------------------------------------
/rust/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | *~
3 | *ppm
4 | 


--------------------------------------------------------------------------------
/rust/Cargo.lock:
--------------------------------------------------------------------------------
  1 | # This file is automatically @generated by Cargo.
  2 | # It is not intended for manual editing.
  3 | version = 3
  4 | 
  5 | [[package]]
  6 | name = "ansi_term"
  7 | version = "0.11.0"
  8 | source = "registry+https://github.com/rust-lang/crates.io-index"
  9 | checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
 10 | dependencies = [
 11 |  "winapi",
 12 | ]
 13 | 
 14 | [[package]]
 15 | name = "atty"
 16 | version = "0.2.14"
 17 | source = "registry+https://github.com/rust-lang/crates.io-index"
 18 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
 19 | dependencies = [
 20 |  "hermit-abi",
 21 |  "libc",
 22 |  "winapi",
 23 | ]
 24 | 
 25 | [[package]]
 26 | name = "autocfg"
 27 | version = "1.0.0"
 28 | source = "registry+https://github.com/rust-lang/crates.io-index"
 29 | checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"
 30 | 
 31 | [[package]]
 32 | name = "bitflags"
 33 | version = "1.2.1"
 34 | source = "registry+https://github.com/rust-lang/crates.io-index"
 35 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
 36 | 
 37 | [[package]]
 38 | name = "bstr"
 39 | version = "0.2.12"
 40 | source = "registry+https://github.com/rust-lang/crates.io-index"
 41 | checksum = "2889e6d50f394968c8bf4240dc3f2a7eb4680844d27308f798229ac9d4725f41"
 42 | dependencies = [
 43 |  "lazy_static",
 44 |  "memchr",
 45 |  "regex-automata",
 46 |  "serde",
 47 | ]
 48 | 
 49 | [[package]]
 50 | name = "bumpalo"
 51 | version = "3.2.1"
 52 | source = "registry+https://github.com/rust-lang/crates.io-index"
 53 | checksum = "12ae9db68ad7fac5fe51304d20f016c911539251075a214f8e663babefa35187"
 54 | 
 55 | [[package]]
 56 | name = "byteorder"
 57 | version = "1.3.4"
 58 | source = "registry+https://github.com/rust-lang/crates.io-index"
 59 | checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
 60 | 
 61 | [[package]]
 62 | name = "cast"
 63 | version = "0.2.3"
 64 | source = "registry+https://github.com/rust-lang/crates.io-index"
 65 | checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0"
 66 | dependencies = [
 67 |  "rustc_version",
 68 | ]
 69 | 
 70 | [[package]]
 71 | name = "cfg-if"
 72 | version = "0.1.10"
 73 | source = "registry+https://github.com/rust-lang/crates.io-index"
 74 | checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
 75 | 
 76 | [[package]]
 77 | name = "clap"
 78 | version = "2.33.0"
 79 | source = "registry+https://github.com/rust-lang/crates.io-index"
 80 | checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
 81 | dependencies = [
 82 |  "ansi_term",
 83 |  "atty",
 84 |  "bitflags",
 85 |  "strsim",
 86 |  "textwrap",
 87 |  "unicode-width",
 88 |  "vec_map",
 89 | ]
 90 | 
 91 | [[package]]
 92 | name = "criterion"
 93 | version = "0.3.1"
 94 | source = "registry+https://github.com/rust-lang/crates.io-index"
 95 | checksum = "1fc755679c12bda8e5523a71e4d654b6bf2e14bd838dfc48cde6559a05caf7d1"
 96 | dependencies = [
 97 |  "atty",
 98 |  "cast",
 99 |  "clap",
100 |  "criterion-plot",
101 |  "csv",
102 |  "itertools",
103 |  "lazy_static",
104 |  "num-traits",
105 |  "oorandom",
106 |  "plotters",
107 |  "rayon",
108 |  "regex",
109 |  "serde",
110 |  "serde_derive",
111 |  "serde_json",
112 |  "tinytemplate",
113 |  "walkdir",
114 | ]
115 | 
116 | [[package]]
117 | name = "criterion-plot"
118 | version = "0.4.1"
119 | source = "registry+https://github.com/rust-lang/crates.io-index"
120 | checksum = "a01e15e0ea58e8234f96146b1f91fa9d0e4dd7a38da93ff7a75d42c0b9d3a545"
121 | dependencies = [
122 |  "cast",
123 |  "itertools",
124 | ]
125 | 
126 | [[package]]
127 | name = "crossbeam-deque"
128 | version = "0.7.4"
129 | source = "registry+https://github.com/rust-lang/crates.io-index"
130 | checksum = "c20ff29ded3204c5106278a81a38f4b482636ed4fa1e6cfbeef193291beb29ed"
131 | dependencies = [
132 |  "crossbeam-epoch",
133 |  "crossbeam-utils",
134 |  "maybe-uninit",
135 | ]
136 | 
137 | [[package]]
138 | name = "crossbeam-epoch"
139 | version = "0.8.2"
140 | source = "registry+https://github.com/rust-lang/crates.io-index"
141 | checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace"
142 | dependencies = [
143 |  "autocfg",
144 |  "cfg-if",
145 |  "crossbeam-utils",
146 |  "lazy_static",
147 |  "maybe-uninit",
148 |  "memoffset",
149 |  "scopeguard",
150 | ]
151 | 
152 | [[package]]
153 | name = "crossbeam-queue"
154 | version = "0.2.1"
155 | source = "registry+https://github.com/rust-lang/crates.io-index"
156 | checksum = "c695eeca1e7173472a32221542ae469b3e9aac3a4fc81f7696bcad82029493db"
157 | dependencies = [
158 |  "cfg-if",
159 |  "crossbeam-utils",
160 | ]
161 | 
162 | [[package]]
163 | name = "crossbeam-utils"
164 | version = "0.7.2"
165 | source = "registry+https://github.com/rust-lang/crates.io-index"
166 | checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8"
167 | dependencies = [
168 |  "autocfg",
169 |  "cfg-if",
170 |  "lazy_static",
171 | ]
172 | 
173 | [[package]]
174 | name = "csv"
175 | version = "1.1.3"
176 | source = "registry+https://github.com/rust-lang/crates.io-index"
177 | checksum = "00affe7f6ab566df61b4be3ce8cf16bc2576bca0963ceb0955e45d514bf9a279"
178 | dependencies = [
179 |  "bstr",
180 |  "csv-core",
181 |  "itoa",
182 |  "ryu",
183 |  "serde",
184 | ]
185 | 
186 | [[package]]
187 | name = "csv-core"
188 | version = "0.1.10"
189 | source = "registry+https://github.com/rust-lang/crates.io-index"
190 | checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
191 | dependencies = [
192 |  "memchr",
193 | ]
194 | 
195 | [[package]]
196 | name = "either"
197 | version = "1.5.3"
198 | source = "registry+https://github.com/rust-lang/crates.io-index"
199 | checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
200 | 
201 | [[package]]
202 | name = "heck"
203 | version = "0.3.1"
204 | source = "registry+https://github.com/rust-lang/crates.io-index"
205 | checksum = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
206 | dependencies = [
207 |  "unicode-segmentation",
208 | ]
209 | 
210 | [[package]]
211 | name = "hermit-abi"
212 | version = "0.1.10"
213 | source = "registry+https://github.com/rust-lang/crates.io-index"
214 | checksum = "725cf19794cf90aa94e65050cb4191ff5d8fa87a498383774c47b332e3af952e"
215 | dependencies = [
216 |  "libc",
217 | ]
218 | 
219 | [[package]]
220 | name = "itertools"
221 | version = "0.8.2"
222 | source = "registry+https://github.com/rust-lang/crates.io-index"
223 | checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484"
224 | dependencies = [
225 |  "either",
226 | ]
227 | 
228 | [[package]]
229 | name = "itoa"
230 | version = "0.4.5"
231 | source = "registry+https://github.com/rust-lang/crates.io-index"
232 | checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e"
233 | 
234 | [[package]]
235 | name = "js-sys"
236 | version = "0.3.37"
237 | source = "registry+https://github.com/rust-lang/crates.io-index"
238 | checksum = "6a27d435371a2fa5b6d2b028a74bbdb1234f308da363226a2854ca3ff8ba7055"
239 | dependencies = [
240 |  "wasm-bindgen",
241 | ]
242 | 
243 | [[package]]
244 | name = "lazy_static"
245 | version = "1.4.0"
246 | source = "registry+https://github.com/rust-lang/crates.io-index"
247 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
248 | 
249 | [[package]]
250 | name = "libc"
251 | version = "0.2.68"
252 | source = "registry+https://github.com/rust-lang/crates.io-index"
253 | checksum = "dea0c0405123bba743ee3f91f49b1c7cfb684eef0da0a50110f758ccf24cdff0"
254 | 
255 | [[package]]
256 | name = "log"
257 | version = "0.4.8"
258 | source = "registry+https://github.com/rust-lang/crates.io-index"
259 | checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7"
260 | dependencies = [
261 |  "cfg-if",
262 | ]
263 | 
264 | [[package]]
265 | name = "maybe-uninit"
266 | version = "2.0.0"
267 | source = "registry+https://github.com/rust-lang/crates.io-index"
268 | checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00"
269 | 
270 | [[package]]
271 | name = "memchr"
272 | version = "2.3.3"
273 | source = "registry+https://github.com/rust-lang/crates.io-index"
274 | checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
275 | 
276 | [[package]]
277 | name = "memoffset"
278 | version = "0.5.4"
279 | source = "registry+https://github.com/rust-lang/crates.io-index"
280 | checksum = "b4fc2c02a7e374099d4ee95a193111f72d2110197fe200272371758f6c3643d8"
281 | dependencies = [
282 |  "autocfg",
283 | ]
284 | 
285 | [[package]]
286 | name = "num-traits"
287 | version = "0.2.11"
288 | source = "registry+https://github.com/rust-lang/crates.io-index"
289 | checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096"
290 | dependencies = [
291 |  "autocfg",
292 | ]
293 | 
294 | [[package]]
295 | name = "num_cpus"
296 | version = "1.12.0"
297 | source = "registry+https://github.com/rust-lang/crates.io-index"
298 | checksum = "46203554f085ff89c235cd12f7075f3233af9b11ed7c9e16dfe2560d03313ce6"
299 | dependencies = [
300 |  "hermit-abi",
301 |  "libc",
302 | ]
303 | 
304 | [[package]]
305 | name = "oorandom"
306 | version = "11.1.0"
307 | source = "registry+https://github.com/rust-lang/crates.io-index"
308 | checksum = "ebcec7c9c2a95cacc7cd0ecb89d8a8454eca13906f6deb55258ffff0adeb9405"
309 | 
310 | [[package]]
311 | name = "paw"
312 | version = "1.0.0"
313 | source = "registry+https://github.com/rust-lang/crates.io-index"
314 | checksum = "09c0fc9b564dbc3dc2ed7c92c0c144f4de340aa94514ce2b446065417c4084e9"
315 | dependencies = [
316 |  "paw-attributes",
317 |  "paw-raw",
318 | ]
319 | 
320 | [[package]]
321 | name = "paw-attributes"
322 | version = "1.0.2"
323 | source = "registry+https://github.com/rust-lang/crates.io-index"
324 | checksum = "0f35583365be5d148e959284f42526841917b7bfa09e2d1a7ad5dde2cf0eaa39"
325 | dependencies = [
326 |  "proc-macro2 1.0.10",
327 |  "quote 1.0.3",
328 |  "syn 1.0.17",
329 | ]
330 | 
331 | [[package]]
332 | name = "paw-raw"
333 | version = "1.0.0"
334 | source = "registry+https://github.com/rust-lang/crates.io-index"
335 | checksum = "7f0b59668fe80c5afe998f0c0bf93322bf2cd66cafeeb80581f291716f3467f2"
336 | 
337 | [[package]]
338 | name = "plotters"
339 | version = "0.2.12"
340 | source = "registry+https://github.com/rust-lang/crates.io-index"
341 | checksum = "4e3bb8da247d27ae212529352020f3e5ee16e83c0c258061d27b08ab92675eeb"
342 | dependencies = [
343 |  "js-sys",
344 |  "num-traits",
345 |  "wasm-bindgen",
346 |  "web-sys",
347 | ]
348 | 
349 | [[package]]
350 | name = "proc-macro2"
351 | version = "0.4.30"
352 | source = "registry+https://github.com/rust-lang/crates.io-index"
353 | checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759"
354 | dependencies = [
355 |  "unicode-xid 0.1.0",
356 | ]
357 | 
358 | [[package]]
359 | name = "proc-macro2"
360 | version = "1.0.10"
361 | source = "registry+https://github.com/rust-lang/crates.io-index"
362 | checksum = "df246d292ff63439fea9bc8c0a270bed0e390d5ebd4db4ba15aba81111b5abe3"
363 | dependencies = [
364 |  "unicode-xid 0.2.0",
365 | ]
366 | 
367 | [[package]]
368 | name = "quote"
369 | version = "0.6.13"
370 | source = "registry+https://github.com/rust-lang/crates.io-index"
371 | checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1"
372 | dependencies = [
373 |  "proc-macro2 0.4.30",
374 | ]
375 | 
376 | [[package]]
377 | name = "quote"
378 | version = "1.0.3"
379 | source = "registry+https://github.com/rust-lang/crates.io-index"
380 | checksum = "2bdc6c187c65bca4260c9011c9e3132efe4909da44726bad24cf7572ae338d7f"
381 | dependencies = [
382 |  "proc-macro2 1.0.10",
383 | ]
384 | 
385 | [[package]]
386 | name = "ray"
387 | version = "0.1.0"
388 | dependencies = [
389 |  "criterion",
390 |  "lazy_static",
391 |  "paw",
392 |  "rayon",
393 |  "structopt",
394 | ]
395 | 
396 | [[package]]
397 | name = "rayon"
398 | version = "1.3.0"
399 | source = "registry+https://github.com/rust-lang/crates.io-index"
400 | checksum = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098"
401 | dependencies = [
402 |  "crossbeam-deque",
403 |  "either",
404 |  "rayon-core",
405 | ]
406 | 
407 | [[package]]
408 | name = "rayon-core"
409 | version = "1.7.0"
410 | source = "registry+https://github.com/rust-lang/crates.io-index"
411 | checksum = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9"
412 | dependencies = [
413 |  "crossbeam-deque",
414 |  "crossbeam-queue",
415 |  "crossbeam-utils",
416 |  "lazy_static",
417 |  "num_cpus",
418 | ]
419 | 
420 | [[package]]
421 | name = "regex"
422 | version = "1.3.6"
423 | source = "registry+https://github.com/rust-lang/crates.io-index"
424 | checksum = "7f6946991529684867e47d86474e3a6d0c0ab9b82d5821e314b1ede31fa3a4b3"
425 | dependencies = [
426 |  "regex-syntax",
427 | ]
428 | 
429 | [[package]]
430 | name = "regex-automata"
431 | version = "0.1.9"
432 | source = "registry+https://github.com/rust-lang/crates.io-index"
433 | checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4"
434 | dependencies = [
435 |  "byteorder",
436 | ]
437 | 
438 | [[package]]
439 | name = "regex-syntax"
440 | version = "0.6.17"
441 | source = "registry+https://github.com/rust-lang/crates.io-index"
442 | checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae"
443 | 
444 | [[package]]
445 | name = "rustc_version"
446 | version = "0.2.3"
447 | source = "registry+https://github.com/rust-lang/crates.io-index"
448 | checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
449 | dependencies = [
450 |  "semver",
451 | ]
452 | 
453 | [[package]]
454 | name = "ryu"
455 | version = "1.0.3"
456 | source = "registry+https://github.com/rust-lang/crates.io-index"
457 | checksum = "535622e6be132bccd223f4bb2b8ac8d53cda3c7a6394944d3b2b33fb974f9d76"
458 | 
459 | [[package]]
460 | name = "same-file"
461 | version = "1.0.6"
462 | source = "registry+https://github.com/rust-lang/crates.io-index"
463 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
464 | dependencies = [
465 |  "winapi-util",
466 | ]
467 | 
468 | [[package]]
469 | name = "scopeguard"
470 | version = "1.1.0"
471 | source = "registry+https://github.com/rust-lang/crates.io-index"
472 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
473 | 
474 | [[package]]
475 | name = "semver"
476 | version = "0.9.0"
477 | source = "registry+https://github.com/rust-lang/crates.io-index"
478 | checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
479 | dependencies = [
480 |  "semver-parser",
481 | ]
482 | 
483 | [[package]]
484 | name = "semver-parser"
485 | version = "0.7.0"
486 | source = "registry+https://github.com/rust-lang/crates.io-index"
487 | checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
488 | 
489 | [[package]]
490 | name = "serde"
491 | version = "1.0.106"
492 | source = "registry+https://github.com/rust-lang/crates.io-index"
493 | checksum = "36df6ac6412072f67cf767ebbde4133a5b2e88e76dc6187fa7104cd16f783399"
494 | 
495 | [[package]]
496 | name = "serde_derive"
497 | version = "1.0.106"
498 | source = "registry+https://github.com/rust-lang/crates.io-index"
499 | checksum = "9e549e3abf4fb8621bd1609f11dfc9f5e50320802273b12f3811a67e6716ea6c"
500 | dependencies = [
501 |  "proc-macro2 1.0.10",
502 |  "quote 1.0.3",
503 |  "syn 1.0.17",
504 | ]
505 | 
506 | [[package]]
507 | name = "serde_json"
508 | version = "1.0.51"
509 | source = "registry+https://github.com/rust-lang/crates.io-index"
510 | checksum = "da07b57ee2623368351e9a0488bb0b261322a15a6e0ae53e243cbdc0f4208da9"
511 | dependencies = [
512 |  "itoa",
513 |  "ryu",
514 |  "serde",
515 | ]
516 | 
517 | [[package]]
518 | name = "strsim"
519 | version = "0.8.0"
520 | source = "registry+https://github.com/rust-lang/crates.io-index"
521 | checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
522 | 
523 | [[package]]
524 | name = "structopt"
525 | version = "0.2.18"
526 | source = "registry+https://github.com/rust-lang/crates.io-index"
527 | checksum = "16c2cdbf9cc375f15d1b4141bc48aeef444806655cd0e904207edc8d68d86ed7"
528 | dependencies = [
529 |  "clap",
530 |  "structopt-derive",
531 | ]
532 | 
533 | [[package]]
534 | name = "structopt-derive"
535 | version = "0.2.18"
536 | source = "registry+https://github.com/rust-lang/crates.io-index"
537 | checksum = "53010261a84b37689f9ed7d395165029f9cc7abb9f56bbfe86bee2597ed25107"
538 | dependencies = [
539 |  "heck",
540 |  "proc-macro2 0.4.30",
541 |  "quote 0.6.13",
542 |  "syn 0.15.44",
543 | ]
544 | 
545 | [[package]]
546 | name = "syn"
547 | version = "0.15.44"
548 | source = "registry+https://github.com/rust-lang/crates.io-index"
549 | checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5"
550 | dependencies = [
551 |  "proc-macro2 0.4.30",
552 |  "quote 0.6.13",
553 |  "unicode-xid 0.1.0",
554 | ]
555 | 
556 | [[package]]
557 | name = "syn"
558 | version = "1.0.17"
559 | source = "registry+https://github.com/rust-lang/crates.io-index"
560 | checksum = "0df0eb663f387145cab623dea85b09c2c5b4b0aef44e945d928e682fce71bb03"
561 | dependencies = [
562 |  "proc-macro2 1.0.10",
563 |  "quote 1.0.3",
564 |  "unicode-xid 0.2.0",
565 | ]
566 | 
567 | [[package]]
568 | name = "textwrap"
569 | version = "0.11.0"
570 | source = "registry+https://github.com/rust-lang/crates.io-index"
571 | checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
572 | dependencies = [
573 |  "unicode-width",
574 | ]
575 | 
576 | [[package]]
577 | name = "tinytemplate"
578 | version = "1.0.3"
579 | source = "registry+https://github.com/rust-lang/crates.io-index"
580 | checksum = "57a3c6667d3e65eb1bc3aed6fd14011c6cbc3a0665218ab7f5daf040b9ec371a"
581 | dependencies = [
582 |  "serde",
583 |  "serde_json",
584 | ]
585 | 
586 | [[package]]
587 | name = "unicode-segmentation"
588 | version = "1.6.0"
589 | source = "registry+https://github.com/rust-lang/crates.io-index"
590 | checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0"
591 | 
592 | [[package]]
593 | name = "unicode-width"
594 | version = "0.1.7"
595 | source = "registry+https://github.com/rust-lang/crates.io-index"
596 | checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
597 | 
598 | [[package]]
599 | name = "unicode-xid"
600 | version = "0.1.0"
601 | source = "registry+https://github.com/rust-lang/crates.io-index"
602 | checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
603 | 
604 | [[package]]
605 | name = "unicode-xid"
606 | version = "0.2.0"
607 | source = "registry+https://github.com/rust-lang/crates.io-index"
608 | checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
609 | 
610 | [[package]]
611 | name = "vec_map"
612 | version = "0.8.1"
613 | source = "registry+https://github.com/rust-lang/crates.io-index"
614 | checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
615 | 
616 | [[package]]
617 | name = "walkdir"
618 | version = "2.3.1"
619 | source = "registry+https://github.com/rust-lang/crates.io-index"
620 | checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d"
621 | dependencies = [
622 |  "same-file",
623 |  "winapi",
624 |  "winapi-util",
625 | ]
626 | 
627 | [[package]]
628 | name = "wasm-bindgen"
629 | version = "0.2.60"
630 | source = "registry+https://github.com/rust-lang/crates.io-index"
631 | checksum = "2cc57ce05287f8376e998cbddfb4c8cb43b84a7ec55cf4551d7c00eef317a47f"
632 | dependencies = [
633 |  "cfg-if",
634 |  "wasm-bindgen-macro",
635 | ]
636 | 
637 | [[package]]
638 | name = "wasm-bindgen-backend"
639 | version = "0.2.60"
640 | source = "registry+https://github.com/rust-lang/crates.io-index"
641 | checksum = "d967d37bf6c16cca2973ca3af071d0a2523392e4a594548155d89a678f4237cd"
642 | dependencies = [
643 |  "bumpalo",
644 |  "lazy_static",
645 |  "log",
646 |  "proc-macro2 1.0.10",
647 |  "quote 1.0.3",
648 |  "syn 1.0.17",
649 |  "wasm-bindgen-shared",
650 | ]
651 | 
652 | [[package]]
653 | name = "wasm-bindgen-macro"
654 | version = "0.2.60"
655 | source = "registry+https://github.com/rust-lang/crates.io-index"
656 | checksum = "8bd151b63e1ea881bb742cd20e1d6127cef28399558f3b5d415289bc41eee3a4"
657 | dependencies = [
658 |  "quote 1.0.3",
659 |  "wasm-bindgen-macro-support",
660 | ]
661 | 
662 | [[package]]
663 | name = "wasm-bindgen-macro-support"
664 | version = "0.2.60"
665 | source = "registry+https://github.com/rust-lang/crates.io-index"
666 | checksum = "d68a5b36eef1be7868f668632863292e37739656a80fc4b9acec7b0bd35a4931"
667 | dependencies = [
668 |  "proc-macro2 1.0.10",
669 |  "quote 1.0.3",
670 |  "syn 1.0.17",
671 |  "wasm-bindgen-backend",
672 |  "wasm-bindgen-shared",
673 | ]
674 | 
675 | [[package]]
676 | name = "wasm-bindgen-shared"
677 | version = "0.2.60"
678 | source = "registry+https://github.com/rust-lang/crates.io-index"
679 | checksum = "daf76fe7d25ac79748a37538b7daeed1c7a6867c92d3245c12c6222e4a20d639"
680 | 
681 | [[package]]
682 | name = "web-sys"
683 | version = "0.3.37"
684 | source = "registry+https://github.com/rust-lang/crates.io-index"
685 | checksum = "2d6f51648d8c56c366144378a33290049eafdd784071077f6fe37dae64c1c4cb"
686 | dependencies = [
687 |  "js-sys",
688 |  "wasm-bindgen",
689 | ]
690 | 
691 | [[package]]
692 | name = "winapi"
693 | version = "0.3.8"
694 | source = "registry+https://github.com/rust-lang/crates.io-index"
695 | checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
696 | dependencies = [
697 |  "winapi-i686-pc-windows-gnu",
698 |  "winapi-x86_64-pc-windows-gnu",
699 | ]
700 | 
701 | [[package]]
702 | name = "winapi-i686-pc-windows-gnu"
703 | version = "0.4.0"
704 | source = "registry+https://github.com/rust-lang/crates.io-index"
705 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
706 | 
707 | [[package]]
708 | name = "winapi-util"
709 | version = "0.1.4"
710 | source = "registry+https://github.com/rust-lang/crates.io-index"
711 | checksum = "fa515c5163a99cc82bab70fd3bfdd36d827be85de63737b40fcef2ce084a436e"
712 | dependencies = [
713 |  "winapi",
714 | ]
715 | 
716 | [[package]]
717 | name = "winapi-x86_64-pc-windows-gnu"
718 | version = "0.4.0"
719 | source = "registry+https://github.com/rust-lang/crates.io-index"
720 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
721 | 


--------------------------------------------------------------------------------
/rust/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "ray"
 3 | version = "0.1.0"
 4 | authors = ["Philip Munksgaard <philip@munksgaard.me>"]
 5 | edition = "2018"
 6 | 
 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 8 | 
 9 | [dependencies]
10 | rayon = "1.1"
11 | structopt = { version = "0.2.18", features = ["paw"] }
12 | paw = "1.0.0"
13 | lazy_static = "1.4.0"
14 | 
15 | [profile.dev]
16 | opt-level = 3
17 | 
18 | [dev-dependencies]
19 | criterion = "0.3.1"
20 | 
21 | [[bench]]
22 | name = "ray"
23 | harness = false
24 | 


--------------------------------------------------------------------------------
/rust/Makefile:
--------------------------------------------------------------------------------
 1 | all: bench
 2 | 
 3 | .PHONY: bench
 4 | bench:
 5 | 	cargo bench
 6 | 
 7 | rgbbox_1000.ppm:
 8 | 	cargo run --release -- -f $@ -s rgbbox -n 1000 -m 1000
 9 | 
10 | irreg_1000.ppm:
11 | 	cargo run --release -- -f $@ -s irreg -n 1000 -m 1000
12 | 
13 | .PHONY: clean
14 | 
15 | clean:
16 | 	rm -f *.ppm
17 | 


--------------------------------------------------------------------------------
/rust/README.md:
--------------------------------------------------------------------------------
 1 | # Rust implementation
 2 | 
 3 | ## Prerequisites
 4 | 
 5 | Rust and Cargo (tested on stable 1.42.0)
 6 | 
 7 | ## Running
 8 | 
 9 | Simply run the raytracer with
10 | 
11 | ```
12 | cargo run
13 | ```
14 | 
15 | For more information, run `cargo run -- --help`
16 | 


--------------------------------------------------------------------------------
/rust/benches/ray.rs:
--------------------------------------------------------------------------------
 1 | use criterion::{black_box, criterion_group, criterion_main, Criterion};
 2 | use ray;
 3 | 
 4 | pub fn irreg_benchmark(c: &mut Criterion) {
 5 |     let mut group = c.benchmark_group("irreg");
 6 | 
 7 |     group.sample_size(10);
 8 | 
 9 |     let irreg_scene = (*ray::sample_scenes::IRREG).clone();
10 | 
11 |     group.bench_function("from_scene", |b| {
12 |         b.iter(|| ray::from_scene(1000, 1000, black_box(&irreg_scene)))
13 |     });
14 | 
15 |     let (objs, cam) = ray::from_scene(1000, 1000, &irreg_scene);
16 | 
17 |     group.bench_function("render", |b| {
18 |         b.iter(|| ray::render(black_box(&objs), 1000, 1000, black_box(&cam)))
19 |     });
20 | }
21 | 
22 | pub fn rgbbox_benchmark(c: &mut Criterion) {
23 |     let mut group = c.benchmark_group("rgbbox");
24 | 
25 |     group.sample_size(10);
26 | 
27 |     let irreg_scene = (*ray::sample_scenes::RGBBOX).clone();
28 | 
29 |     group.bench_function("from_scene", |b| {
30 |         b.iter(|| ray::from_scene(1000, 1000, black_box(&irreg_scene)))
31 |     });
32 | 
33 |     let (objs, cam) = ray::from_scene(1000, 1000, &irreg_scene);
34 | 
35 |     group.bench_function("render", |b| {
36 |         b.iter(|| ray::render(black_box(&objs), 1000, 1000, black_box(&cam)))
37 |     });
38 | }
39 | 
40 | criterion_group!(benches, irreg_benchmark, rgbbox_benchmark);
41 | criterion_main!(benches);
42 | 


--------------------------------------------------------------------------------
/rust/shell.nix:
--------------------------------------------------------------------------------
1 | with import (builtins.fetchTarball {
2 |   url = "https://github.com/NixOS/nixpkgs/archive/820177eded91f3908cfc72dfee00e831ea3d0060.zip";
3 |   sha256 = "1yqx5zy154f8057inwjp2ranizgilvpblqq31cy7nryrwj2apics";
4 | }) {};
5 | stdenv.mkDerivation {
6 |   name = "ray";
7 |   buildInputs = [ cargo rustc ];
8 | }
9 | 


--------------------------------------------------------------------------------
/rust/src/lib.rs:
--------------------------------------------------------------------------------
  1 | use std::cmp::{Ordering, PartialOrd};
  2 | use std::io;
  3 | use std::ops::{Add, Mul, Sub};
  4 | 
  5 | use rayon::prelude::*;
  6 | 
  7 | #[derive(Clone, Copy)]
  8 | pub struct Vec3 {
  9 |     x: f32,
 10 |     y: f32,
 11 |     z: f32,
 12 | }
 13 | 
 14 | impl Vec3 {
 15 |     fn scale(self, s: f32) -> Vec3 {
 16 |         Vec3 {
 17 |             x: self.x * s,
 18 |             y: self.y * s,
 19 |             z: self.z * s,
 20 |         }
 21 |     }
 22 | 
 23 |     fn norm(&self) -> f32 {
 24 |         self.dot(&self).sqrt()
 25 |     }
 26 | 
 27 |     fn normalise(&self) -> Vec3 {
 28 |         self.scale(1.0 / self.norm())
 29 |     }
 30 | 
 31 |     fn dot(&self, other: &Vec3) -> f32 {
 32 |         let v3 = *self * *other;
 33 |         v3.x + v3.y + v3.z
 34 |     }
 35 | 
 36 |     fn cross(&self, other: &Vec3) -> Vec3 {
 37 |         Vec3 {
 38 |             x: self.y * other.z - self.z * other.y,
 39 |             y: self.z * other.x - self.x * other.z,
 40 |             z: self.x * other.y - self.y * other.x,
 41 |         }
 42 |     }
 43 | 
 44 |     fn reflect(&self, n: &Vec3) -> Vec3 {
 45 |         *self - n.scale(2.0 * self.dot(n))
 46 |     }
 47 | }
 48 | 
 49 | impl Add for Vec3 {
 50 |     type Output = Vec3;
 51 | 
 52 |     fn add(self, other: Self) -> Self {
 53 |         Self {
 54 |             x: self.x + other.x,
 55 |             y: self.y + other.y,
 56 |             z: self.z + other.z,
 57 |         }
 58 |     }
 59 | }
 60 | 
 61 | impl Sub for Vec3 {
 62 |     type Output = Vec3;
 63 | 
 64 |     fn sub(self, other: Self) -> Self {
 65 |         Self {
 66 |             x: self.x - other.x,
 67 |             y: self.y - other.y,
 68 |             z: self.z - other.z,
 69 |         }
 70 |     }
 71 | }
 72 | 
 73 | impl Mul for Vec3 {
 74 |     type Output = Vec3;
 75 | 
 76 |     fn mul(self, other: Self) -> Self {
 77 |         Self {
 78 |             x: self.x * other.x,
 79 |             y: self.y * other.y,
 80 |             z: self.z * other.z,
 81 |         }
 82 |     }
 83 | }
 84 | 
 85 | #[derive(Clone)]
 86 | pub struct Aabb {
 87 |     min: Vec3,
 88 |     max: Vec3,
 89 | }
 90 | 
 91 | impl Aabb {
 92 |     fn centre(&self) -> Vec3 {
 93 |         Vec3 {
 94 |             x: self.min.x + 0.5 * (self.max.x - self.min.x),
 95 |             y: self.min.y + 0.5 * (self.max.y - self.min.y),
 96 |             z: self.min.z + 0.5 * (self.max.z - self.min.z),
 97 |         }
 98 |     }
 99 | 
100 |     fn hit(&self, r: &Ray, tmin0: f32, tmax0: f32) -> bool {
101 |         let iter = |min_, max_, origin_, dir_, tmin_, tmax_| {
102 |             let inv_d = 1.0 / dir_;
103 |             let t0: f32 = (min_ - origin_) * inv_d;
104 |             let t1 = (max_ - origin_) * inv_d;
105 |             let (t0_, t1_) = if inv_d < 0.0 { (t1, t0) } else { (t0, t1) };
106 |             let tmin__ = t0_.max(tmin_);
107 |             let tmax__ = t1_.min(tmax_);
108 |             (tmin__, tmax__)
109 |         };
110 |         let (tmin1, tmax1) = iter(self.min.x, self.max.x, r.origin.x, r.dir.x, tmin0, tmax0);
111 |         if tmax1 <= tmin1 {
112 |             false
113 |         } else {
114 |             let (tmin2, tmax2) = iter(self.min.y, self.max.y, r.origin.y, r.dir.y, tmin1, tmax1);
115 |             if tmax2 <= tmin2 {
116 |                 false
117 |             } else {
118 |                 let (tmin3, tmax3) =
119 |                     iter(self.min.z, self.max.z, r.origin.z, r.dir.z, tmin2, tmax2);
120 |                 tmax3 > tmin3
121 |             }
122 |         }
123 |     }
124 | 
125 |     fn enclosing(&self, other: &Aabb) -> Aabb {
126 |         let small = Vec3 {
127 |             x: f32::min(self.min.x, other.min.x),
128 |             y: f32::min(self.min.y, other.min.y),
129 |             z: f32::min(self.min.z, other.min.z),
130 |         };
131 |         let big = Vec3 {
132 |             x: f32::max(self.max.x, other.max.x),
133 |             y: f32::max(self.max.y, other.max.y),
134 |             z: f32::max(self.max.z, other.max.z),
135 |         };
136 |         Aabb {
137 |             min: small,
138 |             max: big,
139 |         }
140 |     }
141 | }
142 | 
143 | pub enum Bvh<T> {
144 |     Leaf(Aabb, T),
145 |     Split(Aabb, Box<Bvh<T>>, Box<Bvh<T>>),
146 | }
147 | 
148 | impl<T> Bvh<T> {
149 |     fn aabb(&self) -> &Aabb {
150 |         match self {
151 |             Bvh::Leaf(aabb, _) => aabb,
152 |             Bvh::Split(aabb, _, _) => aabb,
153 |         }
154 |     }
155 | }
156 | 
157 | type Pos = Vec3;
158 | 
159 | type Dir = Vec3;
160 | 
161 | type Colour = Vec3;
162 | 
163 | const BLACK: Colour = Vec3 {
164 |     x: 0.0,
165 |     y: 0.0,
166 |     z: 0.0,
167 | };
168 | 
169 | pub const WHITE: Colour = Vec3 {
170 |     x: 1.0,
171 |     y: 1.0,
172 |     z: 1.0,
173 | };
174 | 
175 | struct Ray {
176 |     origin: Pos,
177 |     dir: Dir,
178 | }
179 | 
180 | impl Ray {
181 |     fn point_at_param(&self, t: f32) -> Vec3 {
182 |         self.origin + self.dir.scale(t)
183 |     }
184 | 
185 |     fn scatter(&self, hit: &Hit) -> Option<(Ray, Colour)> {
186 |         let reflected = self.dir.normalise().reflect(&hit.normal);
187 |         let scattered = Ray {
188 |             origin: hit.p,
189 |             dir: reflected,
190 |         };
191 |         if scattered.dir.dot(&hit.normal) > 0.0 {
192 |             Some((scattered, hit.colour))
193 |         } else {
194 |             None
195 |         }
196 |     }
197 | 
198 |     fn colour(&self, objs: &Objs, depth: usize) -> Colour {
199 |         if let Some(hit) = objs.hit(self, 0.001, 1_000_000_000.0) {
200 |             if let Some((scattered, attenuation)) = self.scatter(&hit) {
201 |                 if depth < 50 {
202 |                     attenuation * scattered.colour(&objs, depth + 1)
203 |                 } else {
204 |                     BLACK
205 |                 }
206 |             } else {
207 |                 BLACK
208 |             }
209 |         } else {
210 |             let unit_dir = self.dir.normalise();
211 |             let t = 0.5 * (unit_dir.y + 1.0);
212 |             let bg = Vec3 {
213 |                 x: 0.5,
214 |                 y: 0.7,
215 |                 z: 1.0,
216 |             };
217 |             WHITE.scale(1.0 - t) + bg.scale(t)
218 |         }
219 |     }
220 | }
221 | 
222 | struct Hit {
223 |     t: f32,
224 |     p: Pos,
225 |     normal: Dir,
226 |     colour: Colour,
227 | }
228 | 
229 | #[derive(Clone)]
230 | pub struct Sphere {
231 |     pos: Pos,
232 |     colour: Colour,
233 |     radius: f32,
234 | }
235 | 
236 | impl Sphere {
237 |     fn hit(&self, r: &Ray, t_min: f32, t_max: f32) -> Option<Hit> {
238 |         #![allow(clippy::many_single_char_names)]
239 | 
240 |         let oc = r.origin - self.pos;
241 |         let a = r.dir.dot(&r.dir);
242 |         let b = oc.dot(&r.dir);
243 |         let c = oc.dot(&oc) - self.radius * self.radius;
244 |         let discriminant = b * b - a * c;
245 | 
246 |         let helper = |temp| {
247 |             if temp < t_max && temp > t_min {
248 |                 Some(Hit {
249 |                     t: temp,
250 |                     p: r.point_at_param(temp),
251 |                     normal: (r.point_at_param(temp) - self.pos).scale(1.0 / self.radius),
252 |                     colour: self.colour,
253 |                 })
254 |             } else {
255 |                 None
256 |             }
257 |         };
258 | 
259 |         if discriminant <= 0.0 {
260 |             None
261 |         } else {
262 |             match helper((-b - (b * b - a * c).sqrt()) / a) {
263 |                 None => helper((-b + (b * b - a * c).sqrt()) / a),
264 |                 x => x,
265 |             }
266 |         }
267 |     }
268 | }
269 | 
270 | trait ToAabb {
271 |     fn to_aabb(&self) -> Aabb;
272 | }
273 | 
274 | impl ToAabb for Sphere {
275 |     fn to_aabb(&self) -> Aabb {
276 |         Aabb {
277 |             min: self.pos
278 |                 - Vec3 {
279 |                     x: self.radius,
280 |                     y: self.radius,
281 |                     z: self.radius,
282 |                 },
283 |             max: self.pos
284 |                 + Vec3 {
285 |                     x: self.radius,
286 |                     y: self.radius,
287 |                     z: self.radius,
288 |                 },
289 |         }
290 |     }
291 | }
292 | 
293 | impl<T> Bvh<T> {
294 |     fn new(all_objs: &[T]) -> Self
295 |     where
296 |         T: Send + ToAabb + Clone,
297 |     {
298 |         fn helper<T>(d: i32, n: usize, xs: &mut [T]) -> Bvh<T>
299 |         where
300 |             T: Send + ToAabb + Clone,
301 |         {
302 |             if xs.is_empty() {
303 |                 panic!("No nodes")
304 |             } else if xs.len() == 1 {
305 |                 Bvh::Leaf(xs[0].to_aabb(), xs[0].clone())
306 |             } else {
307 |                 xs.par_sort_by(|a, b| {
308 |                     let (a_, b_) = match d % 3 {
309 |                         0 => (a.to_aabb().centre().x, b.to_aabb().centre().x),
310 |                         1 => (a.to_aabb().centre().y, b.to_aabb().centre().y),
311 |                         _ => (a.to_aabb().centre().z, a.to_aabb().centre().z),
312 |                     };
313 | 
314 |                     a_.partial_cmp(&b_).unwrap_or(Ordering::Equal)
315 |                 });
316 | 
317 |                 let (xs_left, xs_right) = xs.split_at_mut(n / 2);
318 |                 let (left, right) = if n < 100 {
319 |                     (
320 |                         helper(d + 1, n / 2, xs_left),
321 |                         helper(d + 1, n - (n / 2), xs_right),
322 |                     )
323 |                 } else {
324 |                     rayon::join(
325 |                         || helper(d + 1, n / 2, xs_left),
326 |                         || helper(d + 1, n - (n / 2), xs_right),
327 |                     )
328 |                 };
329 |                 let b = left.aabb().enclosing(right.aabb());
330 |                 Bvh::Split(b, Box::new(left), Box::new(right))
331 |             }
332 |         }
333 | 
334 |         let mut all_objs_ = Vec::from(all_objs);
335 | 
336 |         helper(0, all_objs.len(), &mut all_objs_)
337 |     }
338 | }
339 | 
340 | type Objs = Bvh<Sphere>;
341 | 
342 | impl Objs {
343 |     fn hit(&self, r: &Ray, t_min: f32, t_max: f32) -> Option<Hit> {
344 |         match self {
345 |             Bvh::Leaf(_, s) => s.hit(r, t_min, t_max),
346 |             Bvh::Split(b, left, right) => {
347 |                 if !(b.hit(r, t_min, t_max)) {
348 |                     None
349 |                 } else {
350 |                     match left.hit(r, t_min, t_max) {
351 |                         Some(h) => match right.hit(r, t_min, h.t) {
352 |                             None => Some(h),
353 |                             Some(h_) => Some(h_),
354 |                         },
355 |                         None => right.hit(r, t_min, t_max),
356 |                     }
357 |                 }
358 |             }
359 |         }
360 |     }
361 | }
362 | 
363 | pub struct Camera {
364 |     origin: Pos,
365 |     llc: Pos,
366 |     horizontal: Dir,
367 |     vertical: Dir,
368 | }
369 | 
370 | impl Camera {
371 |     fn new(lookfrom: &Pos, lookat: &Pos, vup: &Vec3, vfov: f32, aspect: f32) -> Self {
372 |         let theta = vfov * std::f32::consts::PI / 180.0;
373 |         let half_height = (theta / 2.0).tan();
374 |         let half_width = aspect * half_height;
375 |         let w = (*lookfrom - *lookat).normalise();
376 |         let u = vup.cross(&w).normalise();
377 |         let v = w.cross(&u);
378 |         Camera {
379 |             origin: *lookfrom,
380 |             llc: *lookfrom - u.scale(half_width) - v.scale(half_height) - w,
381 | 
382 |             horizontal: u.scale(2.0 * half_width),
383 |             vertical: v.scale(2.0 * half_height),
384 |         }
385 |     }
386 | 
387 |     fn ray(&self, s: f32, t: f32) -> Ray {
388 |         Ray {
389 |             origin: self.origin,
390 |             dir: self.llc + self.horizontal.scale(s) + self.vertical.scale(t) - self.origin,
391 |         }
392 |     }
393 | }
394 | 
395 | fn trace_ray(objs: &Objs, width: i32, height: i32, cam: &Camera, j: i32, i: i32) -> Colour {
396 |     let u = i as f32 / width as f32;
397 |     let v = j as f32 / height as f32;
398 |     let ray = cam.ray(u, v);
399 |     ray.colour(objs, 0)
400 | }
401 | 
402 | type Pixel = (i32, i32, i32);
403 | 
404 | impl From<Colour> for Pixel {
405 |     fn from(c: Colour) -> Self {
406 |         (
407 |             (c.x * 255.99) as i32,
408 |             (c.y * 255.99) as i32,
409 |             (c.z * 255.99) as i32,
410 |         )
411 |     }
412 | }
413 | 
414 | pub struct Image {
415 |     pixels: Vec<Pixel>,
416 |     height: i32,
417 |     width: i32,
418 | }
419 | 
420 | pub fn image2ppm(mut buf: impl io::Write, image: Image) -> io::Result<()> {
421 |     write!(buf, "P3\n{} {}\n255\n", image.width, image.height)?;
422 | 
423 |     for (r, g, b) in image.pixels.iter() {
424 |         writeln!(buf, "{} {} {}", r, g, b)?;
425 |     }
426 | 
427 |     Ok(())
428 | }
429 | 
430 | pub fn render(objs: &Objs, width: i32, height: i32, cam: &Camera) -> Image {
431 |     let pixel = |l| {
432 |         let i = l % width;
433 |         let j = height - l / width;
434 |         Pixel::from(trace_ray(objs, width, height, cam, j, i))
435 |     };
436 | 
437 |     let pixels: Vec<_> = (0..height * width).into_par_iter().map(pixel).collect();
438 | 
439 |     Image {
440 |         pixels,
441 |         height,
442 |         width,
443 |     }
444 | }
445 | 
446 | #[derive(Clone)]
447 | pub struct Scene {
448 |     cam_look_from: Pos,
449 |     cam_look_at: Pos,
450 |     cam_fov: f32,
451 |     spheres: Vec<Sphere>,
452 | }
453 | 
454 | pub fn from_scene(width: i32, height: i32, scene: &Scene) -> (Objs, Camera) {
455 |     (
456 |         Bvh::new(&scene.spheres),
457 |         Camera::new(
458 |             &scene.cam_look_from,
459 |             &scene.cam_look_at,
460 |             &Vec3 {
461 |                 x: 0.0,
462 |                 y: 1.0,
463 |                 z: 0.0,
464 |             },
465 |             scene.cam_fov,
466 |             width as f32 / height as f32,
467 |         ),
468 |     )
469 | }
470 | 
471 | pub mod sample_scenes;
472 | 


--------------------------------------------------------------------------------
/rust/src/main.rs:
--------------------------------------------------------------------------------
 1 | #[macro_use]
 2 | extern crate structopt;
 3 | 
 4 | use std::fs::File;
 5 | use std::io;
 6 | use std::io::Write;
 7 | use std::path::Path;
 8 | 
 9 | use ray;
10 | 
11 | #[derive(StructOpt)]
12 | struct Args {
13 |     #[structopt(short = "f")]
14 |     /// The file to output the image to
15 |     file_out: Option<String>,
16 | 
17 |     #[structopt(short = "m", long = "height", default_value = "200")]
18 |     /// The height
19 |     height: i32,
20 | 
21 |     #[structopt(short = "n", long = "width", default_value = "200")]
22 |     /// The width
23 |     width: i32,
24 | 
25 |     #[structopt(short = "s", long = "scene", default_value = "rgbbox")]
26 |     /// The scene to show. Possible values are 'rgbbox' and 'irreg'
27 |     scene_name: String,
28 | }
29 | 
30 | #[paw::main]
31 | fn main(args: Args) -> Result<(), Box<dyn std::error::Error>> {
32 |     let mut scene = match args.scene_name.as_ref() {
33 |         "irreg" => (*ray::sample_scenes::IRREG).clone(),
34 |         "rgbbox" => (*ray::sample_scenes::RGBBOX).clone(),
35 |         s => panic!("Invalid scene: {}", s),
36 |     };
37 | 
38 |     let (objs, cam) = ray::from_scene(args.width, args.height, &mut scene);
39 | 
40 |     let result = ray::render(&objs, args.width, args.height, &cam);
41 | 
42 |     let out_writer = match args.file_out {
43 |         Some(x) => {
44 |             let path = Path::new(&x);
45 |             Box::new(File::create(&path).unwrap()) as Box<dyn Write>
46 |         }
47 |         None => Box::new(io::stdout()) as Box<dyn Write>,
48 |     };
49 | 
50 |     ray::image2ppm(out_writer, result)?;
51 | 
52 |     Ok(())
53 | }
54 | 


--------------------------------------------------------------------------------
/rust/src/sample_scenes.rs:
--------------------------------------------------------------------------------
  1 | use lazy_static::lazy_static;
  2 | 
  3 | use super::{Scene, Sphere, Vec3, WHITE};
  4 | 
  5 | lazy_static! {
  6 |     pub static ref RGBBOX: Scene = {
  7 |         let n = 10;
  8 |         let k = 60.0;
  9 | 
 10 |         let leftwall: Vec<Sphere> = (0..n)
 11 |             .flat_map(|y| {
 12 |                 (0..n).map(move |z| Sphere {
 13 |                     pos: Vec3 {
 14 |                         x: (-k) / 2.0,
 15 |                         y: ((-k) / 2.0 + (k / n as f32) * y as f32),
 16 |                         z: ((-k) / 2.0 + (k / n as f32) * z as f32),
 17 |                     },
 18 |                     colour: Vec3 {
 19 |                         x: 1.0,
 20 |                         y: 0.0,
 21 |                         z: 0.0,
 22 |                     },
 23 |                     radius: k / (n as f32 * 2.0),
 24 |                 })
 25 |             })
 26 |             .collect();
 27 |         let midwall: Vec<Sphere> = (0..n)
 28 |             .flat_map(|x| {
 29 |                 (0..n).map(move |y| Sphere {
 30 |                     pos: Vec3 {
 31 |                         x: ((-k) / 2.0 + (k / n as f32) * x as f32),
 32 |                         y: ((-k) / 2.0 + (k / n as f32) * y as f32),
 33 |                         z: (-k) / 2.0,
 34 |                     },
 35 |                     colour: Vec3 {
 36 |                         x: 1.0,
 37 |                         y: 1.0,
 38 |                         z: 0.0,
 39 |                     },
 40 |                     radius: k / (n as f32 * 2.0),
 41 |                 })
 42 |             })
 43 |             .collect();
 44 |         let rightwall: Vec<Sphere> = (0..n)
 45 |             .flat_map(|y| {
 46 |                 (0..n).map(move |z| Sphere {
 47 |                     pos: Vec3 {
 48 |                         x: k / 2.0,
 49 |                         y: ((-k) / 2.0 + (k / n as f32) * y as f32),
 50 |                         z: ((-k) / 2.0 + (k / n as f32) * z as f32),
 51 |                     },
 52 |                     colour: Vec3 {
 53 |                         x: 0.0,
 54 |                         y: 0.0,
 55 |                         z: 1.0,
 56 |                     },
 57 |                     radius: k / (n as f32 * 2.0),
 58 |                 })
 59 |             })
 60 |             .collect();
 61 |         let bottom: Vec<Sphere> = (0..n)
 62 |             .flat_map(|x| {
 63 |                 (0..n).map(move |z| Sphere {
 64 |                     pos: Vec3 {
 65 |                         x: ((-k) / 2.0 + (k / n as f32) * x as f32),
 66 |                         y: (-k) / 2.0,
 67 |                         z: ((-k) / 2.0 + (k / n as f32) * z as f32),
 68 |                     },
 69 |                     colour: Vec3 {
 70 |                         x: 1.0,
 71 |                         y: 1.0,
 72 |                         z: 1.0,
 73 |                     },
 74 |                     radius: k / (n as f32 * 2.0),
 75 |                 })
 76 |             })
 77 |             .collect();
 78 |         Scene {
 79 |             spheres: [leftwall, midwall, rightwall, bottom].concat(),
 80 |             cam_look_from: Vec3 {
 81 |                 x: 0.0,
 82 |                 y: 30.0,
 83 |                 z: 30.0,
 84 |             },
 85 |             cam_look_at: Vec3 {
 86 |                 x: 0.0,
 87 |                 y: -1.0,
 88 |                 z: -1.0,
 89 |             },
 90 |             cam_fov: 75.0,
 91 |         }
 92 |     };
 93 | }
 94 | 
 95 | lazy_static! {
 96 |     pub static ref IRREG: Scene = {
 97 |         let n = 100;
 98 |         let k = 600.0;
 99 |         let spheres = (0..n)
100 |             .flat_map(|x| {
101 |                 (0..n).map(move |z| Sphere {
102 |                     pos: Vec3 {
103 |                         x: ((-k) / 2.0 + (k / n as f32) * x as f32),
104 |                         y: 0.0,
105 |                         z: ((-k) / 2.0 + (k / n as f32) * z as f32),
106 |                     },
107 |                     colour: WHITE,
108 |                     radius: k / (n as f32 * 2.0),
109 |                 })
110 |             })
111 |             .collect();
112 | 
113 |         Scene {
114 |             spheres,
115 |             cam_look_from: Vec3 {
116 |                 x: 0.0,
117 |                 y: 12.0,
118 |                 z: 30.0,
119 |             },
120 |             cam_look_at: Vec3 {
121 |                 x: 0.0,
122 |                 y: 10.0,
123 |                 z: -1.0,
124 |             },
125 |             cam_fov: 75.0,
126 |         }
127 |     };
128 | }
129 | 


--------------------------------------------------------------------------------
/scala/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | *.ppm
3 | 


--------------------------------------------------------------------------------
/scala/Makefile:
--------------------------------------------------------------------------------
 1 | all: rgbbox_1000.ppm irreg_1000.ppm
 2 | 
 3 | rgbbox_1000.ppm: src/main/scala/raytracer/*
 4 | 	sbt "run rgbbox 1000 1000"
 5 | 	mv out.ppm $@
 6 | 
 7 | irreg_1000.ppm: src/main/scala/raytracer/*
 8 | 	sbt "run irreg 1000 1000"
 9 | 	mv out.ppm $@
10 | 
11 | .PHONY: clean
12 | 
13 | clean:
14 | 	sbt clean
15 | 	rm -f *.ppm
16 | 
17 | bench:
18 | 	sbt clean bench
19 | 


--------------------------------------------------------------------------------
/scala/README.md:
--------------------------------------------------------------------------------
 1 | - Make sure you have sbt installed
 2 | - run `sbt`
 3 | - then run `run <scene> <width> <height>`
 4 | - the output file will be written to `out.ppm`
 5 | 
 6 | ## Benchmark custom JVM version
 7 | 
 8 | Use [Jabba](https://github.com/shyiko/jabba) to install a custom JVM version.
 9 | For example, to run the benchmarks with GraalVM:
10 | 
11 | ```sh
12 | curl -sL https://github.com/shyiko/jabba/raw/master/install.sh | bash && . ~/.jabba/jabba.sh
13 | jabba install graalvm@20.0.0
14 | jabba use graalvm@20.0.0
15 | make bench
16 | ```
17 | 


--------------------------------------------------------------------------------
/scala/build.sbt:
--------------------------------------------------------------------------------
 1 | ThisBuild / scalaVersion := "3.1.2"
 2 | 
 3 | lazy val root = (project in file("."))
 4 |   .settings(name := "raytracer")
 5 | 
 6 | fork := true
 7 | 
 8 | javaOptions := Seq("-Xms2G", "-Xmx2G", "-XX:+UseG1GC", "-XX:+UseStringDeduplication")
 9 | 
10 | libraryDependencies += "org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.4"
11 | 
12 | enablePlugins(JmhPlugin)
13 | 
14 | addCommandAlias("bench", "jmh:run -i 3 -wi 3 -f1 -t1")
15 | 
16 | // See https://www.scala-sbt.org/1.x/docs/Using-Sonatype.html for instructions on how to publish to Sonatype.
17 | 


--------------------------------------------------------------------------------
/scala/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=1.6.2
2 | 


--------------------------------------------------------------------------------
/scala/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.3.7")
2 | 


--------------------------------------------------------------------------------
/scala/shell.nix:
--------------------------------------------------------------------------------
 1 | with import (builtins.fetchTarball {
 2 |   url = "https://github.com/NixOS/nixpkgs/archive/820177eded91f3908cfc72dfee00e831ea3d0060.zip";
 3 |   sha256 = "1yqx5zy154f8057inwjp2ranizgilvpblqq31cy7nryrwj2apics";
 4 | }) {};
 5 | stdenv.mkDerivation {
 6 |   name = "ray";
 7 |   buildInputs = [ (sbt.override {jre=jdk17;})
 8 |                   jdk17
 9 |                   (scala.override {jre=jdk17;})
10 |                   gnumake
11 |                 ];
12 | }
13 | 


--------------------------------------------------------------------------------
/scala/src/main/scala/raytracer/BVH.scala:
--------------------------------------------------------------------------------
 1 | package raytracer
 2 | import scala.collection.immutable.Nil
 3 | 
 4 | final case class AABB(min: Vec3, max: Vec3) {
 5 |   inline def surroundingBox(that: AABB): AABB = {
 6 |     val small =
 7 |       Vec3( math.min(this.min.x, that.min.x)
 8 |           , math.min(this.min.y, that.min.y)
 9 |           , math.min(this.min.z, that.min.z)
10 |           )
11 |     val big =
12 |       Vec3( math.max(this.max.x, that.max.x)
13 |           , math.max(this.max.y, that.max.y)
14 |           , math.max(this.max.z, that.max.z)
15 |           )
16 |     AABB(small, big)
17 |   }
18 | 
19 |   inline def axis(d: Int): Double =
20 |     d % 3 match {
21 |       case 0 => min.x + max.x - min.x
22 |       case 1 => min.y + max.y - min.y
23 |       case 2 => min.z + max.z - min.z
24 |     }
25 | }
26 | 
/** A bounding volume hierarchy: a binary tree in which every node carries a
  * bounding box.
  */
sealed abstract class BVH[A] extends Product with Serializable {
  /** The bounding box stored at this node. */
  def getAABB: AABB = this match {
    case Leaf(box, _) => box
    case Split(box, _, _) => box
  }
}

/** A single object `a` together with its bounding box. */
final case class Leaf[A](aabb: AABB, a: A) extends BVH[A]

/** An inner node: a bounding box plus the left and right subtrees. */
final case class Split[A](aabb: AABB, left: BVH[A], right: BVH[A]) extends BVH[A]
35 | 
object BVH {
  import java.util.concurrent.{Executors, ForkJoinPool, RecursiveTask}
  import scala.math.Ordering.Double.IeeeOrdering

  // Pool on which the tree is built; shut down when the JVM exits.
  private final lazy val pool = Executors.newWorkStealingPool().asInstanceOf[ForkJoinPool]
  sys.addShutdownHook(pool.shutdown())

  /** Builds a BVH over `allObjs`, using `f` to obtain each object's bounding box.
    *
    * At every level the objects are sorted by `AABB.axis` for the current
    * depth and split in half; the two halves are built as fork/join tasks.
    *
    * @throws RuntimeException if `allObjs` is empty
    */
  def apply[A](f: A => AABB, allObjs: List[A]): BVH[A] = {
    // Builds the subtree for `objs` (which holds `n` objects) at depth `d`.
    class Go(d: Int, n: Int, objs: List[A]) extends RecursiveTask[BVH[A]] {
      override def compute(): BVH[A] = objs match {
        case Nil => throw new RuntimeException("BVH.apply: empty no nodes")
        case x :: Nil => Leaf(f(x), x)
        case xs =>
          val (xsLeft, xsRight) = xs.sortBy(a => f(a).axis(d)).splitAt(n / 2)
          // Fork only one half and compute the other on this thread: forking
          // both and then joining would leave the current worker idle.
          val leftTask = new Go(d + 1, n / 2, xsLeft).fork()
          val right = new Go(d + 1, n - n / 2, xsRight).compute()
          val left = leftTask.join()
          val box = left.getAABB.surroundingBox(right.getAABB)
          Split(box, left, right)
      }
    }

    pool.invoke(new Go(0, allObjs.length, allObjs))
  }
}
64 | 


--------------------------------------------------------------------------------
/scala/src/main/scala/raytracer/Bench.scala:
--------------------------------------------------------------------------------
 1 | package raytracer
 2 | 
 3 | import org.openjdk.jmh.annotations._
 4 | 
 5 | import java.util.concurrent.TimeUnit
 6 | 
/** JMH benchmark measuring scene construction and rendering at a fixed 1000x1000 size.
  *
  * Results are reported as average time in milliseconds; state is per thread.
  */
@State(Scope.Thread)
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MILLISECONDS)
class Bench {

  val width = 1000
  val height = 1000
  // Name of the scene to benchmark; JMH runs the benchmarks once per listed value.
  @Param(Array("rgbbox", "irreg"))
  var scene: String = _

  // Pre-built (objects, camera) pair consumed by `render`; filled in by `setup`.
  var objscam: (Raytracer.Objs, Raytracer.Camera) = _
  // Scene description looked up from `scene`; filled in by `setup`.
  var s: Scene = _

  /** Resolves the scene name and pre-builds the objects and camera used by `render`. */
  @Setup
  def setup(): Unit = {
    s = Scene.fromString(scene).get
    objscam = s.toObjsCam(width, height)
  }

  // NOTE(review): both benchmarks discard their result (return type Unit) —
  // confirm that this does not let the JIT eliminate the measured work.
  @Benchmark def construct(): Unit = s.toObjsCam(width, height)
  @Benchmark def render(): Unit = Raytracer.render(objscam._1, width, height, objscam._2)
}
29 | 


--------------------------------------------------------------------------------
/scala/src/main/scala/raytracer/Image.scala:
--------------------------------------------------------------------------------
 1 | package raytracer
 2 | 
 3 | import scala.collection.parallel.immutable.ParSeq
 4 | 
 5 | final case class Image(width: Int, height: Int, pixels: Image.PixelData) {
 6 |   import java.io.PrintWriter
 7 |   import scala.util.Using
 8 | 
 9 |   def write(): Unit = Using.resource(new PrintWriter("out.ppm")) { pw =>
10 |     val writePPM = (p: Image.Pixel) => pw.write(s"${p.x & 0xFF} ${p.y & 0xFF} ${p.z & 0xFF}\n")
11 |     pw.write(s"P3\n$width $height\n255\n")
12 |     pixels.seq.foreach(writePPM)
13 |   }
14 | }
15 | 
/** Pixel representation and the pixel-function based constructor for [[Image]]. */
object Image {
  type PixelData = ParSeq[Pixel]

  /** Builds an image by evaluating `pixel` for every pixel position.
    *
    * Pixels are laid out row by row: flat index `n` belongs to row `n / width`
    * and column `n % width`. `pixel` is called with `(height - row, column)`.
    *
    * @param width  number of columns
    * @param height number of rows
    * @param pixel  function from `(height - row, column)` to the pixel value
    */
  def apply(width: Int, height: Int, pixel: (Int, Int) => Pixel): Image = {
    // The row must be derived from the row length (`width`); dividing by
    // `height` only gives the right answer for square images.
    val coords = ParSeq.tabulate(width * height)(n => (height - n / width, n % width))
    val pixelData = coords.map(pixel.tupled)
    Image(width, height, pixelData)
  }

  /** One pixel; `x`, `y`, `z` are the three colour channels stored as raw bytes. */
  class Pixel(val x: Byte, val y: Byte, val z: Byte)
}


--------------------------------------------------------------------------------
/scala/src/main/scala/raytracer/Main.scala:
--------------------------------------------------------------------------------
 1 | package raytracer
 2 | 
 3 | import scala.util.Try
 4 | import raytracer.Raytracer.render
 5 | 
 6 | object Main {
 7 |   def main(args: Array[String]): Unit = {
 8 |     val (scene, width, height) = (args match {
 9 |       case Array(s, w, h, _*) => for {
10 |         ss <- Scene.fromString(s)
11 |         ww <- Try(w.toInt).toOption
12 |         hh <- Try(h.toInt).toOption
13 |       } yield (ss, ww, hh)
14 |       case _ => None
15 |     }).getOrElse({
16 |       println("Error parsing command line arguments, running rgbbox 1000x1000 px")
17 |       (Scene.rgbbox, 1000, 1000)
18 |     })
19 | 
20 |     val (objs, cam) = scene.toObjsCam(width, height)
21 |     val image = render(objs, width, height, cam)
22 |     image.write()
23 |   }
24 | }
25 | 


--------------------------------------------------------------------------------
/scala/src/main/scala/raytracer/Raytracer.scala:
--------------------------------------------------------------------------------
  1 | package raytracer
  2 | 
  3 | object Raytracer {
  4 |   type Pos = Vec3
  5 |   type Dir = Vec3
  6 |   type Color = Vec3
  7 |   final val Black: Color = Vec3(0, 0, 0)
  8 |   final val White: Color = Vec3(1, 1, 1)
  9 | 
 10 |   final case class Ray(origin: Pos, dir: Dir) {
 11 | 
 12 |     inline def pointAtParam(t: Double): Pos = origin + dir.scale(t)
 13 | 
 14 |     def aabbHit(aabb: AABB, tMin0: Double, tMax0: Double): Boolean = {
 15 |       def go(min_ : Double, max_ : Double, origin_ : Double, dir_ : Double, tMin_ : Double, tMax_ : Double): (Double, Double) = {
 16 |         val invD = 1.0 / dir_
 17 |         val t0 = (min_ - origin_) * invD
 18 |         val t1 = (max_ - origin_) * invD
 19 |         val t0_ = if (invD < 0) t1 else t0
 20 |         val t1_ = if (invD < 0) t0 else t1
 21 |         val tMin__ = t0_ max tMin_
 22 |         val tMax__ = t1_ min tMax_
 23 |         (tMin__, tMax__)
 24 |       }
 25 |       val (tMin1, tMax1) = go(aabb.min.x, aabb.max.x, origin.x, dir.x, tMin0, tMax0)
 26 |       if(tMax1 <= tMin1) false
 27 |       else {
 28 |         val (tMin2, tMax2) = go(aabb.min.y, aabb.max.y, origin.y, dir.y, tMin1, tMax1)
 29 |         if(tMax2 <= tMin2) false
 30 |         else {
 31 |           val (tMin3, tMax3) = go(aabb.min.z, aabb.max.z, origin.z, dir.z, tMin2, tMax2)
 32 |           tMax3 > tMin3
 33 |         }
 34 |       }
 35 |     }
 36 |   }
 37 | 
 38 |   final case class Hit(t: Double, p: Pos, normal: Dir, color: Color)
 39 |   final case class Sphere(pos: Pos, color: Color, radius: Double) {
 40 |     private[this] final val radiusSqrd = radius * radius
 41 |     private[this] final val scalaFactor = 1.0 / radius
 42 | 
 43 |     def aabb: AABB = {
 44 |       val rVec = Vec3(radius, radius, radius)
 45 |       AABB(pos - rVec, pos + rVec)
 46 |     }
 47 | 
 48 |     def hit(ray: Ray, tMin: Double, tMax: Double): Option[Hit] = {
 49 |       val oc = ray.origin - pos
 50 |       val a = ray.dir dot ray.dir
 51 |       val b = oc dot ray.dir
 52 |       val c = (oc dot oc) - radiusSqrd
 53 |       val discriminant = b * b - a * c
 54 | 
 55 |       def tryHit(temp: Double) =
 56 |         if (temp < tMax && temp > tMin) {
 57 |           val pointAtParam = ray.pointAtParam(temp)
 58 |           Some(Hit(temp, pointAtParam, (pointAtParam - pos).scale(scalaFactor), color))
 59 |         }
 60 |         else None
 61 | 
 62 |       if (discriminant <= 0) None
 63 |       else {
 64 |         val sqrtDiscriminant = math.sqrt(discriminant)
 65 | 
 66 |         tryHit((-b - sqrtDiscriminant) / a) match {
 67 |           case s: Some[Hit] => s
 68 |           case None => tryHit((-b + sqrtDiscriminant) / a)
 69 |         }
 70 |       }
 71 |     }
 72 |   }
 73 | 
 74 |   type Objs = BVH[Sphere]
 75 | 
 76 |   def objsHit(objs: Objs, ray: Ray, tMin: Double, tMax: Double): Option[Hit] = objs match {
 77 |     case Leaf(_, sphere) => sphere.hit(ray, tMin, tMax)
 78 |     case Split(box, left, right) =>
 79 |       if(!ray.aabbHit(box, tMin, tMax)) None
 80 |       else objsHit(left, ray, tMin, tMax) match {
 81 |         case opt@Some(hit1) => objsHit(right, ray, tMin, hit1.t).orElse(opt)
 82 |         case None => objsHit(right, ray, tMin, tMax)
 83 |       }
 84 |   }
 85 | 
 86 |   final case class Camera(origin: Pos, llc: Pos, horizontal: Dir, vertical: Dir)
 87 | 
 88 |   object Camera {
 89 |     def apply(lookFrom: Pos, lookAt: Pos, vUp: Dir, vFov: Double, aspect: Double): Camera = {
 90 |       val theta = vFov * math.Pi / 180.0
 91 |       val halfHeight = math.tan(theta / 2.0)
 92 |       val halfWidth = aspect * halfHeight
 93 |       val origin = lookFrom
 94 |       val w = (lookFrom - lookAt).normalise
 95 |       val u = (vUp cross w).normalise
 96 |       val v = w cross u
 97 |       Camera( lookFrom
 98 |             , origin - u.scale(halfWidth) - v.scale(halfHeight) - w
 99 |             , u.scale(2*halfWidth)
100 |             , v.scale(2*halfHeight)
101 |             )
102 |     }
103 |   }
104 | 
105 |   def getRay(cam: Camera, s: Double, t: Double): Ray =
106 |     Ray( cam.origin
107 |        , cam.llc + cam.horizontal.scale(s) + cam.vertical.scale(t) - cam.origin
108 |        )
109 | 
110 |   inline def reflect(v: Vec3, n: Vec3): Vec3 =
111 |     v - n.scale(2 * (v dot n))
112 | 
113 |   inline def scatter(rayDir: Dir, hit: Hit): Option[Ray] = {
114 |     val reflected = reflect(rayDir.normalise, hit.normal)
115 |     if((reflected dot hit.normal) > 0) Some(Ray(hit.p, reflected)) else None
116 |   }
117 | 
118 |   def rayColor(objs: Objs, ray: Ray, depth: Int): Color = objsHit(objs, ray, 0.001, 1.0 / 0.0) match {
119 |       case Some(hit) => scatter(ray.dir, hit) match {
120 |         case Some(scattered) if depth < 50 => hit.color * rayColor(objs, scattered, depth+1)
121 |         case _ => Black
122 |       }
123 |       case None =>
124 |         val t = 0.5 * (ray.dir.normalise.y + 1.0)
125 |         Vec3.one.scale(1.0-t) + Vec3(0.5, 0.7, 1).scale(t)
126 |   }
127 | 
128 |   def traceRay(objs: Objs, width: Int, height: Int, camera: Camera, j: Int, i: Int): Color =
129 |     rayColor(objs,
130 |       getRay(camera,
131 |         i.toDouble / width.toDouble,
132 |         j.toDouble / height.toDouble), 0)
133 | 
134 |   def render(objs: Objs, width: Int, height: Int, camera: Camera): Image = {
135 |     import raytracer.Image.Pixel
136 |     val colorToPixel = (c: Color) => new Pixel((255.99 * c.x).toByte, (255.99 * c.y).toByte, (255.99 * c.z).toByte)
137 |     Image(width, height, (j, i) => colorToPixel(traceRay(objs, width, height, camera, j, i)))
138 |   }
139 | }
140 | 


--------------------------------------------------------------------------------
/scala/src/main/scala/raytracer/Scene.scala:
--------------------------------------------------------------------------------
 1 | package raytracer
 2 | 
 3 | import Raytracer.{Pos, Sphere, Camera, Objs}
 4 | 
 5 | final case class Scene(camLookFrom: Pos, camLookAt: Pos, camFov: Double, spheres: List[Sphere]) {
 6 |   def toObjsCam(width: Int, height: Int): (Objs, Camera) =
 7 |     (BVH(_.aabb, spheres), Camera(camLookFrom, camLookAt, Vec3(0,1,0), camFov, width.toDouble / height.toDouble))
 8 | }
 9 | 
/** The built-in scenes and their lookup by name. */
object Scene {

  /** Returns the scene called `name` (`"irreg"` or `"rgbbox"`), or `None` for any other name. */
  def fromString(name: String): Option[Scene] =
    if (name == "irreg") Some(irreg)
    else if (name == "rgbbox") Some(rgbbox)
    else None

  /** Four 10x10 grids of spheres: a left, a back, a right wall and a floor. */
  def rgbbox: Scene = {
    val n = 10
    val k = 60.0
    val lo = -k / 2.0
    val hi = k / 2.0
    val step = k / n
    val radius = k / (n * 2.0)
    // All grid cells, first index varying slowest.
    val cells = for {
      a <- 0 until n
      b <- 0 until n
    } yield (a, b)

    val leftwall = cells.map { case (y, z) =>
      Sphere(Vec3(lo, lo + step * y, lo + step * z), Vec3(1, 0, 0), radius)
    }
    val midwall = cells.map { case (x, y) =>
      Sphere(Vec3(lo + step * x, lo + step * y, lo), Vec3(0, 1, 0), radius)
    }
    val rightwall = cells.map { case (y, z) =>
      Sphere(Vec3(hi, lo + step * y, lo + step * z), Vec3(0, 0, 1), radius)
    }
    val bottom = cells.map { case (x, z) =>
      Sphere(Vec3(lo + step * x, lo, lo + step * z), Vec3(1, 1, 1), radius)
    }

    Scene(
      Vec3(0, 30, 30),
      Vec3(0, -1, -1),
      75,
      (leftwall ++ midwall ++ rightwall ++ bottom).toList
    )
  }

  /** A single 100x100 grid of white spheres in the plane y = 0. */
  def irreg: Scene = {
    val n = 100
    val k = 600.0
    val lo = -k / 2.0
    val step = k / n
    val radius = k / (n * 2.0)
    val bottom = for {
      x <- 0 until n
      z <- 0 until n
    } yield Sphere(Vec3(lo + step * x, 0, lo + step * z), Vec3(1, 1, 1), radius)

    Scene(Vec3(0, 12, 30), Vec3(0, 10, -1), 75, bottom.toList)
  }
}
88 | 
89 | 


--------------------------------------------------------------------------------
/scala/src/main/scala/raytracer/Vec3.scala:
--------------------------------------------------------------------------------
 1 | package raytracer
 2 | 
 3 | final case class Vec3(x: Double, y: Double, z: Double) {
 4 | 
 5 |   inline def +(that: Vec3) = Vec3(x + that.x, y + that.y, z + that.z)
 6 |   inline def -(that: Vec3) = Vec3(x - that.x, y - that.y, z - that.z)
 7 |   inline def *(that: Vec3) = Vec3(x * that.x, y * that.y, z * that.z)
 8 | 
 9 |   inline def scale(a: Double): Vec3 = Vec3(a * x, a * y, a * z)
10 |   inline def dot(that: Vec3): Double =
11 |     x * that.x + y * that.y + z * that.z
12 |   inline def norm: Double = math.sqrt(this dot this)
13 |   inline def normalise: Vec3 = scale (1.0f / norm)
14 |   inline def cross(that: Vec3): Vec3 =
15 |     Vec3(y * that.z - z * that.y, z * that.x - x * that.z, x * that.y - y * that.x)
16 | 
17 | }
18 | 
19 | object Vec3 {
20 |   val one = Vec3(1, 1, 1)
21 | }
22 | 


--------------------------------------------------------------------------------
/sml/.gitignore:
--------------------------------------------------------------------------------
1 | ray
2 | *.ppm
3 | 


--------------------------------------------------------------------------------
/sml/Makefile:
--------------------------------------------------------------------------------
 1 | all: rgbbox_1000.ppm irreg_1000.ppm
 2 | 
 3 | ray: ray.sml
 4 | 	polyc ray.sml -o ray
 5 | 
 6 | rgbbox_1000.ppm: ray
 7 | 	./ray -f $@ -s rgbbox -n 1000 -m 1000
 8 | 
 9 | irreg_1000.ppm: ray
10 | 	./ray -f $@ -s irreg -n 1000 -m 1000
11 | 
12 | .PHONY: clean
13 | 
14 | clean:
15 | 	rm -f *.ppm
16 | 


--------------------------------------------------------------------------------
/sml/ray.sml:
--------------------------------------------------------------------------------
  1 | type vec3 = {x: real, y: real, z: real}
  2 | 
  3 | local
  4 |     fun vf f (v1: vec3) (v2: vec3) =
  5 |         {x= f (#x v1, #x v2),
  6 |          y= f (#y v1, #y v2),
  7 |          z= f (#z v1, #z v2)}
  8 | in
  9 | 
 10 | val vec_add = vf (op+)
 11 | val vec_sub = vf (op-)
 12 | val vec_mul = vf (op* )
 13 | val vec_div = vf (op/)
 14 | 
 15 | fun scale s {x,y,z} = {x=s*x, y=s*y, z=s*z} : vec3
 16 | 
 17 | fun dot (v1: vec3) (v2: vec3) =
 18 |     let val v3 = vec_mul v1 v2
 19 |     in #x v3 + #y v3 + #z v3 end
 20 | 
 21 | fun norm v = Math.sqrt (dot v v)
 22 | 
 23 | fun normalise v = scale (1.0 / norm v) v
 24 | 
 25 | fun cross {x=x1, y=y1, z=z1} {x=x2, y=y2, z=z2} =
 26 |     {x=y1*z2-z1*y2, y=z1*x2-x1*z2, z=x1*y2-y1*x2} : vec3
 27 | 
 28 | end
 29 | 
 30 | type aabb = { min: vec3, max: vec3 }
 31 | 
(* The smaller of two reals; yields y whenever x < y does not hold. *)
fun min x y : real =
    if x < y then x else y
 34 | 
(* The larger of two reals; yields x whenever x < y does not hold. *)
fun max x y : real =
    if x < y then y else x
 37 | 
 38 | fun enclosing (box0: aabb) (box1: aabb) =
 39 |     let val small = { x = min (#x (#min box0)) (#x (#min box1))
 40 |                     , y = min (#y (#min box0)) (#y (#min box1))
 41 |                     , z = min (#z (#min box0)) (#z (#min box1))
 42 |                     }
 43 |         val big = { x = max (#x (#max box0)) (#x (#max box1))
 44 |                   , y = max (#y (#max box0)) (#y (#max box1))
 45 |                   , z = max (#z (#max box0)) (#z (#max box1))
 46 |                   }
 47 |   in {min=small, max=big} end
 48 | 
 49 | fun centre (aabb: aabb) =
 50 |     { x = #x (#min aabb) + 0.5 * (#x (#max aabb) - #x (#min aabb)),
 51 |       y = #y (#min aabb) + 0.5 * (#y (#max aabb) - #y (#min aabb)),
 52 |       z = #z (#min aabb) + 0.5 * (#z (#max aabb) - #z (#min aabb))
 53 |     }
 54 | 
 55 | datatype 'a bvh = bvh_leaf of aabb * 'a
 56 |                 | bvh_split of aabb * 'a bvh * 'a bvh
 57 | 
 58 | fun bvh_aabb (bvh_leaf (box, _)) = box
 59 |   | bvh_aabb (bvh_split (box, _, _)) = box
 60 | 
 61 | (* Couldn't find a sorting function in MLtons stdlib - this is from Rosetta Code. *)
 62 | local
 63 |     fun merge cmp ([], ys) = ys
 64 |       | merge cmp (xs, []) = xs
 65 |       | merge cmp (xs as x::xs', ys as y::ys') =
 66 |           case cmp (x, y) of
 67 |                GREATER => y :: merge cmp (xs, ys')
 68 |              | _       => x :: merge cmp (xs', ys)
 69 |     fun sort cmp [] = []
 70 |       | sort cmp [x] = [x]
 71 |       | sort cmp xs =
 72 |         let
 73 |           val ys = List.take (xs, length xs div 2)
 74 |           val zs = List.drop (xs, length xs div 2)
 75 |         in
 76 |           merge cmp (sort cmp ys, sort cmp zs)
 77 |         end
 78 | in
 79 | fun mk_bvh f all_objs =
 80 |     let fun mk _ _ [] = raise Fail "mk_bvh: no nodes"
 81 |           | mk _ _ [x] = bvh_leaf(f x, x)
 82 |           | mk d n xs =
 83 |             let val axis = case d mod 3 of 0 => #x
 84 |                                          | 1 => #y
 85 |                                          | _ => #z
 86 |                 fun cmp (x, y) =
 87 |                     Real.compare(axis(centre(f x)),
 88 |                                  axis(centre(f y)))
 89 |                 val xs_sorted = sort cmp xs
 90 |                 val xs_left = List.take(xs_sorted, n div 2)
 91 |                 val xs_right = List.drop(xs_sorted, n div 2)
 92 |                 fun do_left () = mk (d+1) (n div 2) xs_left
 93 |                 fun do_right () = mk (d+1) (n-(n div 2)) xs_right
 94 |                 val (left, right) =
 95 |                     if n < 100
 96 |                     then (do_left(), do_right())
 97 |                     else (do_left(), do_right())
 98 |                 val box = enclosing (bvh_aabb left) (bvh_aabb right)
 99 |             in bvh_split (box, left, right) end
100 |     in mk 0 (length all_objs) all_objs end
101 | end
102 | 
103 | type pos = vec3
104 | type dir = vec3
105 | type colour = vec3
106 | 
107 | val black : vec3 = {x=0.0, y=0.0, z=0.0}
108 | val white : vec3 = {x=1.0, y=1.0, z=1.0}
109 | 
110 | type ray = {origin: pos, dir: dir}
111 | 
(* The point origin + t * dir on a ray. *)
fun point_at_param ({origin, dir}: ray) t =
    vec_add origin (scale t dir)
114 | 
115 | type hit = { t: real
116 |            , p: pos
117 |            , normal: dir
118 |            , colour: colour
119 |            }
120 | 
121 | type sphere = { pos: pos
122 |               , colour: colour
123 |               , radius: real
124 |               }
125 | 
(* The axis-aligned box extending radius from the centre along every axis. *)
fun sphere_aabb {pos, colour=_, radius} =
    let val extent = {x=radius, y=radius, z=radius}
    in {min = vec_sub pos extent, max = vec_add pos extent} end
129 | 
(* Intersects ray r with a sphere. Yields the hit at the smaller root of the
   intersection equation if it lies strictly between t_min and t_max,
   otherwise the hit at the larger root under the same condition,
   otherwise NONE. *)
fun sphere_hit {pos, colour, radius} r t_min t_max : hit option =
    let val oc = vec_sub (#origin r) pos
        val a = dot (#dir r) (#dir r)
        val b = dot oc (#dir r)
        val c = dot oc oc - radius*radius
        val discriminant = b*b - a*c
        fun hit_at t =
            if t < t_max andalso t > t_min
            then let val p = point_at_param r t
                 in SOME { t = t
                         , p = p
                         , normal = scale (1.0/radius) (vec_sub p pos)
                         , colour = colour
                         }
                 end
            else NONE
    in if discriminant <= 0.0
       then NONE
       else let val root = Math.sqrt discriminant
            in case hit_at ((~b - root)/a) of
                   NONE => hit_at ((~b + root)/a)
                 | found => found
            end
    end
151 | 
(* Tests whether a ray passes through a box for some parameter in the
   interval (tmin0, tmax0). The interval is narrowed one axis at a time
   (x, then y, then z); the box is missed as soon as it becomes empty. *)
fun aabb_hit (box: aabb) ({origin, dir}: ray) tmin0 tmax0 =
  let val {min = lo, max = hi} = box
      (* Narrows (near, far) to the part of the ray between the two planes
         of one axis. *)
      fun clip (lo', hi', start, step) (near, far) =
          let val inv = 1.0 / step
              val ta = (lo' - start) * inv
              val tb = (hi' - start) * inv
              (* A negative direction component reverses the order in which
                 the two planes are crossed. *)
              val (t_in, t_out) = if inv < 0.0 then (tb, ta) else (ta, tb)
          in (max t_in near, min t_out far) end
      val (near1, far1) =
          clip (#x lo, #x hi, #x origin, #x dir) (tmin0, tmax0)
  in if far1 <= near1 then false
     else let val (near2, far2) =
                  clip (#y lo, #y hi, #y origin, #y dir) (near1, far1)
          in if far2 <= near2 then false
             else let val (near3, far3) =
                          clip (#z lo, #z hi, #z origin, #z dir) (near2, far2)
                  in not (far3 <= near3) end
          end
  end
179 | 
180 | type objs = sphere bvh
181 | 
(* Finds the hit with the smallest t in (t_min, t_max) among the spheres of
   a BVH. A hit in the left sub-tree shortens the interval that is searched
   in the right one. *)
fun objs_hit tree r t_min t_max =
    case tree of
        bvh_leaf (_, s) => sphere_hit s r t_min t_max
      | bvh_split (box, left, right) =>
        if not (aabb_hit box r t_min t_max)
        then NONE
        else (case objs_hit left r t_min t_max of
                  NONE => objs_hit right r t_min t_max
                | near as SOME h =>
                  (case objs_hit right r t_min (#t h) of
                       NONE => near
                     | closer => closer))
192 | 
193 | type camera = { origin: pos
194 |               , llc: pos
195 |               , horizontal: dir
196 |               , vertical: dir
197 |               }
198 | 
(* Derives a camera from a position (lookfrom), a target point (lookat), an
   up direction (vup), a vertical field of view in degrees (vfov) and the
   aspect ratio (width divided by height). *)
fun camera lookfrom lookat vup vfov aspect =
  let val theta = vfov * Math.pi / 180.0
      val half_height = Math.tan (theta / 2.0)
      val half_width = aspect * half_height
      val w = normalise (vec_sub lookfrom lookat)
      val u = normalise (cross vup w)
      val v = cross w u
      (* lookfrom - half_width*u - half_height*v - w, subtracted in that order. *)
      val lower_left =
          foldl (fn (d, acc) => vec_sub acc d) lookfrom
                [scale half_width u, scale half_height v, w]
  in { origin = lookfrom
     , llc = lower_left
     , horizontal = scale (2.0*half_width) u
     , vertical = scale (2.0*half_height) v
     }
  end
215 | 
(* The ray from the camera origin through the image-plane point with
   coordinates (s, t). *)
fun get_ray ({origin, llc, horizontal, vertical}: camera) s t : ray =
    let val across = vec_add llc (scale s horizontal)
        val target = vec_add across (scale t vertical)
    in {origin = origin, dir = vec_sub target origin} end
222 | 
(* Mirrors v about the plane with normal n: v - 2*(v.n)*n. *)
fun reflect v n =
    let val twice_proj = 2.0 * dot v n
    in vec_sub v (scale twice_proj n) end
225 | 
(* The reflected ray leaving a hit, paired with the hit's colour, or NONE
   when the reflection does not point away from the surface. *)
fun scatter (r: ray) (hit: hit) =
    let val n = #normal hit
        val bounce = reflect (normalise (#dir r)) n
    in if dot bounce n > 0.0
       then SOME ({origin = #p hit, dir = bounce}, #colour hit)
       else NONE
    end
234 | 
(* Colour seen along ray r. A hit contributes its colour multiplied by the
   colour seen along the reflected ray, for at most 50 bounces; absorbed or
   too deep rays are black. A ray that hits nothing gets a background
   gradient that depends on the y component of its direction. *)
fun ray_colour objs r depth =
    case objs_hit objs r 0.001 1000000000.0 of
        NONE =>
        let val t = 0.5 * (#y (normalise (#dir r)) + 1.0)
        in vec_add (scale (1.0-t) white) (scale t {x=0.5, y=0.7, z=1.0})
        end
      | SOME hit =>
        (case scatter r hit of
             NONE => black
           | SOME (scattered, attenuation) =>
             if depth >= 50
             then black
             else vec_mul attenuation (ray_colour objs scattered (depth+1)))
248 | 
(* Colour of the pixel in row j, column i of a width x height image. *)
fun trace_ray objs width height cam j i : colour =
    ray_colour objs
               (get_ray cam (real i / real width) (real j / real height))
               0
254 | 
255 | type pixel = int * int * int
256 | 
(* Converts a colour to an integer triple by truncating 255.99 * channel. *)
fun colour_to_pixel {x=r, y=g, z=b} =
    let fun channel c = trunc (255.99 * c)
    in (channel r, channel g, channel b) end
262 | 
263 | type image = { pixels: pixel Array.array
264 |              , height: int
265 |              , width: int}
266 | 
(* Writes an image to the stream out in plain-text PPM (P3) format: the
   header first, then one "r g b" line per pixel in array order. *)
fun image2ppm out ({pixels, height, width}: image) =
    let val header =
            "P3\n" ^
            Int.toString width ^ " " ^ Int.toString height ^ "\n" ^
            "255\n"
        fun emit (r, g, b) =
            TextIO.output (out,
                           String.concat [Int.toString r, " ",
                                          Int.toString g, " ",
                                          Int.toString b, "\n"])
    in (TextIO.output (out, header);
        Array.app emit pixels)
    end
279 | 
(* Renders a width x height image. The pixel with flat index l lies in
   column l mod width and is traced with row coordinate
   height - l div width. *)
fun render objs width height cam : image =
    let fun shade l =
            colour_to_pixel
                (trace_ray objs width height cam
                           (height - l div width) (l mod width))
    in { pixels = Array.tabulate (height*width, shade)
       , height = height
       , width = width
       }
    end
292 | 
293 | type scene = { camLookFrom: pos
294 |              , camLookAt: pos
295 |              , camFov: real
296 |              , spheres: sphere list
297 |              }
298 | 
(* Builds the BVH and the camera for rendering a scene at width x height.
   The camera uses (0, 1, 0) as its up direction. *)
fun from_scene width height (scene: scene) : objs * camera =
  let val {camLookFrom, camLookAt, camFov, spheres} = scene
      val tree = mk_bvh sphere_aabb spheres
      val cam = camera camLookFrom camLookAt {x=0.0, y=1.0, z=0.0}
                       camFov (real width / real height)
  in (tree, cam) end
303 | 
(* The list of f (j, i) for j in 0..m-1 and i in 0..n-1, with i varying
   fastest. *)
fun tabulate_2d m n f =
    let fun row j = List.tabulate (n, fn i => f (j, i))
    in List.concat (List.tabulate (m, row)) end
306 | 
(* Four 10x10 grids of spheres: a left, a back, a right wall and a floor. *)
val rgbbox : scene =
    let val n = 10
        val k = 60.0
        val lo = ~k / 2.0
        val hi = k / 2.0
        val step = k / real n
        val radius = k / (real n * 2.0)

        (* Coordinate of grid index i along one axis. *)
        fun at i = lo + step * real i

        (* One n x n grid of equally coloured spheres; place maps a pair of
           grid indices to the sphere centre. *)
        fun wall place colour =
            tabulate_2d n n (fn cell =>
                                { pos = place cell
                                , colour = colour
                                , radius = radius
                                })

        val leftwall =
            wall (fn (y, z) => {x = lo, y = at y, z = at z})
                 {x=1.0, y=0.0, z=0.0}

        val midwall =
            wall (fn (x, y) => {x = at x, y = at y, z = lo})
                 {x=1.0, y=1.0, z=0.0}

        val rightwall =
            wall (fn (y, z) => {x = hi, y = at y, z = at z})
                 {x=0.0, y=0.0, z=1.0}

        val bottom =
            wall (fn (x, z) => {x = at x, y = lo, z = at z})
                 {x=1.0, y=1.0, z=1.0}

    in { spheres = List.concat [leftwall, midwall, rightwall, bottom]
       , camLookFrom = {x=0.0, y=30.0, z=30.0}
       , camLookAt = {x=0.0, y= ~1.0, z= ~1.0}
       , camFov = 75.0
       }
    end
354 | 
(* A single 100x100 grid of white spheres in the plane y = 0. *)
val irreg : scene =
    let val n = 100
        val k = 600.0
        val lo = ~k / 2.0
        val step = k / real n
        fun sphere_at (x, z) =
            { pos = {x = lo + step * real x,
                     y = 0.0,
                     z = lo + step * real z}
            , colour = white
            , radius = k / (real n * 2.0)
            }
    in { spheres = tabulate_2d n n sphere_at
       , camLookFrom = {x=0.0, y=12.0, z=30.0}
       , camLookAt = {x=0.0, y=10.0, z= ~1.0}
       , camFov = 75.0 }
    end
371 | 
(* Looks for the option needle in argv and yields f applied to the argument
   that follows it. Yields def when needle is absent or is the last
   argument. *)
fun getopt needle argv f def =
    case argv of
        opt :: (rest as value :: _) =>
          if opt = needle
          then f value
          else getopt needle rest f def
      | _ => def
378 | 
(* Parses an integer from a string; raises Option when Int.fromString
   yields NONE. *)
fun int s =
    case Int.fromString s of
        SOME n => n
      | NONE => raise Option
(* The identity function. *)
val id = fn x => x
381 | 
(* Program entry point.
   Options: -m <height> (default 200), -n <width> (default 200),
            -s <scene>  ("rgbbox" or "irreg", default "rgbbox"),
            -f <file>   (PPM output file; nothing is written when absent).
   Prints the time taken by BVH construction and by rendering.
   Raises Fail for an unknown scene name. *)
fun main () =
    let val args = CommandLine.arguments ()
        val height = getopt "-m" args int 200
        val width = getopt "-n" args int 200
        val imgfile = getopt "-f" args SOME NONE
        val scene_name = getopt "-s" args id "rgbbox"

        val scene = case scene_name of
                        "rgbbox" => rgbbox
                      | "irreg" => irreg
                      | s => raise Fail ("No such scene: " ^ s)

        val _ = print ("Using scene '" ^ scene_name ^ "' (-s to switch).\n")

        val t0 = Time.now ()
        val (objs, cam) = from_scene width height scene
        val t1 = Time.now ()
        val _ = print ("Scene BVH construction in " ^ Time.fmt 4 (Time.- (t1, t0)) ^ "s.\n")

        val t0 = Time.now ()
        val result = render objs width height cam
        val t1 = Time.now ()

        val _ = print ("Rendering in " ^ Time.fmt 4 (Time.- (t1, t0)) ^ "s.\n")

        val _ = case imgfile of
                    SOME imgfile' =>
                    let val out = TextIO.openOut imgfile'
                    in print ("Writing image to " ^ imgfile' ^ ".\n");
                       (* Write the image that was just rendered and timed
                          instead of rendering the scene a second time. *)
                       image2ppm out result;
                       TextIO.closeOut out
                    end
                  | NONE =>
                    print ("-f not passed, so not writing image to file.\n")
    in () end
418 | 


--------------------------------------------------------------------------------
/sml/shell.nix:
--------------------------------------------------------------------------------
# Build environment for the SML ray tracer: provides Poly/ML and GNU make.
with import <nixpkgs> {};
stdenv.mkDerivation {
    name = "ray";
    buildInputs = [ polyml gnumake ];
}
6 | 


--------------------------------------------------------------------------------