├── .gitignore
├── CMakeLists.txt
├── LICENSE.txt
├── README.md
├── benchmarks
    ├── bench.sh
    ├── benchmark.py
    ├── benchmark_aila.py
    ├── fix_results_arm.py
    ├── gen_formulas.py
    ├── gen_table.py
    ├── profile.sh
    ├── results.txt
    ├── results_aila.txt
    ├── results_arm_par.txt
    ├── results_par.txt
    └── scenes
    │   └── generate.sh
├── cmake
    ├── modules
    │   ├── FindLZ4.cmake
    │   └── FindSDL2.cmake
    └── test
    │   ├── run_rodent.cmake
    │   └── run_traversal.cmake
├── refs
    ├── CMakeLists.txt
    ├── cmake
    │   └── modules
    │   │   ├── FindEmbree.cmake
    │   │   ├── FindISPC.cmake
    │   │   ├── FindOptiX.cmake
    │   │   ├── FindSDL2.cmake
    │   │   └── FindTBB.cmake
    └── src
    │   ├── CMakeLists.txt
    │   ├── bench_shading.cpp
    │   ├── bench_shading.h
    │   ├── bench_shading.ispc
    │   ├── color.h
    │   ├── common.h
    │   ├── common.isph
    │   ├── embree_path_tracer.cpp
    │   ├── embree_path_tracer.h
    │   ├── embree_path_tracer.ispc
    │   ├── file_path.h
    │   ├── float2.h
    │   ├── float3.h
    │   ├── float4.h
    │   ├── image.cpp
    │   ├── image.h
    │   ├── math.isph
    │   ├── obj.cpp
    │   ├── obj.h
    │   ├── optix_path_tracer.cpp
    │   ├── optix_path_tracer.cu
    │   └── optix_path_tracer.h
├── src
    ├── CMakeLists.txt
    ├── core
    │   ├── color.impala
    │   ├── common.impala
    │   ├── cpu_common.impala
    │   ├── matrix.impala
    │   ├── random.impala
    │   ├── sort.impala
    │   └── vector.impala
    ├── driver
    │   ├── bbox.h
    │   ├── buffer.h
    │   ├── bvh.h
    │   ├── color.h
    │   ├── common.h
    │   ├── converter.cpp
    │   ├── driver.cpp
    │   ├── embree_bvh.h
    │   ├── file_path.h
    │   ├── float2.h
    │   ├── float3.h
    │   ├── float4.h
    │   ├── image.cpp
    │   ├── image.h
    │   ├── interface.cpp
    │   ├── obj.cpp
    │   ├── obj.h
    │   └── tri.h
    ├── dummy_main.impala
    ├── render
    │   ├── camera.impala
    │   ├── driver.impala
    │   ├── geometry.impala
    │   ├── image.impala
    │   ├── light.impala
    │   ├── mapping_cpu.impala
    │   ├── mapping_gpu.impala
    │   ├── material.impala
    │   ├── renderer.impala
    │   └── scene.impala
    └── traversal
    │   ├── intersection.impala
    │   ├── mapping_cpu.impala
    │   ├── mapping_gpu.impala
    │   └── stack.impala
├── testing
    ├── cornell_box.mtl
    ├── cornell_box.obj
    ├── ref-cornell.png
    ├── ref-primary.png
    ├── ref-random.png
    ├── sponza-primary.rays
    ├── sponza-random.rays
    └── sponza.bvh
└── tools
    ├── CMakeLists.txt
    ├── bench_aila
        ├── CMakeLists.txt
        ├── CudaTracerKernels.hpp
        ├── bench_aila.cpp
        └── kepler_dynamic_fetch.cu
    ├── bench_embree
        ├── CMakeLists.txt
        └── bench_embree.cpp
    ├── bench_interface
        ├── CMakeLists.txt
        ├── bench_interface.cpp
        └── bench_interface.impala
    ├── bench_shading
        ├── CMakeLists.txt
        ├── bench_shading.cpp
        └── bench_shading.impala
    ├── bench_traversal
        ├── CMakeLists.txt
        ├── bench_traversal.cpp
        └── bench_traversal.impala
    ├── bvh_extractor
        ├── CMakeLists.txt
        ├── bvh_extractor.cpp
        ├── extract_bvh2.cpp
        └── extract_bvh4_8.cpp
    ├── common
        ├── load_bvh.h
        └── load_rays.h
    ├── fbuf2png
        ├── CMakeLists.txt
        └── fbuf2png.cpp
    └── ray_gen
        ├── CMakeLists.txt
        └── ray_gen.cpp


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Auto generated interfaces
 2 | tools/common/traversal.h
 3 | tools/common/shading.h
 4 | src/driver/interface.h
 5 | 
 6 | # Compiled Object files
 7 | *.slo
 8 | *.lo
 9 | *.o
10 | 
11 | # Configuration file
12 | *.conf
13 | 
14 | # Float buffers
15 | *.fbuf
16 | 
17 | # Image files
18 | *.png
19 | 
20 | # Precompiled Headers
21 | *.gch
22 | *.pch
23 | 
24 | # Compiled Dynamic libraries
25 | *.so
26 | *.dylib
27 | *.dll
28 | 
29 | # Fortran module files
30 | *.mod
31 | 
32 | # Compiled Static libraries
33 | *.lai
34 | *.la
35 | *.a
36 | *.lib
37 | 
38 | # Executables
39 | *.exe
40 | *.out
41 | *.app
42 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.1) # must precede project() so that policy defaults are set first
  2 | project(rodent)
  3 | 
  4 | # Collect executables under bin/ and libraries (shared and static) under lib/ of the build tree
  5 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
  6 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
  7 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
  8 | 
  9 | # The AnyDSL runtime is mandatory; its headers are added for this directory and everything below it
 10 | find_package(AnyDSL_runtime REQUIRED)
 11 | include_directories(${AnyDSL_runtime_INCLUDE_DIRS})
 12 | set(CMAKE_CXX_STANDARD 14)
 13 | set(CLANG_FLAGS -O3 -march=native -ffast-math CACHE STRING "Clang compilation options")
 14 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules)
 15 | 
 16 | # Colorized output is enabled by default, except on Windows, whose console
 17 | # does not support ANSI escape codes by default
 18 | if (WIN32)
 19 |     set(COLOR_TTY_AVAILABLE FALSE)
 20 | else ()
 21 |     set(COLOR_TTY_AVAILABLE TRUE)
 22 | endif ()
 23 | set(COLORIZE ${COLOR_TTY_AVAILABLE} CACHE BOOL "Set to TRUE to enable colorized output. Requires an ANSI compliant terminal.")
 24 | 
 25 | # Try to find Embree, starting with its source directory (recognized by the Embree 3 header)
 26 | find_path(EMBREE_ROOT_DIR include/embree3/rtcore.h DOC "Embree source code directory")
 27 | 
 28 | # Library directory: any one of the ISA-specific libraries identifies it,
 29 | # be it a DLL, a shared object or a static archive
 30 | find_path(EMBREE_LIBRARY_DIR
 31 |     NAMES
 32 |         embree_sse42.dll   embree_avx.dll   embree_avx2.dll
 33 |         libembree_sse42.so libembree_avx.so libembree_avx2.so
 34 |         libembree_sse42.a  libembree_avx.a  libembree_avx2.a
 35 |     HINTS
 36 |         ${EMBREE_ROOT_DIR}/build
 37 |     DOC "Embree library directory")
 38 | 
 39 | # Directory containing Embree's CMake configuration file
 40 | find_path(EMBREE_CMAKE_DIR
 41 |     NAMES
 42 |         embree-config.cmake
 43 |     HINTS ${EMBREE_LIBRARY_DIR}
 44 |     DOC "Embree CMake directory")
 45 | 
 46 | # Embree is used only if all three paths were found (tested by name, so empty values are safe too)
 47 | if (EMBREE_ROOT_DIR AND EMBREE_LIBRARY_DIR AND EMBREE_CMAKE_DIR)
 48 |     # Prefer the Embree 3 shared library, fall back to the Embree 2 one
 49 |     set(EMBREE_VERSION 3)
 50 |     find_library(EMBREE3_SHARED_LIBRARY NAMES embree3.dll libembree3.so HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree library")
 51 |     if (EMBREE3_SHARED_LIBRARY)
 52 |         set(EMBREE_SHARED_LIBRARY ${EMBREE3_SHARED_LIBRARY})
 53 |     else ()
 54 |         set(EMBREE_VERSION 2)
 55 |         find_library(EMBREE2_SHARED_LIBRARY NAMES embree.dll libembree.so HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree library")
 56 |         set(EMBREE_SHARED_LIBRARY ${EMBREE2_SHARED_LIBRARY})
 57 |     endif()
 58 |     add_definitions(-DEMBREE_VERSION=${EMBREE_VERSION})
 59 | 
 60 |     find_library(EMBREE_SSE42_LIBRARY   NAMES embree_sse42.lib libembree_sse42.a HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree SSE42 library")
 61 |     find_library(EMBREE_AVX_LIBRARY     NAMES embree_avx.lib   libembree_avx.a   HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree AVX library")
 62 |     find_library(EMBREE_AVX2_LIBRARY    NAMES embree_avx2.lib  libembree_avx2.a  HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree AVX2 library")
 63 |     find_library(EMBREE_SIMD_LIBRARY    NAMES simd.lib         libsimd.a         HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree SIMD library")
 64 |     find_library(EMBREE_TASKING_LIBRARY NAMES tasking.lib      libtasking.a      HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree tasking library")
 65 |     find_library(EMBREE_SYS_LIBRARY     NAMES sys.lib          libsys.a          HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree system library")
 66 | 
 67 |     find_library(PTHREAD_LIBRARY        NAMES pthread.dll      libpthread.so     HINTS /usr/lib /usr/lib64)
 68 |     find_library(TBB_LIBRARY            NAMES tbb.dll          libtbb.so         HINTS /usr/lib /usr/lib64)
 69 |     find_library(TBB_MALLOC_LIBRARY     NAMES tbbmalloc.dll    libtbbmalloc.so   HINTS /usr/lib /usr/lib64)
 70 | 
 71 |     set(EMBREE_DEPENDENCIES
 72 |         ${EMBREE_SHARED_LIBRARY}
 73 |         ${EMBREE_SSE42_LIBRARY}
 74 |         ${EMBREE_AVX_LIBRARY}
 75 |         ${EMBREE_AVX2_LIBRARY}
 76 |         ${EMBREE_TASKING_LIBRARY}
 77 |         ${EMBREE_SYS_LIBRARY}
 78 |         ${EMBREE_SIMD_LIBRARY})
 79 | 
 80 |     if (PTHREAD_LIBRARY)
 81 |         # pthreads is not required on Windows
 82 |         list(APPEND EMBREE_DEPENDENCIES ${PTHREAD_LIBRARY})
 83 |     endif()
 84 | 
 85 |     # Sets the variables named by `defs` and `libs` in the caller's scope to the definitions and libraries of Embree's tasking system
 86 |     function(get_embree_tasking_system defs libs)
 87 |         include(${EMBREE_CMAKE_DIR}/embree-config.cmake) # presumably defines EMBREE_TASKING_SYSTEM; included here to keep its variables local
 88 |         set(${defs} "" PARENT_SCOPE)
 89 |         set(${libs} "" PARENT_SCOPE)
 90 |         if (EMBREE_TASKING_SYSTEM STREQUAL "TBB")
 91 |             set(${defs} -DTASKING_TBB PARENT_SCOPE)
 92 |             set(${libs} ${TBB_LIBRARY} ${TBB_MALLOC_LIBRARY} PARENT_SCOPE)
 93 |         elseif (EMBREE_TASKING_SYSTEM STREQUAL "PPL")
 94 |             set(${defs} -DTASKING_PPL PARENT_SCOPE)
 95 |         else ()
 96 |             set(${defs} -DTASKING_INTERNAL PARENT_SCOPE)
 97 |         endif()
 98 |     endfunction()
 99 | 
100 |     get_embree_tasking_system(EMBREE_DEFINITIONS EMBREE_TASKING_DEPENDENCIES)
101 |     list(APPEND EMBREE_DEPENDENCIES ${EMBREE_TASKING_DEPENDENCIES})
102 |     message(STATUS "Embree found")
103 |     set(EMBREE_FOUND ON)
104 | endif()
105 | 
106 | # For tests: enable CTest and look for ImageMagick's `compare` tool (not REQUIRED, lookup is QUIET)
107 | include(CTest)
108 | find_package(ImageMagick COMPONENTS compare QUIET)
109 | 
110 | add_subdirectory(src)
111 | add_subdirectory(tools)
112 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2019 Arsène Pérard-Gayot
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Rodent
 2 | 
 3 | Rodent is a BVH traversal library and renderer implemented using the AnyDSL compiler framework (https://anydsl.github.io/).
 4 | 
 5 | # Building
 6 | 
 7 | The dependencies are: CMake, AnyDSL, libpng, SDL2, and optionally the Embree sources for the benchmarking tools.
 8 | Once the dependencies are installed, use the following commands to build the project:
 9 | 
10 |     mkdir build
11 |     cd build
12 |     # Set the OBJ file to use with the SCENE_FILE variable
13 |     # By default, SCENE_FILE=../testing/cornell_box.obj
14 |     cmake .. -DSCENE_FILE=myfile.obj
15 |     # Optional: Create benchmarking tools for Embree and BVH extractor tools
16 |     # cmake .. -DEMBREE_ROOT_DIR=<path to Embree sources>
17 |     make
18 | 
19 | # Testing
20 | 
21 | This section assumes that the current directory is the build directory. To run rodent, just type:
22 | 
23 |     bin/rodent
24 | 
25 | You may want to change the initial camera parameters using the command line options `--eye`, `--dir` and `--up`. Run `bin/rodent --help` to get a full list of options.
26 | 
27 | When ImageMagick is found by CMake, use the following commands to test the traversal code with the provided test scene:
28 | 
29 |     make test
30 | 
31 | This will only test the primary ray distribution with the packet, single, and hybrid variants.
32 | To test all possible combinations, or if you do not have ImageMagick installed, use the benchmarking tool directly:
33 | 
34 |     bin/bench_traversal -bvh ../testing/sponza.bvh -ray ../testing/sponza-primary.rays --bench 50 --warmup 10 --tmax 5000 -o output-hybrid-primary.fbuf
35 |     bin/bench_traversal -bvh ../testing/sponza.bvh -ray ../testing/sponza-primary.rays --bench 50 --warmup 10 --tmax 5000 -s -o output-single-primary.fbuf
36 |     bin/bench_traversal -bvh ../testing/sponza.bvh -ray ../testing/sponza-random.rays --bench 50 --warmup 10 --tmax 1 -o output-hybrid-random.fbuf
37 |     bin/bench_traversal -bvh ../testing/sponza.bvh -ray ../testing/sponza-random.rays --bench 50 --warmup 10 --tmax 1 -s -o output-single-random.fbuf
38 |     bin/fbuf2png -n output-hybrid-primary.fbuf output-hybrid-primary.png
39 |     bin/fbuf2png -n output-single-primary.fbuf output-single-primary.png
40 |     bin/fbuf2png -n output-hybrid-random.fbuf output-hybrid-random.png
41 |     bin/fbuf2png -n output-single-random.fbuf output-single-random.png
42 | 
43 | This will run the traversal on the test set, and generate images as a result. For the primary ray distribution, the _hybrid_ and _single_ variants should generate the same images. The reference images for primary and random rays are in the `testing` directory.
44 | 
45 | Running `bin/bench_traversal --help` will provide a list of additional options.
46 | 


--------------------------------------------------------------------------------
/benchmarks/bench.sh:
--------------------------------------------------------------------------------
 1 | # Usage: bench.sh DEVICE PLATFORM. Requires the following variables:
 2 | # - ANYDSL_DIR: Path to the AnyDSL installation directory
 3 | # - EMBREE_ROOT_DIR: Path to the root of the Embree sources
 4 | # - SCENES_DIR: Path to the scenes directory
 5 | 
 6 | mkdir -p renderers
 7 | cd renderers
 8 | 
 9 | # Number of benchmarking iterations
10 | ITER=20
11 | 
12 | # Set to OFF to disable fusion for megakernel mappings (will be slower)
13 | FUSION=ON
14 | 
15 | # Path to the CMake package directory of the AnyDSL runtime (passed to CMake as AnyDSL_runtime_DIR)
16 | ANYDSL_RUNTIME_DIR=$ANYDSL_DIR/runtime/build/share/anydsl/cmake
17 | 
18 | # Paths to scene files
19 | LIVING_ROOM_SCENE=$SCENES_DIR/living_room/living_room.obj
20 | BATHROOM_SCENE=$SCENES_DIR/salle_de_bain/salle_de_bain.obj
21 | BEDROOM_SCENE=$SCENES_DIR/bedroom/bedroom.obj
22 | DINING_ROOM_SCENE=$SCENES_DIR/dining_room/dining_room.obj
23 | KITCHEN_SCENE=$SCENES_DIR/kitchen/kitchen.obj
24 | STAIRCASE_SCENE=$SCENES_DIR/wooden_staircase/wooden_staircase.obj
25 | 
26 | # Enable more NVPTX opts (currently disabled because these options are detrimental to perf.)
27 | #if [ "$2" == "nvvm-megakernel" -o "$2" == "nvvm-streaming" -o "$2" == "nvvm" ]; then
28 | #    export ANYDSL_LLVM_ARGS="-nvptx-f32ftz -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 -nvptx-sched4reg"
29 | #fi
30 | 
31 | # The compiler may need a large stack space
32 | ulimit -s 65536
33 | 
34 | echo "Benchmarking device $1 on platform $2"
35 | 
36 | BENCH_COMPILATION=false
37 | echo "Building..."
38 | if $BENCH_COMPILATION ; then
39 |     mkdir -p living_room && cd living_room && cmake ../../.. -DEMBREE_ROOT_DIR=${EMBREE_ROOT_DIR} -DAnyDSL_runtime_DIR=${ANYDSL_RUNTIME_DIR} -DCMAKE_BUILD_TYPE=Release -DMAX_PATH_LEN=20 -DSPP=4 -DTARGET_DEVICE=$1 -DTARGET_PLATFORM=$2 -DSCENE_FILE=${LIVING_ROOM_SCENE} -DDISABLE_GUI=ON -DMEGAKERNEL_FUSION=$FUSION && make -j convert driver && time make -j rodent && cd .. &&
40 |     mkdir -p bathroom && cd bathroom && cmake ../../.. -DEMBREE_ROOT_DIR=${EMBREE_ROOT_DIR} -DAnyDSL_runtime_DIR=${ANYDSL_RUNTIME_DIR} -DCMAKE_BUILD_TYPE=Release -DMAX_PATH_LEN=20 -DSPP=4 -DTARGET_DEVICE=$1 -DTARGET_PLATFORM=$2 -DSCENE_FILE=${BATHROOM_SCENE} -DDISABLE_GUI=ON -DMEGAKERNEL_FUSION=$FUSION && make -j convert driver && time make -j rodent && cd .. &&
41 |     mkdir -p bedroom && cd bedroom && cmake ../../.. -DEMBREE_ROOT_DIR=${EMBREE_ROOT_DIR} -DAnyDSL_runtime_DIR=${ANYDSL_RUNTIME_DIR} -DCMAKE_BUILD_TYPE=Release -DMAX_PATH_LEN=20 -DSPP=4 -DTARGET_DEVICE=$1 -DTARGET_PLATFORM=$2 -DSCENE_FILE=${BEDROOM_SCENE} -DDISABLE_GUI=ON -DMEGAKERNEL_FUSION=$FUSION && make -j convert driver && time make -j rodent && cd .. &&
42 |     mkdir -p dining_room && cd dining_room && cmake ../../.. -DEMBREE_ROOT_DIR=${EMBREE_ROOT_DIR} -DAnyDSL_runtime_DIR=${ANYDSL_RUNTIME_DIR} -DCMAKE_BUILD_TYPE=Release -DMAX_PATH_LEN=20 -DSPP=4 -DTARGET_DEVICE=$1 -DTARGET_PLATFORM=$2 -DSCENE_FILE=${DINING_ROOM_SCENE} -DDISABLE_GUI=ON -DMEGAKERNEL_FUSION=$FUSION && make -j convert driver && time make -j rodent && cd .. &&
43 |     mkdir -p kitchen && cd kitchen && cmake ../../.. -DEMBREE_ROOT_DIR=${EMBREE_ROOT_DIR} -DAnyDSL_runtime_DIR=${ANYDSL_RUNTIME_DIR} -DCMAKE_BUILD_TYPE=Release -DMAX_PATH_LEN=20 -DSPP=4 -DTARGET_DEVICE=$1 -DTARGET_PLATFORM=$2 -DSCENE_FILE=${KITCHEN_SCENE} -DDISABLE_GUI=ON -DMEGAKERNEL_FUSION=$FUSION && make -j convert driver && time make -j rodent && cd .. &&
44 |     mkdir -p staircase && cd staircase && cmake ../../.. -DEMBREE_ROOT_DIR=${EMBREE_ROOT_DIR} -DAnyDSL_runtime_DIR=${ANYDSL_RUNTIME_DIR} -DCMAKE_BUILD_TYPE=Release -DMAX_PATH_LEN=20 -DSPP=4 -DTARGET_DEVICE=$1 -DTARGET_PLATFORM=$2 -DSCENE_FILE=${STAIRCASE_SCENE} -DDISABLE_GUI=ON -DMEGAKERNEL_FUSION=$FUSION && make -j convert driver && time make -j rodent && cd .. || { echo "Compilation failed" ; exit 1 ; }
45 |     exit 0
46 | else
47 |     mkdir -p living_room && cd living_room && cmake ../../.. -DEMBREE_ROOT_DIR=${EMBREE_ROOT_DIR} -DAnyDSL_runtime_DIR=${ANYDSL_RUNTIME_DIR} -DCMAKE_BUILD_TYPE=Release -DMAX_PATH_LEN=20 -DSPP=4 -DTARGET_DEVICE=$1 -DTARGET_PLATFORM=$2 -DSCENE_FILE=${LIVING_ROOM_SCENE} -DDISABLE_GUI=ON -DMEGAKERNEL_FUSION=$FUSION && cmake --build . --target rodent & PIDS="$!"
48 |     mkdir -p bathroom && cd bathroom && cmake ../../.. -DEMBREE_ROOT_DIR=${EMBREE_ROOT_DIR} -DAnyDSL_runtime_DIR=${ANYDSL_RUNTIME_DIR} -DCMAKE_BUILD_TYPE=Release -DMAX_PATH_LEN=20 -DSPP=4 -DTARGET_DEVICE=$1 -DTARGET_PLATFORM=$2 -DSCENE_FILE=${BATHROOM_SCENE} -DDISABLE_GUI=ON -DMEGAKERNEL_FUSION=$FUSION && cmake --build . --target rodent & PIDS="$PIDS $!"
49 |     mkdir -p bedroom && cd bedroom && cmake ../../.. -DEMBREE_ROOT_DIR=${EMBREE_ROOT_DIR} -DAnyDSL_runtime_DIR=${ANYDSL_RUNTIME_DIR} -DCMAKE_BUILD_TYPE=Release -DMAX_PATH_LEN=20 -DSPP=4 -DTARGET_DEVICE=$1 -DTARGET_PLATFORM=$2 -DSCENE_FILE=${BEDROOM_SCENE} -DDISABLE_GUI=ON -DMEGAKERNEL_FUSION=$FUSION && cmake --build . --target rodent & PIDS="$PIDS $!"
50 |     mkdir -p dining_room && cd dining_room && cmake ../../.. -DEMBREE_ROOT_DIR=${EMBREE_ROOT_DIR} -DAnyDSL_runtime_DIR=${ANYDSL_RUNTIME_DIR} -DCMAKE_BUILD_TYPE=Release -DMAX_PATH_LEN=20 -DSPP=4 -DTARGET_DEVICE=$1 -DTARGET_PLATFORM=$2 -DSCENE_FILE=${DINING_ROOM_SCENE} -DDISABLE_GUI=ON -DMEGAKERNEL_FUSION=$FUSION && cmake --build . --target rodent & PIDS="$PIDS $!"
51 |     mkdir -p kitchen && cd kitchen && cmake ../../.. -DEMBREE_ROOT_DIR=${EMBREE_ROOT_DIR} -DAnyDSL_runtime_DIR=${ANYDSL_RUNTIME_DIR} -DCMAKE_BUILD_TYPE=Release -DMAX_PATH_LEN=20 -DSPP=4 -DTARGET_DEVICE=$1 -DTARGET_PLATFORM=$2 -DSCENE_FILE=${KITCHEN_SCENE} -DDISABLE_GUI=ON -DMEGAKERNEL_FUSION=$FUSION && cmake --build . --target rodent & PIDS="$PIDS $!"
52 |     mkdir -p staircase && cd staircase && cmake ../../.. -DEMBREE_ROOT_DIR=${EMBREE_ROOT_DIR} -DAnyDSL_runtime_DIR=${ANYDSL_RUNTIME_DIR} -DCMAKE_BUILD_TYPE=Release -DMAX_PATH_LEN=20 -DSPP=4 -DTARGET_DEVICE=$1 -DTARGET_PLATFORM=$2 -DSCENE_FILE=${STAIRCASE_SCENE} -DDISABLE_GUI=ON -DMEGAKERNEL_FUSION=$FUSION && cmake --build . --target rodent & PIDS="$PIDS $!"
53 |     # Wait for every build before benchmarking; each PID is waited on separately because a bare `wait` always returns 0
54 |     for pid in $PIDS ; do wait $pid || { echo "Compilation failed" ; exit 1 ; } ; done
55 | fi
56 | 
57 | echo "Running..."
58 | # stderr is discarded; only output lines containing '#' are kept, with the first '#' replaced by the scene name
59 | cd living_room
60 | bin/rodent --bench $ITER --eye -1.8 1 -5 --dir -0.1 0 1 --up 0 1 0 --fov 60 --width 1920 --height 1088 -o render.png 2> /dev/null | sed -n 's/#/Living Room/p'
61 | cd ..
62 | 
63 | cd bathroom
64 | bin/rodent --bench $ITER --eye -2.26 15.62 35.23 --dir -22.18 -5.32 -97.36 --up 0 1 0 --fov 60 --width 1920 --height 1088 -o render.png 2> /dev/null | sed -n 's/#/Bathroom/p'
65 | cd ..
66 | 
67 | cd bedroom
68 | bin/rodent --bench $ITER --eye 3.5 1 3.5 --dir -1 0 -1 --up 0 1 0 --fov 60 --width 1920 --height 1088 -o render.png 2> /dev/null | sed -n 's/#/Bedroom/p'
69 | cd ..
70 | 
71 | cd dining_room
72 | bin/rodent --bench $ITER --eye -4 1.3 0.0 --dir 1 -0.1 0 --up 0 1 0 --fov 48 --width 1920 --height 1088 -o render.png 2> /dev/null | sed -n 's/#/Dining Room/p'
73 | cd ..
74 | 
75 | cd kitchen
76 | bin/rodent --bench $ITER --eye 0.5 1.6 3 --dir -0.4 -0.05 -1 --up 0 1 0 --fov 60 --width 1920 --height 1088 -o render.png 2> /dev/null | sed -n 's/#/Kitchen/p'
77 | cd ..
78 | 
79 | cd staircase
80 | bin/rodent --bench $ITER --eye 0 1.6 4.5 --dir 0 0 -1 --up 0 1 0 --fov 38 --height 1280 --width 720 -o render.png 2> /dev/null | sed -n 's/#/Staircase/p'
81 | cd ..
82 | 


--------------------------------------------------------------------------------
/benchmarks/benchmark.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python3
 2 | import subprocess
 3 | 
 4 | iters = "20"
 5 | warmups = "5"
 6 | bench_rodent = "../build_embree2/bin/bench_traversal"
 7 | bench_embree = "../build_embree2/bin/bench_embree"
 8 | variants = [
 9 |     "--ray-width 8 --bvh-width 4",
10 |     "--ray-width 8 --bvh-width 4 -p",
11 |     "--bvh-width 4 -s",
12 |     "--ray-width 8 --bvh-width 8",
13 |     "--ray-width 8 --bvh-width 8 -p",
14 |     "--bvh-width 8 -s"
15 | ]
16 | scenes = [
17 |     "sponza", 
18 |     "crown",
19 |     "san-miguel",
20 |     "powerplant"
21 | ]
22 | offsets = {
23 |     "sponza":     (0.01, 10.0),
24 |     "crown":      (0.01, 10.0),
25 |     "san-miguel": (0.01, 5.0),
26 |     "powerplant": (0.01, 1000.0)
27 | }
28 | 
29 | def bench_mrays(args):
30 |     pipe = subprocess.Popen(args, stdout = subprocess.PIPE)  
31 |     for line in pipe.stdout:
32 |         elems = line.split()
33 |         if elems[1] == b'Mrays/sec':
34 |             return float(elems[0])
35 |     return None
36 | 
37 | def main():
38 |     distribs = ["primary", "ao", "bounces"]
39 |     for scene in scenes:
40 |         for variant in variants:
41 |             for rays in distribs:
42 |                 (tmin, ao_max) = offsets[scene]
43 |                 tmax = 1.0e9
44 |                 args = ["-ray", "scenes/" + scene + "/" + rays + ".rays", "--bench", iters, "--warmup", warmups]
45 |                 if rays == "ao":
46 |                     tmax = ao_max
47 |                     args += ["-any"]
48 |                 args += ["--tmin", str(tmin), "--tmax", str(tmax)]
49 |                 args += variant.split()
50 |                 #print(scene, ": ", " ".join(args))
51 |                 mrays_embree = bench_mrays([bench_embree, "-obj", "scenes/" + scene + "/" + scene + ".obj"] + args) if not "-p" in variant else None
52 |                 mrays_rodent = bench_mrays([bench_rodent, "-bvh", "scenes/" + scene + "/" + scene + ".bvh"] + args)
53 |                 print("{} : {} : {} : {} : {}".format(scene, rays, variant, mrays_embree, mrays_rodent))
54 | 
55 | if __name__ == "__main__":
56 |     main()
57 | 


--------------------------------------------------------------------------------
/benchmarks/benchmark_aila.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python3
 2 | import subprocess
 3 | import os
 4 | 
 5 | iters = "500"
 6 | warmups = "100"
 7 | bin_dir = "/space/perard/sources/rodent/build_embree2/bin"
 8 | bench_dir = "/space/perard/sources/rodent/benchmarks"
 9 | bench_rodent = "./bench_traversal"
10 | bench_aila = "./bench_aila"
11 | scenes = [
12 |     "sponza", 
13 |     "crown",
14 |     "san-miguel",
15 |     "powerplant"
16 | ]
17 | offsets = {
18 |     "sponza":     (0.01, 10.0),
19 |     "crown":      (0.01, 10.0),
20 |     "san-miguel": (0.01, 5.0),
21 |     "powerplant": (0.01, 1000.0)
22 | }
23 | 
24 | def bench_mrays(args):
25 |     pipe = subprocess.Popen(args, stdout = subprocess.PIPE, env=dict(os.environ, ANYDSL_PROFILE='full'), cwd=bin_dir)
26 |     for line in pipe.stdout:
27 |         elems = line.split()
28 |         if elems[1] == b'Mrays/sec':
29 |             return float(elems[0])
30 |     return None
31 | 
32 | def main():
33 |     distribs = ["primary", "ao", "bounces"]
34 |     for scene in scenes:
35 |         for rays in distribs:
36 |             (tmin, ao_max) = offsets[scene]
37 |             tmax = 1.0e9
38 |             if rays == "ao":
39 |                 tmax = ao_max
40 |             args = ["-ray", bench_dir + "/scenes/" + scene + "/" + rays + ".rays", "--tmin", str(tmin), "--tmax", str(tmax), "--bench", iters, "--warmup", warmups]
41 |             #print(scene, ": ", " ".join(args))
42 |             mrays_aila = bench_mrays([bench_aila, "-bvh", bench_dir + "/scenes/" + scene + "/" + scene + ".bvh"] + args)
43 |             mrays_rodent = bench_mrays([bench_rodent, "-gpu", "nvvm", "-bvh", bench_dir + "/scenes/" + scene + "/" + scene + ".bvh"] + args)
44 |             print("{} : {} : {} : {}".format(scene, rays, mrays_aila, mrays_rodent))
45 | 
46 | if __name__ == "__main__":
47 |     main()
48 | 


--------------------------------------------------------------------------------
/benchmarks/fix_results_arm.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | import sys
 3 | 
 4 | def main():
 5 |     rays = ["primary", "ao", "bounces"]
 6 |     scenes = [
 7 |         ("sponza"),
 8 |         ("crown"),
 9 |         ("san-miguel"),
10 |         ("powerplant")
11 |     ]
12 |     ref_variant = "-w 4 -s"
13 |     results = []
14 | 
15 |     for line in sys.stdin.readlines():
16 |         elems = line.split(":")
17 |         scene = elems[0].strip()
18 |         ray = elems[1].strip()
19 |         variant = elems[2].strip()
20 |         ref = elems[-2].strip()
21 |         res = elems[-1].strip()
22 |         results.append((scene, ray, variant, ref, res))
23 | 
24 |     for scene in scenes:
25 |         kept_scene = list(filter(lambda res: res[0] == scene, results))
26 |         for ray in rays:
27 |             res_a, res_b = list(filter(lambda res: res[1] == ray, kept_scene))
28 |             if res_b[2] == ref_variant:
29 |                 res_a, res_b = res_b, res_a
30 |             print("{} : {} : {} : {} : {}".format(scene, ray, "fixed", res_a[-1], res_b[-1]))
31 |             
32 | 
33 | if __name__ == "__main__":
34 |     main()
35 | 


--------------------------------------------------------------------------------
/benchmarks/gen_formulas.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python3
 2 | import sys
 3 | 
 4 | def main():
 5 |     formulas = {}
 6 |     with open(sys.argv[1], "r") as f:
 7 |         for line in f:
 8 |             elems = line.split(':')
 9 |             ray = elems[1].strip()
10 |             ref = float(elems[-2].strip())
11 |             ours = float(elems[-1].strip())
12 |             if not ray in formulas:
13 |                 formulas[ray] = [(ref, ours)]
14 |             else:
15 |                 formulas[ray].append((ref, ours))
16 | 
17 |     for ray, factors in formulas.items():
18 |         print(ray + " = pow(", end="")
19 |         for i, factor in enumerate(factors):
20 |             ref, ours = factor
21 |             print("({}/{})".format(ours, ref), end="")
22 |             if i != len(factors)-1:
23 |                 print(" * ",end="")
24 |         print(", 1.0/{})".format(float(len(factors))))                
25 | 
26 | if __name__ == "__main__":
27 |     main()
28 | 


--------------------------------------------------------------------------------
/benchmarks/gen_table.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | import sys
 3 | 
 4 | def print_results(results, rays):
 5 |     for ray, ray_name in rays:
 6 |         elem = next((res for res in results if res[1] == ray), None)
 7 |         if elem == None:
 8 |             print("& -- & -- ", end="")
 9 |         else:
10 |             ref = float(elem[-2]) if elem[-2] != 'None' else None
11 |             res = float(elem[-1]) if elem[-1] != 'None' else None
12 |             if ref == None or res == None:
13 |                 if res == None:
14 |                     print("& -- ", end="")
15 |                 else:
16 |                     print("& {:0.2f} ".format(res), end="")
17 |                 if ref == None:
18 |                     print("& -- ", end="")
19 |                 else:
20 |                     print("& {:0.2f} ".format(ref), end="")
21 |             else:
22 |                 print("& {:0.2f} ({:+0.0f}\\%) ".format(res, 100.0*(res - ref) / ref), end = "")
23 |                 print("& {:0.2f} ".format(ref), end = "")
24 | 
25 | 
26 | def main():
27 |     rays = [
28 |         ("primary", "Primary"),
29 |         ("ao", "AO"),
30 |         ("bounces", "Diffuse")
31 |     ]
32 |     tables = [
33 |         {
34 |             "title"    : "BVH2",
35 |             "variants" : []
36 |         },
37 |         {
38 |             "title"    : "BVH4",
39 |             "variants" : [
40 |                 ("--bvh-width 4 -s", "Single"),
41 |                 ("--ray-width 8 --bvh-width 4 -p", "Packet"),
42 |                 ("--ray-width 8 --bvh-width 4",    "Hybrid")
43 |             ]
44 |         },
45 |         {
46 |             "title"    : "BVH8",
47 |             "variants" : [
48 |                 ("--bvh-width 8 -s", "Single"),
49 |                 ("--ray-width 8 --bvh-width 8 -p", "Packet"),
50 |                 ("--ray-width 8 --bvh-width 8",    "Hybrid")
51 |             ]
52 |         }
53 |     ]
54 |     scenes = [
55 |         ("sponza", "Sponza"),
56 |         ("crown", "Crown"),
57 |         ("san-miguel", "San-Miguel"),
58 |         ("powerplant", "Powerplant")
59 |     ]
60 |     results = []
61 | 
62 |     for line in sys.stdin.readlines():
63 |         elems = line.split(":")
64 |         scene = elems[0].strip()
65 |         ray = elems[1].strip()
66 |         variant = elems[2].strip()
67 |         ref = elems[-2].strip()
68 |         res = elems[-1].strip()
69 |         results.append((scene, ray, variant, ref, res))
70 | 
71 |     for table in tables:
72 |         title = table["title"]
73 |         variants = table["variants"]
74 |         print("% {}".format(title))
75 |         for scene, scene_name in scenes:
76 |             if len(variants) > 0:
77 |                 print("\midrule")
78 |                 print("\\multirow{{{}}}{{*}}{{{}}} & ".format(len(variants), scene_name), end="")
79 |             else:
80 |                 print("{} ".format(scene_name), end="")
81 |             kept_scene = list(filter(lambda x: x[0] == scene, results))
82 |             if len(variants) > 0:
83 |                 for i, (variant, variant_name) in enumerate(variants):
84 |                     if i > 0:
85 |                         print(" & ", end="")
86 |                     print("{} ".format(variant_name), end="")
87 |                     kept_variant = filter(lambda x: x[2] == variant, kept_scene)
88 |                     print_results(kept_variant, rays)
89 |                     print("\\\\")
90 |             else:
91 |                 print_results(kept_scene, rays)
92 |                 print("\\\\")
93 | 
94 | if __name__ == "__main__":
95 |     main()
96 | 


--------------------------------------------------------------------------------
/benchmarks/profile.sh:
--------------------------------------------------------------------------------
 1 | echo "Profiling..."
 2 | 
 3 | echo "" > profile.txt
 4 | 
 5 | ITER=5
 6 | 
 7 | cd renderers
 8 | 
 9 | cd living_room
10 | taskset 0x1 bin/rodent --bench $ITER --eye -1.8 1 -5 --dir -0.1 0 1 --up 0 1 0 --fov 60 --width 1920 --height 1088 -o render.png 2> /dev/null >> ../../profile.txt
11 | cd ..
12 | 
13 | cd bathroom
14 | taskset 0x1 bin/rodent --bench $ITER --eye -2.26 15.62 35.23 --dir -22.18 -5.32 -97.36 --up 0 1 0 --fov 60 --width 1920 --height 1088 -o render.png 2> /dev/null >> ../../profile.txt
15 | cd ..
16 | 
17 | cd bedroom
18 | taskset 0x1 bin/rodent --bench $ITER --eye 3.5 1 3.5 --dir -1 0 -1 --up 0 1 0 --fov 60 --width 1920 --height 1088 -o render.png 2> /dev/null >> ../../profile.txt
19 | cd ..
20 | 
21 | cd dining_room
22 | taskset 0x1 bin/rodent --bench $ITER --eye -4 1.3 0.0 --dir 1 -0.1 0 --up 0 1 0 --fov 48 --width 1920 --height 1088 -o render.png 2> /dev/null >> ../../profile.txt
23 | cd ..
24 | 
25 | cd kitchen
26 | taskset 0x1 bin/rodent --bench $ITER --eye 0.5 1.6 3 --dir -0.4 -0.05 -1 --up 0 1 0 --fov 60 --width 1920 --height 1088 -o render.png 2> /dev/null >> ../../profile.txt
27 | cd ..
28 | 
29 | cd staircase
30 | taskset 0x1 bin/rodent --bench $ITER --eye 0 1.6 4.5 --dir 0 0 -1 --up 0 1 0 --fov 38 --height 1280 --width 720 -o render.png 2> /dev/null >> ../../profile.txt
31 | cd ..
32 | 


--------------------------------------------------------------------------------
/benchmarks/results.txt:
--------------------------------------------------------------------------------
 1 | sponza : primary : -w 4 : 7.54638 : 7.3053
 2 | sponza : ao : -w 4 : 14.7275 : 16.3316
 3 | sponza : bounces : -w 4 : 1.88431 : 1.73046
 4 | sponza : primary : -w 4 -p : None : 7.86724
 5 | sponza : ao : -w 4 -p : None : 16.3132
 6 | sponza : bounces : -w 4 -p : None : 1.42685
 7 | sponza : primary : -w 4 -s : 3.10173 : 2.86832
 8 | sponza : ao : -w 4 -s : 6.06024 : 5.68684
 9 | sponza : bounces : -w 4 -s : 1.86277 : 1.65231
10 | sponza : primary : -w 8 : 7.60413 : 7.38906
11 | sponza : ao : -w 8 : 14.5298 : 16.4992
12 | sponza : bounces : -w 8 : 2.2073 : 2.05149
13 | sponza : primary : -w 8 -p : None : 8.15224
14 | sponza : ao : -w 8 -p : None : 16.2574
15 | sponza : bounces : -w 8 -p : None : 1.48211
16 | sponza : primary : -w 8 -s : 4.18268 : 3.83035
17 | sponza : ao : -w 8 -s : 8.12357 : 7.13171
18 | sponza : bounces : -w 8 -s : 2.28985 : 2.13325
19 | crown : primary : -w 4 : 19.8355 : 21.7261
20 | crown : ao : -w 4 : 8.53402 : 8.00505
21 | crown : bounces : -w 4 : 3.98229 : 3.5772
22 | crown : primary : -w 4 -p : None : 22.9433
23 | crown : ao : -w 4 -p : None : 7.23937
24 | crown : bounces : -w 4 -p : None : 2.85439
25 | crown : primary : -w 4 -s : 11.5832 : 10.198
26 | crown : ao : -w 4 -s : 6.10901 : 5.25563
27 | crown : bounces : -w 4 -s : 3.72163 : 3.25043
28 | crown : primary : -w 8 : 18.4549 : 20.0882
29 | crown : ao : -w 8 : 8.86579 : 8.55806
30 | crown : bounces : -w 8 : 4.45182 : 4.04803
31 | crown : primary : -w 8 -p : None : 22.3033
32 | crown : ao : -w 8 -p : None : 7.32274
33 | crown : bounces : -w 8 -p : None : 2.98485
34 | crown : primary : -w 8 -s : 13.1341 : 11.7769
35 | crown : ao : -w 8 -s : 7.14388 : 5.9681
36 | crown : bounces : -w 8 -s : 4.40347 : 3.95503
37 | san-miguel : primary : -w 4 : 4.63054 : 4.51849
38 | san-miguel : ao : -w 4 : 2.9492 : 2.52732
39 | san-miguel : bounces : -w 4 : 1.29591 : 1.11484
40 | san-miguel : primary : -w 4 -p : None : 4.93072
41 | san-miguel : ao : -w 4 -p : None : 2.26067
42 | san-miguel : bounces : -w 4 -p : None : 0.891493
43 | san-miguel : primary : -w 4 -s : 2.67431 : 2.09576
44 | san-miguel : ao : -w 4 -s : 2.51641 : 2.11315
45 | san-miguel : bounces : -w 4 -s : 1.35706 : 1.178
46 | san-miguel : primary : -w 8 : 3.94365 : 3.84696
47 | san-miguel : ao : -w 8 : 3.17926 : 2.80414
48 | san-miguel : bounces : -w 8 : 1.45565 : 1.25251
49 | san-miguel : primary : -w 8 -p : None : 4.2969
50 | san-miguel : ao : -w 8 -p : None : 2.32471
51 | san-miguel : bounces : -w 8 -p : None : 0.901289
52 | san-miguel : primary : -w 8 -s : 3.22086 : 2.29832
53 | san-miguel : ao : -w 8 -s : 2.95476 : 2.5676
54 | san-miguel : bounces : -w 8 -s : 1.60873 : 1.41938
55 | powerplant : primary : -w 4 : 10.2047 : 10.3249
56 | powerplant : ao : -w 4 : 19.8713 : 21.8186
57 | powerplant : bounces : -w 4 : 2.4224 : 2.1449
58 | powerplant : primary : -w 4 -p : None : 10.8075
59 | powerplant : ao : -w 4 -p : None : 23.0244
60 | powerplant : bounces : -w 4 -p : None : 1.70842
61 | powerplant : primary : -w 4 -s : 5.93011 : 4.96106
62 | powerplant : ao : -w 4 -s : 9.52665 : 8.2603
63 | powerplant : bounces : -w 4 -s : 2.46661 : 2.20007
64 | powerplant : primary : -w 8 : 9.33797 : 8.99668
65 | powerplant : ao : -w 8 : 18.8956 : 20.9723
66 | powerplant : bounces : -w 8 : 2.75902 : 2.46662
67 | powerplant : primary : -w 8 -p : None : 9.96959
68 | powerplant : ao : -w 8 -p : None : 22.0081
69 | powerplant : bounces : -w 8 -p : None : 1.80079
70 | powerplant : primary : -w 8 -s : 6.27344 : 5.87107
71 | powerplant : ao : -w 8 -s : 12.1059 : 10.6569
72 | powerplant : bounces : -w 8 -s : 2.90945 : 2.60646
73 | 


--------------------------------------------------------------------------------
/benchmarks/results_aila.txt:
--------------------------------------------------------------------------------
 1 | sponza : primary : 363.22 : 373.097
 2 | sponza : ao : 975.008 : 1031.68
 3 | sponza : bounces : 143.587 : 146.283
 4 | crown : primary : 816.357 : 788.193
 5 | crown : ao : 401.609 : 372.405
 6 | crown : bounces : 164.526 : 157.337
 7 | san-miguel : primary : 204.338 : 194.698
 8 | san-miguel : ao : 153.253 : 149.804
 9 | san-miguel : bounces : 59.0768 : 67.614
10 | powerplant : primary : 525.016 : 473.339
11 | powerplant : ao : 1112.64 : 1086.86
12 | powerplant : bounces : 142.308 : 130.621
13 | 


--------------------------------------------------------------------------------
/benchmarks/results_arm_par.txt:
--------------------------------------------------------------------------------
 1 | sponza : primary : -w 4 : None : 2.7481
 2 | sponza : ao : -w 4 : None : 5.35983
 3 | sponza : bounces : -w 4 : None : 0.953911
 4 | sponza : primary : -w 4 -s : None : 1.3769
 5 | sponza : ao : -w 4 -s : None : 2.66208
 6 | sponza : bounces : -w 4 -s : None : 0.99925
 7 | crown : primary : -w 4 : None : 9.81564
 8 | crown : ao : -w 4 : None : 3.64903
 9 | crown : bounces : -w 4 : None : 1.86756
10 | crown : primary : -w 4 -s : None : 5.8034
11 | crown : ao : -w 4 -s : None : 3.00981
12 | crown : bounces : -w 4 -s : None : 1.90784
13 | san-miguel : primary : -w 4 : None : 2.07353
14 | san-miguel : ao : -w 4 : None : 1.49059
15 | san-miguel : bounces : -w 4 : None : 0.720948
16 | san-miguel : primary : -w 4 -s : None : 1.06802
17 | san-miguel : ao : -w 4 -s : None : 1.31238
18 | san-miguel : bounces : -w 4 -s : None : 0.781979
19 | powerplant : primary : -w 4 : None : 4.44325
20 | powerplant : ao : -w 4 : None : 8.19047
21 | powerplant : bounces : -w 4 : None : 1.09488
22 | powerplant : primary : -w 4 -s : None : 2.59246
23 | powerplant : ao : -w 4 -s : None : 4.05906
24 | powerplant : bounces : -w 4 -s : None : 1.22522
25 | 


--------------------------------------------------------------------------------
/benchmarks/results_par.txt:
--------------------------------------------------------------------------------
 1 | sponza : primary : -w 4 : 36.3476 : 34.725
 2 | sponza : ao : -w 4 : 70.6553 : 76.3449
 3 | sponza : bounces : -w 4 : 11.0716 : 9.77876
 4 | sponza : primary : -w 4 -p : None : 36.0903
 5 | sponza : ao : -w 4 -p : None : 75.9812
 6 | sponza : bounces : -w 4 -p : None : 7.74625
 7 | sponza : primary : -w 4 -s : 18.1877 : 14.8939
 8 | sponza : ao : -w 4 -s : 36.4553 : 28.598
 9 | sponza : bounces : -w 4 -s : 11.2891 : 9.36764
10 | sponza : primary : -w 8 : 36.3964 : 34.8446
11 | sponza : ao : -w 8 : 67.8053 : 76.7301
12 | sponza : bounces : -w 8 : 12.7442 : 11.461
13 | sponza : primary : -w 8 -p : None : 38.2885
14 | sponza : ao : -w 8 -p : None : 77.2587
15 | sponza : bounces : -w 8 -p : None : 7.88341
16 | sponza : primary : -w 8 -s : 22.7849 : 18.7401
17 | sponza : ao : -w 8 -s : 44.717 : 35.7588
18 | sponza : bounces : -w 8 -s : 13.3862 : 11.8594
19 | crown : primary : -w 4 : 97.8603 : 102.512
20 | crown : ao : -w 4 : 44.2639 : 40.2826
21 | crown : bounces : -w 4 : 22.2004 : 19.4772
22 | crown : primary : -w 4 -p : None : 104.206
23 | crown : ao : -w 4 -p : None : 34.9943
24 | crown : bounces : -w 4 -p : None : 14.952
25 | crown : primary : -w 4 -s : 63.9428 : 53.0047
26 | crown : ao : -w 4 -s : 33.7507 : 28.0173
27 | crown : bounces : -w 4 -s : 21.4867 : 17.9545
28 | crown : primary : -w 8 : 89.923 : 95.4834
29 | crown : ao : -w 8 : 44.2455 : 42.1222
30 | crown : bounces : -w 8 : 23.1584 : 21.0389
31 | crown : primary : -w 8 -p : None : 103.194
32 | crown : ao : -w 8 -p : None : 35.5137
33 | crown : bounces : -w 8 -p : None : 15.2423
34 | crown : primary : -w 8 -s : 70.0337 : 59.2134
35 | crown : ao : -w 8 -s : 38.9787 : 32.6553
36 | crown : bounces : -w 8 -s : 23.8139 : 20.9049
37 | san-miguel : primary : -w 4 : 23.0406 : 22.0645
38 | san-miguel : ao : -w 4 : 15.9148 : 13.8224
39 | san-miguel : bounces : -w 4 : 7.32533 : 6.45785
40 | san-miguel : primary : -w 4 -p : None : 23.2798
41 | san-miguel : ao : -w 4 -p : None : 11.859
42 | san-miguel : bounces : -w 4 -p : None : 4.93395
43 | san-miguel : primary : -w 4 -s : 15.4667 : 10.8329
44 | san-miguel : ao : -w 4 -s : 14.0318 : 11.4903
45 | san-miguel : bounces : -w 4 -s : 7.6656 : 6.8292
46 | san-miguel : primary : -w 8 : 19.1285 : 18.7394
47 | san-miguel : ao : -w 8 : 16.3181 : 14.7668
48 | san-miguel : bounces : -w 8 : 7.62373 : 6.98282
49 | san-miguel : primary : -w 8 -p : None : 20.2968
50 | san-miguel : ao : -w 8 -p : None : 11.8617
51 | san-miguel : bounces : -w 8 -p : None : 4.85699
52 | san-miguel : primary : -w 8 -s : 17.9463 : 11.3314
53 | san-miguel : ao : -w 8 -s : 16.133 : 13.5902
54 | san-miguel : bounces : -w 8 -s : 8.51997 : 7.75063
55 | powerplant : primary : -w 4 : 50.633 : 49.3437
56 | powerplant : ao : -w 4 : 95.4166 : 102.892
57 | powerplant : bounces : -w 4 : 13.8832 : 11.8606
58 | powerplant : primary : -w 4 -p : None : 50.9431
59 | powerplant : ao : -w 4 -p : None : 106.134
60 | powerplant : bounces : -w 4 -p : None : 8.97721
61 | powerplant : primary : -w 4 -s : 33.1721 : 25.3444
62 | powerplant : ao : -w 4 -s : 52.6649 : 42.33
63 | powerplant : bounces : -w 4 -s : 14.254 : 12.0286
64 | powerplant : primary : -w 8 : 44.8194 : 43.0222
65 | powerplant : ao : -w 8 : 88.0362 : 98.0998
66 | powerplant : bounces : -w 8 : 14.658 : 13.2946
67 | powerplant : primary : -w 8 -p : None : 46.8461
68 | powerplant : ao : -w 8 -p : None : 101.687
69 | powerplant : bounces : -w 8 -p : None : 9.25009
70 | powerplant : primary : -w 8 -s : 34.5967 : 29.0171
71 | powerplant : ao : -w 8 -s : 64.9127 : 52.1655
72 | powerplant : bounces : -w 8 -s : 15.9856 : 13.9619
73 | 


--------------------------------------------------------------------------------
/benchmarks/scenes/generate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | ARTY=/space/perard/sources/arty/build/src/arty
 3 | cd sponza
 4 | export ARTY_SCENE_SIZE=4000
 5 | export ARTY_AO_FACTOR=0.8
 6 | export ARTY_AO_OFFSET=0.1
 7 | ${ARTY} sponza.yml -a 4 -s 75 -o sponza-ao.png
 8 | ${ARTY} sponza.yml -a 1 -s 10 -o sponza-pt.png
 9 | cd ..
10 | cd crown
11 | export ARTY_SCENE_SIZE=200
12 | export ARTY_AO_FACTOR=0.9
13 | export ARTY_AO_OFFSET=0.0
14 | ${ARTY} crown.yml -a 4 -s 75 -o crown-ao.png
15 | ${ARTY} crown.yml -a 1 -s 10 -o crown-pt.png
16 | cd ..
17 | cd san-miguel
18 | export ARTY_SCENE_SIZE=50
19 | export ARTY_AO_FACTOR=0.9
20 | export ARTY_AO_OFFSET=0.0
21 | ${ARTY} san-miguel.yml -a 4 -s 75 -o san-miguel-ao.png
22 | ${ARTY} san-miguel.yml -a 1 -s 10 -o san-miguel-pt.png
23 | cd ..
24 | cd powerplant
25 | export ARTY_SCENE_SIZE=200000
26 | export ARTY_AO_FACTOR=0.9
27 | export ARTY_AO_OFFSET=0.0
28 | ${ARTY} powerplant.yml -a 4 -s 75 -o powerplant-ao.png
29 | ${ARTY} powerplant.yml -a 1 -s 10 -o powerplant-pt.png
30 | cd ..
31 | 


--------------------------------------------------------------------------------
/cmake/modules/FindLZ4.cmake:
--------------------------------------------------------------------------------
1 | find_path(LZ4_INCLUDE_DIR NAMES lz4.h)
2 | find_library(LZ4_LIBRARY NAMES lz4)
3 | 
4 | include(FindPackageHandleStandardArgs)
5 | find_package_handle_standard_args(LZ4 DEFAULT_MSG LZ4_LIBRARY LZ4_INCLUDE_DIR)
6 | 


--------------------------------------------------------------------------------
/cmake/test/run_rodent.cmake:
--------------------------------------------------------------------------------
1 | execute_process(COMMAND ${RODENT} --bench 50 -o ${CMAKE_CURRENT_BINARY_DIR}/${RODENT_OUTPUT}.png ${RODENT_ARGS} RESULT_VARIABLE CMD_RESULT WORKING_DIRECTORY ${RODENT_DIR})
2 | if (CMD_RESULT)
3 |     message(FATAL_ERROR "Error running rodent")
4 | endif()
5 | execute_process(COMMAND ${IM_COMPARE} -metric MSE ${TESTING_DIR}/ref-cornell.png ${RODENT_OUTPUT}.png ${RODENT_OUTPUT}-diff.png RESULT_VARIABLE CMD_RESULT)
6 | if (CMD_RESULT)
7 |     message(FATAL_ERROR "The output of rodent '${RODENT_OUTPUT}.png' does not match the reference '${TESTING_DIR}/ref-cornell.png'")
8 | endif()
9 | 


--------------------------------------------------------------------------------
/cmake/test/run_traversal.cmake:
--------------------------------------------------------------------------------
 1 | # Test script in three steps: run the traversal benchmark on the Sponza BVH and
 2 | # primary rays, convert the resulting .fbuf file to PNG, then run the comparison
 3 | # tool against ref-primary.png. A non-zero result of any step is fatal.
 4 | # Inputs: BENCH_TRAVERSAL, BENCH_TRAVERSAL_ARGS, FBUF2PNG, IM_COMPARE,
 5 | #         TESTING_DIR, TRAVERSAL_OUTPUT.
 6 | execute_process(
 7 |     COMMAND ${BENCH_TRAVERSAL} -bvh ${TESTING_DIR}/sponza.bvh -ray ${TESTING_DIR}/sponza-primary.rays --bench 1 --warmup 0 --tmin 0.01 --tmax 5000 -o ${TRAVERSAL_OUTPUT}.fbuf ${BENCH_TRAVERSAL_ARGS}
 8 |     RESULT_VARIABLE TRAVERSAL_STATUS)
 9 | if (TRAVERSAL_STATUS)
10 |     message(FATAL_ERROR "Error running the traversal benchmark tool")
11 | endif()
12 | 
13 | execute_process(
14 |     COMMAND ${FBUF2PNG} -n ${TRAVERSAL_OUTPUT}.fbuf ${TRAVERSAL_OUTPUT}.png
15 |     RESULT_VARIABLE CONVERT_STATUS)
16 | if (CONVERT_STATUS)
17 |     message(FATAL_ERROR "Error running fbuf2png")
18 | endif()
19 | 
20 | execute_process(
21 |     COMMAND ${IM_COMPARE} -metric MSE ${TESTING_DIR}/ref-primary.png ${TRAVERSAL_OUTPUT}.png ${TRAVERSAL_OUTPUT}-diff.png
22 |     RESULT_VARIABLE COMPARE_STATUS)
23 | if (COMPARE_STATUS)
24 |     message(FATAL_ERROR "The output of the traversal '${TRAVERSAL_OUTPUT}.png' does not match the reference '${TESTING_DIR}/ref-primary.png'")
25 | endif()
26 | 


--------------------------------------------------------------------------------
/refs/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Top-level build script of the reference renderers.
 2 | # cmake_minimum_required() comes first so that policies are configured before
 3 | # project() runs.
 4 | cmake_minimum_required(VERSION 3.0)
 5 | project(cats)
 6 | 
 7 | # Find modules shipped with this project (Embree, ISPC, OptiX, SDL2, TBB).
 8 | # PROJECT_SOURCE_DIR keeps the path valid when this directory is part of a
 9 | # larger build; it equals CMAKE_SOURCE_DIR when built on its own.
10 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules)
11 | 
12 | find_package(SDL2 REQUIRED)
13 | find_package(CUDA REQUIRED)
14 | find_package(JPEG REQUIRED)
15 | find_package(PNG REQUIRED)
16 | find_package(OptiX REQUIRED)
17 | find_package(Embree REQUIRED)
18 | find_package(ISPC REQUIRED)
19 | find_package(TBB REQUIRED)
20 | 
21 | add_subdirectory(src)
22 | 


--------------------------------------------------------------------------------
/refs/cmake/modules/FindEmbree.cmake:
--------------------------------------------------------------------------------
1 | find_path(Embree_DIR include/embree3/rtcore.h HINTS /usr)
2 | find_path(Embree_INCLUDE_DIR embree3/rtcore.h HINTS ${Embree_DIR}/include)
3 | find_library(Embree_LIBRARY NAMES embree3 PATHS ${Embree_DIR}/lib64 ${Embree_DIR}/lib)
4 | 
5 | include(FindPackageHandleStandardArgs)
6 | find_package_handle_standard_args(Embree REQUIRED_VARS Embree_LIBRARY Embree_INCLUDE_DIR)
7 | 


--------------------------------------------------------------------------------
/refs/cmake/modules/FindISPC.cmake:
--------------------------------------------------------------------------------
1 | # Locates the ISPC compiler: ISPC_DIR receives the directory that contains a
2 | # file named `ispc` (/usr/bin is tried as a hint).
3 | find_path(ISPC_DIR
4 |     NAMES ispc
5 |     HINTS /usr/bin)
6 | 
7 | include(FindPackageHandleStandardArgs)
8 | find_package_handle_standard_args(ISPC REQUIRED_VARS ISPC_DIR)
9 | 


--------------------------------------------------------------------------------
/refs/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Directory-wide include paths shared by the three reference programs.
 2 | include_directories(${OptiX_INCLUDE} ${CUDA_INCLUDE_DIRS} ${SDL2_INCLUDE_DIRS} ${Embree_INCLUDE_DIR} ${TBB_INCLUDE_DIRS})
 3 | 
 4 | # OptiX path tracer: optix_path_tracer.cu is compiled to PTX and the list of
 5 | # generated files is handed to the program through GENERATED_PTX_FILE.
 6 | cuda_wrap_srcs(optix_path_tracer PTX OPTIX_GENERATED_FILES optix_path_tracer.cu)
 7 | add_executable(optix_path_tracer optix_path_tracer.cpp obj.cpp obj.h image.cpp ${OPTIX_GENERATED_FILES})
 8 | target_link_libraries(optix_path_tracer PUBLIC ${SDL2_LIBRARY} ${optix_LIBRARY} ${optixu_LIBRARY} ${PNG_LIBRARY} ${JPEG_LIBRARY})
 9 | target_compile_definitions(optix_path_tracer PUBLIC -DGENERATED_PTX_FILE="${OPTIX_GENERATED_FILES}")
10 | 
11 | # Embree path tracer: the ISPC source is compiled by a custom command into an
12 | # object file that is listed among the sources of the executable.
13 | set(ENABLE_TIMING FALSE CACHE BOOL "Enables/disables timing information for Embree path tracer")
14 | add_executable(embree_path_tracer embree_path_tracer.cpp obj.cpp image.cpp ${CMAKE_CURRENT_BINARY_DIR}/embree_path_tracer.ispc.generated.o)
15 | if (ENABLE_TIMING)
16 |     set(ISPC_FLAGS -DENABLE_TIMING)
17 |     target_compile_definitions(embree_path_tracer PUBLIC -DENABLE_TIMING -DFORCE_SERIAL_TIMING)
18 | endif()
19 | # embree_path_tracer.h contains ISPC-specific declarations, so it is tracked as
20 | # an input too: editing it must rebuild the ISPC object.
21 | add_custom_command(
22 |     OUTPUT embree_path_tracer.ispc.generated.o
23 |     COMMAND ${ISPC_DIR}/ispc ${CMAKE_CURRENT_SOURCE_DIR}/embree_path_tracer.ispc -o embree_path_tracer.ispc.generated.o -O3 --target=avx2-i32x8 -I${Embree_INCLUDE_DIR} ${ISPC_FLAGS}
24 |     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
25 |     DEPENDS ${ISPC_DIR}/ispc ${CMAKE_CURRENT_SOURCE_DIR}/embree_path_tracer.ispc ${CMAKE_CURRENT_SOURCE_DIR}/embree_path_tracer.h ${CMAKE_CURRENT_SOURCE_DIR}/common.isph ${CMAKE_CURRENT_SOURCE_DIR}/math.isph
26 |     VERBATIM)
27 | target_link_libraries(embree_path_tracer PUBLIC ${SDL2_LIBRARY} ${Embree_LIBRARY} ${PNG_LIBRARY} ${JPEG_LIBRARY} ${TBB_LIBRARIES})
28 | 
29 | # Shading benchmark: built like the Embree path tracer, with bench_shading.h
30 | # tracked as an input of the ISPC compilation for the same reason.
31 | add_executable(bench_shading bench_shading.cpp ${CMAKE_CURRENT_BINARY_DIR}/bench_shading.ispc.generated.o)
32 | add_custom_command(
33 |     OUTPUT bench_shading.ispc.generated.o
34 |     COMMAND ${ISPC_DIR}/ispc ${CMAKE_CURRENT_SOURCE_DIR}/bench_shading.ispc -o bench_shading.ispc.generated.o -O3 --target=avx2-i32x8 -I${Embree_INCLUDE_DIR} ${ISPC_FLAGS}
35 |     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
36 |     DEPENDS ${ISPC_DIR}/ispc ${CMAKE_CURRENT_SOURCE_DIR}/bench_shading.ispc ${CMAKE_CURRENT_SOURCE_DIR}/bench_shading.h ${CMAKE_CURRENT_SOURCE_DIR}/common.isph ${CMAKE_CURRENT_SOURCE_DIR}/math.isph
37 |     VERBATIM)
38 | 


--------------------------------------------------------------------------------
/refs/src/bench_shading.h:
--------------------------------------------------------------------------------
 1 | #ifndef BENCH_SHADING_H
 2 | #define BENCH_SHADING_H
 3 | 
 4 | #if defined(ISPC) && !defined(ISPC_STD_C99_DATATYPES) // ISPC builds only, and only once per translation unit
 5 | #define ISPC_STD_C99_DATATYPES
 6 | typedef unsigned int32 uint32_t; // C99-style names mapped onto the ISPC integer types
 7 | typedef unsigned int64 uint64_t;
 8 | typedef int32 int32_t;
 9 | typedef int64 int64_t;
10 | #endif
11 | 
12 | struct stream_s { // one pointer per attribute; presumably each addresses an array with one entry per ray -- TODO confirm
13 |     uint32_t* rnd;
14 |     int32_t* depth;
15 |     int32_t* geom_id;
16 |     int32_t* prim_id;
17 |     float* mis;
18 |     float* contrib_r; // contribution, stored as separate r, g and b pointers
19 |     float* contrib_g;
20 |     float* contrib_b;
21 |     float* org_x; // origin, stored as separate x, y and z pointers
22 |     float* org_y;
23 |     float* org_z;
24 |     float* dir_x; // direction, stored as separate x, y and z pointers
25 |     float* dir_y;
26 |     float* dir_z;
27 |     float* tmin;
28 |     float* tmax;
29 |     float* t; // NOTE(review): t, u, v look like hit distance and barycentric coordinates -- confirm
30 |     float* u;
31 |     float* v;
32 | };
33 | 
34 | #endif // BENCH_SHADING_H
35 | 


--------------------------------------------------------------------------------
/refs/src/color.h:
--------------------------------------------------------------------------------
 1 | #ifndef COLOR_H
 2 | #define COLOR_H
 3 | 
 4 | #include "float3.h"
 5 | #include "float4.h"
 6 | 
 7 | struct rgba;
 8 | 
 9 | /// Color with three float channels, stored in the x, y, z members of float3.
10 | struct rgb : public float3 {
11 |     rgb() {}
12 |     explicit rgb(float x) : float3(x) {}
13 |     explicit rgb(const rgba& rgba);
14 |     rgb(float r, float g, float b) : float3(r, g, b) {}
15 |     rgb(const float3& rgb) : float3(rgb) {}
16 | 
17 |     /// Adds the channels of `other` to this color.
18 |     rgb& operator += (const rgb& other) {
19 |         return *this = rgb(*this + other);
20 |     }
21 | };
22 | 
23 | /// Color with four float channels, stored in the members of float4.
24 | struct rgba : public float4 {
25 |     rgba() {}
26 |     explicit rgba(float x) : float4(x) {}
27 |     explicit rgba(const rgb& rgb, float a) : float4(rgb, a) {}
28 |     rgba(float r, float g, float b, float a) : float4(r, g, b, a) {}
29 |     rgba(const float4& rgba) : float4(rgba) {}
30 | 
31 |     /// Adds `other` to this color through the addition operator of float4.
32 |     rgba& operator += (const rgba& other) {
33 |         return *this = rgba(*this + other);
34 |     }
35 | };
36 | 
37 | // Defined here because rgba is only forward-declared inside rgb.
38 | inline rgb::rgb(const rgba& rgba)
39 |     : float3(rgba)
40 | {}
41 | 
42 | /// Raises each channel of c to the power g.
43 | inline rgb gamma(const rgb& c, float g = 0.5f) {
44 |     const float red   = std::pow(c.x, g);
45 |     const float green = std::pow(c.y, g);
46 |     const float blue  = std::pow(c.z, g);
47 |     return rgb(red, green, blue);
48 | }
49 | 
50 | /// Raises the x, y and z channels of c to the power g; w is copied unchanged.
51 | inline rgba gamma(const rgba& c, float g = 0.5f) {
52 |     const float red   = std::pow(c.x, g);
53 |     const float green = std::pow(c.y, g);
54 |     const float blue  = std::pow(c.z, g);
55 |     return rgba(red, green, blue, c.w);
56 | }
57 | 
58 | /// Clamps each channel of val between the matching channels of min and max.
59 | inline rgb clamp(const rgb& val, const rgb& min, const rgb& max) {
60 |     const float red   = clamp(val.x, min.x, max.x);
61 |     const float green = clamp(val.y, min.y, max.y);
62 |     const float blue  = clamp(val.z, min.z, max.z);
63 |     return rgb(red, green, blue);
64 | }
65 | 
66 | /// Clamps each channel of val, w included, between the matching channels of
67 | /// min and max.
68 | inline rgba clamp(const rgba& val, const rgba& min, const rgba& max) {
69 |     const float red   = clamp(val.x, min.x, max.x);
70 |     const float green = clamp(val.y, min.y, max.y);
71 |     const float blue  = clamp(val.z, min.z, max.z);
72 |     const float alpha = clamp(val.w, min.w, max.w);
73 |     return rgba(red, green, blue, alpha);
74 | }
75 | 
76 | #endif // COLOR_H
77 | 


--------------------------------------------------------------------------------
/refs/src/common.h:
--------------------------------------------------------------------------------
 1 | #ifndef COMMON_H
 2 | #define COMMON_H
 3 | 
 4 | #include <iostream>
 5 | #include <cstdlib>
 6 | #include <cstdint>
 7 | #include <random>
 8 | 
 9 | // Rounds val up to the next multiple of div; multiples are returned unchanged.
10 | inline uint32_t round_up(uint32_t val, uint32_t div) {
11 |     const uint32_t rem = val % div;
12 |     return rem == 0 ? val : val + (div - rem);
13 | }
14 | 
15 | /// Limits a to the range [b, c]; the lower bound b is checked first.
16 | template <typename T>
17 | inline T clamp(T a, T b, T c) {
18 |     return (a < b) ? b : (a > c) ? c : a;
19 | }
20 | 
21 | /// Returns the smallest q such that 2^q >= i (0 when i <= 1). Counts the bits of i - 1, so nothing can overflow.
22 | template <typename T>
23 | inline T closest_log2(T i) {
24 |     T q = 0;
25 |     if (i > 1) for (T v = i - 1; v != 0; v >>= 1) q++;
26 |     return q;
27 | }
28 | 
29 | /// Returns the bits of a float as a signed 32-bit integer, using a union of both types.
30 | inline int32_t float_as_int(float f) {
31 |     union { float as_float; int32_t as_int; } bits;
32 |     bits.as_float = f;
33 |     return bits.as_int;
34 | }
35 | 
36 | /// Returns the float whose bits equal those of a signed 32-bit integer, using a union of both types.
37 | inline float int_as_float(int32_t i) {
38 |     union { float as_float; int32_t as_int; } bits;
39 |     bits.as_int = i;
40 |     return bits.as_float;
41 | }
42 | 
43 | inline void error [[noreturn]] () {
44 |     std::cerr << std::endl;
45 |     abort();
46 | }
47 | 
48 | /// Outputs an error message in the console.
49 | template <typename T, typename... Args>
50 | inline void error [[noreturn]] (T t, Args... args) {
51 | #if COLORIZE
52 |     std::cerr << "\033[1;31m";
53 | #endif
54 |     std::cerr << t;
55 | #if COLORIZE
56 |     std::cerr << "\033[0m";
57 | #endif
58 |     error(args...);
59 | }
60 | 
61 | /// Finishes an information message: ends the line on std::cout.
62 | inline void info() { std::cout << std::endl; }
63 | 
64 | /// Writes t, then every remaining argument in order, to std::cout and ends
65 | /// the line after the last one.
66 | template <typename T, typename... Args>
67 | inline void info(T t, Args... args) {
68 |     std::cout << t;
69 |     info(args...);
70 | }
71 | 
72 | /// Finishes a warning message: ends the line on std::clog.
73 | inline void warn() {
74 |     std::clog << std::endl;
75 | }
76 | 
77 | /// Writes t to std::clog (wrapped in color escape codes when COLORIZE is set),
78 | /// then handles the remaining arguments the same way and ends the line.
79 | template <typename T, typename... Args>
80 | inline void warn(T t, Args... args) {
81 | #if COLORIZE
82 |     std::clog << "\033[1;33m" << t << "\033[0m";
83 | #else
84 |     std::clog << t;
85 | #endif
86 |     warn(args...);
87 | }
88 | 
89 | #endif // COMMON_H
90 | 


--------------------------------------------------------------------------------
/refs/src/embree_path_tracer.h:
--------------------------------------------------------------------------------
 1 | #ifndef EMBREE_PATH_TRACER_H
 2 | #define EMBREE_PATH_TRACER_H
 3 | 
 4 | #if defined(ISPC) && !defined(ISPC_STD_C99_DATATYPES) // ISPC builds only, and only once per translation unit
 5 | #define ISPC_STD_C99_DATATYPES
 6 | typedef unsigned int32 uint32_t; // C99-style names mapped onto the ISPC integer types
 7 | typedef unsigned int64 uint64_t;
 8 | typedef int32 int32_t;
 9 | typedef int64 int64_t;
10 | #endif
11 | 
12 | struct rgb_s { // three float color channels
13 |     float r, g, b;
14 | };
15 | 
16 | struct float3_s { // three float components
17 |     float x, y, z;
18 | };
19 | 
20 | struct float2_s { // two float components
21 |     float x, y;
22 | };
23 | 
24 | struct image_s { // pixel pointer plus dimensions; presumably one 32-bit value per pixel -- TODO confirm
25 |     uint32_t* pixels;
26 |     uint32_t width;
27 |     uint32_t height;
28 | };
29 | 
30 | struct camera_s { // eye position, three direction vectors and two scalars
31 |     struct float3_s eye;
32 |     struct float3_s dir;
33 |     struct float3_s right;
34 |     struct float3_s up;
35 |     float w, h; // NOTE(review): meaning of w and h is not visible here -- confirm against the code that fills the camera
36 | };
37 | 
38 | struct light_s { // three vertices, a normal, an inverse area and a color
39 |     struct float3_s v0;
40 |     struct float3_s v1;
41 |     struct float3_s v2;
42 |     struct float3_s n;
43 |     float inv_area;
44 |     struct rgb_s color;
45 | };
46 | 
47 | struct material_s { // NOTE(review): field names resemble Wavefront MTL parameters (Ke, Kd, Ks, Tf, Ns, Ni, illum) -- confirm
48 |     struct rgb_s ke;
49 |     struct rgb_s kd;
50 |     struct rgb_s ks;
51 |     struct rgb_s tf;
52 |     float ns;
53 |     float ni;
54 |     int32_t map_kd; // presumably indices into scene_s::images -- TODO confirm
55 |     int32_t map_ks;
56 |     int32_t light; // presumably an index into scene_s::lights -- TODO confirm
57 |     uint32_t illum;
58 | };
59 | 
60 | struct counters_s { // 64-bit counters; units are not visible here
61 |     int64_t total_rays;
62 |     int64_t primary;
63 |     int64_t shadow;
64 |     int64_t shade;
65 |     int64_t bounces;
66 |     int64_t total;
67 | };
68 | 
69 | struct scene_s { // bundles the scene handle, the film buffer and the scene arrays
70 |     RTCScene scene; // RTCScene is not declared in this header; its declaration must already be visible to the including file
71 | 
72 |     float* film;
73 |     uint32_t film_width;
74 |     uint32_t film_height;
75 | 
76 |     uint32_t*          indices;
77 |     struct float3_s*   vertices;
78 |     struct float3_s*   normals;
79 |     struct float2_s*   texcoords;
80 |     struct camera_s*   camera;
81 |     struct image_s*    images;
82 |     struct material_s* materials;
83 |     struct light_s*    lights;
84 | 
85 |     float pdf_lightpick;
86 |     uint32_t num_lights; // presumably the number of entries behind `lights` -- TODO confirm
87 | };
88 | 
89 | #endif // EMBREE_PATH_TRACER_H
90 | 


--------------------------------------------------------------------------------
/refs/src/file_path.h:
--------------------------------------------------------------------------------
 1 | #ifndef FILE_PATH_H
 2 | #define FILE_PATH_H
 3 | 
 4 | #include <string>
 5 | #include <algorithm>
 6 | 
 7 | /// A file system path together with its directory part and its file name.
 8 | /// Backslashes are converted to forward slashes on construction.
 9 | class FilePath {
10 | public:
11 |     FilePath(const std::string& path)
12 |         : path_(path)
13 |     {
14 |         std::replace(path_.begin(), path_.end(), '\\', '/');
15 |         const std::string::size_type slash = path_.rfind('/');
16 |         if (slash == std::string::npos) {
17 |             // No directory component: the base becomes ".".
18 |             base_ = ".";
19 |             file_ = path_;
20 |         } else {
21 |             base_ = path_.substr(0, slash);
22 |             file_ = path_.substr(slash + 1);
23 |         }
24 |     }
25 | 
26 |     /// Full path with forward slashes only.
27 |     const std::string& path() const { return path_; }
28 |     /// Everything before the last '/', or "." when the path has none.
29 |     const std::string& base_name() const { return base_; }
30 |     /// Everything after the last '/', or the whole path when it has none.
31 |     const std::string& file_name() const { return file_; }
32 | 
33 |     /// Text after the last '.' of the file name; empty when there is no dot.
34 |     std::string extension() const {
35 |         const std::string::size_type dot = file_.rfind('.');
36 |         if (dot == std::string::npos)
37 |             return std::string();
38 |         return file_.substr(dot + 1);
39 |     }
40 | 
41 |     /// File name up to, but excluding, its last '.'.
42 |     std::string remove_extension() const {
43 |         // substr(0, npos) yields the whole name, which covers the no-dot case.
44 |         return file_.substr(0, file_.rfind('.'));
45 |     }
46 | 
47 |     operator const std::string& () const {
48 |         return path_;
49 |     }
50 | 
51 | private:
52 |     std::string path_;
53 |     std::string base_;
54 |     std::string file_;
55 | };
56 | 
57 | #endif // FILE_PATH_H
58 | 


--------------------------------------------------------------------------------
/refs/src/float2.h:
--------------------------------------------------------------------------------
  1 | #ifndef FLOAT2_H
  2 | #define FLOAT2_H
  3 | 
  4 | #include <cmath>
  5 | #include "common.h"
  6 | 
  7 | struct float3;
  8 | struct float4;
  9 | 
 10 | struct float2 {
 11 |     union {
 12 |         struct { float x, y; };
 13 |         float values[2]; // indexed view of x and y
 14 |     };
 15 | 
 16 |     float2() {} // leaves both components uninitialized
 17 |     float2(float x, float y) : x(x), y(y) {}
 18 |     explicit float2(float x) : x(x), y(x) {}
 19 |     explicit float2(const float3& xy);
 20 |     explicit float2(const float4& xy);
 21 | 
 22 |     bool operator == (const float2& other) const { return x == other.x && y == other.y; }
 23 |     bool operator != (const float2& other) const { return !(*this == other); }
 24 | 
 25 |     float  operator [] (size_t i) const { return values[i]; }
 26 |     float& operator [] (size_t i)       { return values[i]; }
 27 | 
 28 |     // The compound assignments below work component by component.
 29 |     float2& operator += (const float2& a) {
 30 |         x += a.x;
 31 |         y += a.y;
 32 |         return *this;
 33 |     }
 34 | 
 35 |     float2& operator -= (const float2& a) {
 36 |         x -= a.x;
 37 |         y -= a.y;
 38 |         return *this;
 39 |     }
 40 | 
 41 |     float2& operator *= (float a) {
 42 |         x *= a;
 43 |         y *= a;
 44 |         return *this;
 45 |     }
 46 | 
 47 |     float2& operator *= (const float2& a) {
 48 |         x *= a.x;
 49 |         y *= a.y;
 50 |         return *this;
 51 |     }
 52 | };
 53 | 
 54 | inline float2 operator * (float a, const float2& b) { // binary operators reuse the compound assignments of float2
 55 |     return float2(b) *= a;
 56 | }
 57 | 
 58 | inline float2 operator * (const float2& a, float b) {
 59 |     return float2(a) *= b;
 60 | }
 61 | 
 62 | inline float2 operator / (const float2& a, float b) { // multiplies by the reciprocal of b
 63 |     return float2(a) *= (1.0f / b);
 64 | }
 65 | 
 66 | inline float2 operator - (const float2& a, const float2& b) {
 67 |     return float2(a) -= b;
 68 | }
 69 | 
 70 | inline float2 operator + (const float2& a, const float2& b) {
 71 |     return float2(a) += b;
 72 | }
 73 | 
 74 | inline float2 operator * (const float2& a, const float2& b) {
 75 |     return float2(a) *= b;
 76 | }
 77 | 
 78 | inline float2 min(const float2& a, const float2& b) { // per component; b is taken unless a is strictly smaller
 79 |     const float lo_x = (a.x < b.x) ? a.x : b.x, lo_y = (a.y < b.y) ? a.y : b.y;
 80 |     return float2(lo_x, lo_y);
 81 | }
 82 | 
 83 | inline float2 max(const float2& a, const float2& b) { // per component; b is taken unless a is strictly greater
 84 |     const float hi_x = (a.x > b.x) ? a.x : b.x, hi_y = (a.y > b.y) ? a.y : b.y;
 85 |     return float2(hi_x, hi_y);
 86 | }
 87 | 
 88 | inline float dot(const float2& a, const float2& b) { // sum of the component products
 89 |     return a.x * b.x + a.y * b.y;
 90 | }
 91 | 
 92 | inline float lensqr(const float2& a) { // dot product of a with itself
 93 |     return dot(a, a);
 94 | }
 95 | 
 96 | inline float length(const float2& a) { // square root of lensqr(a)
 97 |     return std::sqrt(lensqr(a));
 98 | }
 99 | 
100 | inline float2 normalize(const float2& a) { // a divided by its length; no check for zero length
101 |     return a / length(a);
102 | }
103 | 
104 | #endif // FLOAT2_H
105 | 


--------------------------------------------------------------------------------
/refs/src/float3.h:
--------------------------------------------------------------------------------
  1 | #ifndef FLOAT3_H
  2 | #define FLOAT3_H
  3 | 
  4 | #include <cmath>
  5 | #include "common.h"
  6 | #include "float2.h"
  7 | 
  8 | struct float4;
  9 | 
 10 | struct float3 {
 11 |     union {
 12 |         struct { float x, y, z; };
 13 |         float values[3]; // indexed view of x, y and z
 14 |     };
 15 | 
 16 |     float3() {} // leaves the components uninitialized
 17 |     float3(float x, float y, float z) : x(x), y(y), z(z) {}
 18 |     float3(const float2& xy, float z) : x(xy.x), y(xy.y), z(z) {}
 19 |     float3(float x, const float2& yz) : x(x), y(yz.x), z(yz.y) {}
 20 |     explicit float3(float x) : x(x), y(x), z(x) {}
 21 |     explicit float3(const float4& xyz);
 22 | 
 23 |     bool operator == (const float3& other) const { return x == other.x && y == other.y && z == other.z; }
 24 |     bool operator != (const float3& other) const { return !(*this == other); }
 25 | 
 26 |     // No bounds checking is performed on i.
 27 |     float  operator [] (size_t i) const { return values[i]; }
 28 |     float& operator [] (size_t i)       { return values[i]; }
 29 | 
 30 |     /// Adds a to this vector, component by component.
 31 |     float3& operator += (const float3& a) {
 32 |         for (int i = 0; i < 3; ++i) values[i] += a.values[i];
 33 |         return *this;
 34 |     }
 35 | 
 36 |     /// Subtracts a from this vector, component by component.
 37 |     float3& operator -= (const float3& a) {
 38 |         for (int i = 0; i < 3; ++i) values[i] -= a.values[i];
 39 |         return *this;
 40 |     }
 41 | 
 42 |     /// Multiplies every component by the scalar a.
 43 |     float3& operator *= (float a) {
 44 |         for (int i = 0; i < 3; ++i) values[i] *= a;
 45 |         return *this;
 46 |     }
 47 | 
 48 |     /// Multiplies this vector by a, component by component.
 49 |     float3& operator *= (const float3& a) {
 50 |         for (int i = 0; i < 3; ++i) values[i] *= a.values[i];
 51 |         return *this;
 52 |     }
 53 | };
 54 | 
 55 | inline float2::float2(const float3& xy)
 56 |     : x(xy.x), y(xy.y)
 57 | {}
 58 | 
 59 | inline float3 operator * (float a, const float3& b) {
 60 |     return float3(a * b.x, a * b.y, a * b.z);
 61 | }
 62 | 
 63 | inline float3 operator * (const float3& a, float b) {
 64 |     return float3(a.x * b, a.y * b, a.z * b);
 65 | }
 66 | 
 67 | inline float3 operator / (const float3& a, float b) {
 68 |     return a * (1.0f / b);
 69 | }
 70 | 
 71 | inline float3 operator - (const float3& a, const float3& b) {
 72 |     return float3(a.x - b.x, a.y - b.y, a.z - b.z);
 73 | }
 74 | 
 75 | inline float3 operator - (const float3& a) {
 76 |     return float3(-a.x, -a.y, -a.z);
 77 | }
 78 | 
 79 | inline float3 operator + (const float3& a, const float3& b) {
 80 |     return float3(a.x + b.x, a.y + b.y, a.z + b.z);
 81 | }
 82 | 
 83 | inline float3 operator * (const float3& a, const float3& b) {
 84 |     return float3(a.x * b.x, a.y * b.y, a.z * b.z);
 85 | }
 86 | 
 87 | inline float3 operator / (const float3& a, const float3& b) {
 88 |     return float3(a.x / b.x, a.y / b.y, a.z / b.z);
 89 | }
 90 | 
 91 | inline float3 cross(const float3& a, const float3& b) {
 92 |     return float3(a.y * b.z - a.z * b.y,
 93 |                   a.z * b.x - a.x * b.z,
 94 |                   a.x * b.y - a.y * b.x);
 95 | }
 96 | 
 97 | inline float3 rotate(const float3& v, const float3& axis, float angle) {
 98 |     float q[4];
 99 |     q[0] = axis.x * sinf(angle / 2);
100 |     q[1] = axis.y * sinf(angle / 2);
101 |     q[2] = axis.z * sinf(angle / 2);
102 |     q[3] = std::cos(angle / 2);
103 | 
104 |     float p[4];
105 |     p[0] = q[3] * v.x + q[1] * v.z - q[2] * v.y;
106 |     p[1] = q[3] * v.y - q[0] * v.z + q[2] * v.x;
107 |     p[2] = q[3] * v.z + q[0] * v.y - q[1] * v.x;
108 |     p[3] = -(q[0] * v.x + q[1] * v.y + q[2] * v.z);
109 | 
110 |     return float3(p[3] * -q[0] + p[0] * q[3] + p[1] * -q[2] - p[2] * -q[1],
111 |                   p[3] * -q[1] - p[0] * -q[2] + p[1] * q[3] + p[2] * -q[0],
112 |                   p[3] * -q[2] + p[0] * -q[1] - p[1] * -q[0] + p[2] * q[3]);
113 | }
114 | 
115 | inline float3 min(const float3& a, const float3& b) {
116 |     return float3(a.x < b.x ? a.x : b.x,
117 |                   a.y < b.y ? a.y : b.y,
118 |                   a.z < b.z ? a.z : b.z);
119 | }
120 | 
121 | inline float3 max(const float3& a, const float3& b) {
122 |     return float3(a.x > b.x ? a.x : b.x,
123 |                   a.y > b.y ? a.y : b.y,
124 |                   a.z > b.z ? a.z : b.z);
125 | }
126 | 
127 | inline float dot(const float3& a, const float3& b) {
128 |     return a.x * b.x + a.y * b.y + a.z * b.z;
129 | }
130 | 
131 | inline float lensqr(const float3& a) {
132 |     return dot(a, a);
133 | }
134 | 
135 | inline float length(const float3& a) {
136 |     return std::sqrt(dot(a, a));
137 | }
138 | 
139 | inline float3 normalize(const float3& a) {
140 |     return a * (1.0f / length(a));
141 | }
142 | 
143 | #endif // FLOAT3_H
144 | 


--------------------------------------------------------------------------------
/refs/src/float4.h:
--------------------------------------------------------------------------------
  1 | #ifndef FLOAT4_H
  2 | #define FLOAT4_H
  3 | 
  4 | #include <cmath>
  5 | #include "common.h"
  6 | #include "float2.h"
  7 | #include "float3.h"
  8 | 
  9 | struct float4 {
 10 |     union {
 11 |         struct { float x, y, z, w; };
 12 |         float values[4];
 13 |     };
 14 | 
 15 |     float4() {}
 16 |     explicit float4(float x) : x(x), y(x), z(x), w(x) {}
 17 |     float4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
 18 |     float4(const float3& xyz, float w) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {}
 19 |     float4(float x, const float3& yzw) : x(x), y(yzw.x), z(yzw.y), w(yzw.z) {}
 20 |     float4(const float2& xy, float z, float w) : x(xy.x), y(xy.y), z(z), w(w) {}
 21 |     float4(float x, const float2& yz, float w) : x(x), y(yz.x), z(yz.y), w(w) {}
 22 |     float4(float x, float y, const float2& zw) : x(x), y(y), z(zw.x), w(zw.y) {}
 23 |     float4(const float2& xy, const float2& zw) : x(xy.x), y(xy.y), z(zw.x), w(zw.y) {}
 24 | 
 25 |     bool operator == (const float4& other) const {
 26 |         return x == other.x && y == other.y && z == other.z && w != other.w;
 27 |     }
 28 | 
 29 |     bool operator != (const float4& other) const {
 30 |         return x != other.x || y != other.y || z != other.z || w != other.w;
 31 |     }
 32 | 
 33 |     float operator [] (size_t i) const { return values[i]; }
 34 |     float& operator [] (size_t i) { return values[i]; }
 35 | 
 36 |     float4& operator += (const float4& a) {
 37 |         x += a.x; y += a.y; z += a.z; w += a.w;
 38 |         return *this;
 39 |     }
 40 | 
 41 |     float4& operator -= (const float4& a) {
 42 |         x -= a.x; y -= a.y; z -= a.z; w -= a.w;
 43 |         return *this;
 44 |     }
 45 | 
 46 |     float4& operator *= (float a) {
 47 |         x *= a; y *= a; z *= a; w *= a;
 48 |         return *this;
 49 |     }
 50 | 
 51 |     float4& operator *= (const float4& a) {
 52 |         x *= a.x; y *= a.y; z *= a.z; w *= a.w;
 53 |         return *this;
 54 |     }
 55 | };
 56 | 
 57 | inline float2::float2(const float4& xy)
 58 |     : x(xy.x), y(xy.y)
 59 | {}
 60 | 
 61 | inline float3::float3(const float4& xyz)
 62 |     : x(xyz.x), y(xyz.y), z(xyz.z)
 63 | {}
 64 | 
 65 | inline float4 operator * (float a, const float4& b) {
 66 |     return float4(a * b.x, a * b.y, a * b.z, a * b.w);
 67 | }
 68 | 
 69 | inline float4 operator * (const float4& a, float b) {
 70 |     return float4(a.x * b, a.y * b, a.z * b, a.w * b);
 71 | }
 72 | 
 73 | inline float4 operator / (const float4& a, float b) {
 74 |     return a * (1.0f / b);
 75 | }
 76 | 
 77 | inline float4 operator - (const float4& a, const float4& b) {
 78 |     return float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
 79 | }
 80 | 
 81 | inline float4 operator - (const float4& a) {
 82 |     return float4(-a.x, -a.y, -a.z, -a.w);
 83 | }
 84 | 
 85 | inline float4 operator + (const float4& a, const float4& b) {
 86 |     return float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
 87 | }
 88 | 
 89 | inline float4 operator * (const float4& a, const float4& b) {
 90 |     return float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
 91 | }
 92 | 
 93 | inline float4 abs(const float4& a) {
 94 |     return float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
 95 | }
 96 | 
 97 | inline float4 min(const float4& a, const float4& b) {
 98 |     return float4(a.x < b.x ? a.x : b.x,
 99 |                   a.y < b.y ? a.y : b.y,
100 |                   a.z < b.z ? a.z : b.z,
101 |                   a.w < b.w ? a.w : b.w);
102 | }
103 | 
104 | inline float4 max(const float4& a, const float4& b) {
105 |     return float4(a.x > b.x ? a.x : b.x,
106 |                   a.y > b.y ? a.y : b.y,
107 |                   a.z > b.z ? a.z : b.z,
108 |                   a.w > b.w ? a.w : b.w);
109 | }
110 | 
111 | inline float dot(const float4& a, const float4& b) {
112 |     return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
113 | }
114 | 
115 | inline float lensqr(const float4& a) {
116 |     return dot(a, a);
117 | }
118 | 
119 | inline float length(const float4& a) {
120 |     return std::sqrt(dot(a, a));
121 | }
122 | 
123 | inline float4 normalize(const float4& a) {
124 |     return a * (1.0f / length(a));
125 | }
126 | 
127 | inline float4 clamp(const float4& val, const float4& min, const float4& max) {
128 |     return float4(clamp(val.x, min.x, max.x),
129 |                   clamp(val.y, min.y, max.y),
130 |                   clamp(val.z, min.z, max.z),
131 |                   clamp(val.w, min.w, max.w));
132 | }
133 | 
134 | #endif // FLOAT4_H
135 | 


--------------------------------------------------------------------------------
/refs/src/image.cpp:
--------------------------------------------------------------------------------
  1 | #include <memory>
  2 | #include <fstream>
  3 | #include <cmath>
  4 | 
  5 | #include <png.h>
  6 | #include <jpeglib.h>
  7 | 
  8 | #include "image.h"
  9 | 
 10 | static void gamma_correct(ImageRgba32& img) {
 11 |     for (size_t y = 0; y < img.height; ++y) {
 12 |         for (size_t x = 0; x < img.width; ++x) {
 13 |             auto* pix = &img.pixels[4 * (y * img.width + x)];
 14 |             for (int i = 0; i < 3; ++i)
 15 |                 pix[i] = std::pow(pix[i] * (1.0f / 255.0f), 2.2f) * 255.0f;
 16 |         }
 17 |     }
 18 | }
 19 | 
 20 | static void read_from_stream(png_structp png_ptr, png_bytep data, png_size_t length) {
 21 |     png_voidp a = png_get_io_ptr(png_ptr);
 22 |     ((std::istream*)a)->read((char*)data, length);
 23 | }
 24 | 
 25 | bool load_png(const FilePath& path, ImageRgba32& img) {
 26 |     std::ifstream file(path, std::ifstream::binary);
 27 |     if (!file)
 28 |         return false;
 29 | 
 30 |     // Read signature
 31 |     char sig[8];
 32 |     file.read(sig, 8);
 33 |     if (!png_check_sig((unsigned char*)sig, 8))
 34 |         return false;
 35 | 
 36 |     png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
 37 |     if (!png_ptr)
 38 |         return false;
 39 | 
 40 |     png_infop info_ptr = png_create_info_struct(png_ptr);
 41 |     if (!info_ptr) {
 42 |         png_destroy_read_struct(&png_ptr, nullptr, nullptr);
 43 |         return false;
 44 |     }
 45 | 
 46 |     if (setjmp(png_jmpbuf(png_ptr))) {
 47 |         png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
 48 |         return false;
 49 |     }
 50 | 
 51 |     png_set_sig_bytes(png_ptr, 8);
 52 |     png_set_read_fn(png_ptr, (png_voidp)&file, read_from_stream);
 53 |     png_read_info(png_ptr, info_ptr);
 54 | 
 55 |     img.width    = png_get_image_width(png_ptr, info_ptr);
 56 |     img.height   = png_get_image_height(png_ptr, info_ptr);
 57 |     img.channels = 4;
 58 | 
 59 |     png_uint_32 color_type = png_get_color_type(png_ptr, info_ptr);
 60 |     png_uint_32 bit_depth  = png_get_bit_depth(png_ptr, info_ptr);
 61 | 
 62 |     // Expand paletted and grayscale images to RGB
 63 |     if (color_type == PNG_COLOR_TYPE_PALETTE) {
 64 |         png_set_palette_to_rgb(png_ptr);
 65 |     } else if (color_type == PNG_COLOR_TYPE_GRAY ||
 66 |                color_type == PNG_COLOR_TYPE_GRAY_ALPHA) {
 67 |         png_set_gray_to_rgb(png_ptr);
 68 |     }
 69 | 
 70 |     // Transform to 8 bit per channel
 71 |     if (bit_depth == 16)
 72 |         png_set_strip_16(png_ptr);
 73 | 
 74 |     // Get alpha channel when there is one
 75 |     if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS))
 76 |         png_set_tRNS_to_alpha(png_ptr);
 77 | 
 78 |     // Otherwise add an opaque alpha channel
 79 |     else
 80 |         png_set_filler(png_ptr, 0xFF, PNG_FILLER_AFTER);
 81 | 
 82 |     img.pixels.reset(new uint8_t[img.channels * img.width * img.height]);
 83 |     std::unique_ptr<png_byte[]> row_bytes(new png_byte[img.width * 4]);
 84 |     for (size_t y = 0; y < img.height; y++) {
 85 |         png_read_row(png_ptr, row_bytes.get(), nullptr);
 86 |         uint8_t* img_row = img.pixels.get() + 4 * img.width * (img.height - 1 - y);
 87 |         for (size_t x = 0; x < img.width; x++) {
 88 |             for (size_t c = 0; c < img.channels; ++c)
 89 |                 img_row[x * img.channels + c] = row_bytes[x * 4 + c];
 90 |         }
 91 |     }
 92 | 
 93 |     png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
 94 |     gamma_correct(img);
 95 |     return true;
 96 | }
 97 | 
// libjpeg decompression state extended with the extra data used by the
// custom error handler and the stream-based source callbacks in this file.
struct enhanced_jpeg_decompress_struct : jpeg_decompress_struct {
    jmp_buf jmp;            // jump target used by jpeg_error_exit
    std::istream* is;       // input stream the source callbacks read from
    JOCTET src_buf[1024];   // staging buffer handed to libjpeg as input
};
103 | 
104 | static void jpeg_error_exit(j_common_ptr cinfo) {
105 |     cinfo->err->output_message(cinfo);
106 |     longjmp(reinterpret_cast<enhanced_jpeg_decompress_struct*>(cinfo)->jmp, 1);
107 | }
108 | 
109 | static void jpeg_output_message(j_common_ptr) {}
110 | 
111 | static void jpeg_no_op(j_decompress_ptr) {}
112 | 
113 | static int jpeg_fill_input_buffer(j_decompress_ptr cinfo) {
114 |     auto enhanced = static_cast<enhanced_jpeg_decompress_struct*>(cinfo);
115 |     enhanced->is->read((char*)enhanced->src_buf, 1024);
116 |     cinfo->src->bytes_in_buffer = enhanced->is->gcount();
117 |     cinfo->src->next_input_byte = enhanced->src_buf;
118 |     return TRUE;
119 | }
120 | 
121 | static void jpeg_skip_input_data(j_decompress_ptr cinfo, long num_bytes) {
122 |     auto enhanced = static_cast<enhanced_jpeg_decompress_struct*>(cinfo);
123 |     if (num_bytes != 0) {
124 |         if (num_bytes < long(cinfo->src->bytes_in_buffer)) {
125 |             cinfo->src->next_input_byte += num_bytes;
126 |             cinfo->src->bytes_in_buffer -= num_bytes;
127 |         } else {
128 |             enhanced->is->seekg(num_bytes - cinfo->src->bytes_in_buffer, std::ios_base::cur);
129 |             cinfo->src->bytes_in_buffer = 0;
130 |         }
131 |     }
132 | }
133 | 
134 | bool load_jpg(const FilePath& path, ImageRgba32& image) {
135 |     std::ifstream file(path, std::ifstream::binary);
136 |     if (!file)
137 |         return false;
138 | 
139 |     enhanced_jpeg_decompress_struct cinfo;
140 |     cinfo.is = &file;
141 |     jpeg_error_mgr jerr;
142 | 
143 |     cinfo.err           = jpeg_std_error(&jerr);
144 |     jerr.error_exit     = jpeg_error_exit;
145 |     jerr.output_message = jpeg_output_message;
146 |     jpeg_create_decompress(&cinfo);
147 | 
148 |     if (setjmp(cinfo.jmp)) {
149 |         jpeg_abort_decompress(&cinfo);
150 |         jpeg_destroy_decompress(&cinfo);
151 |         return false;
152 |     }
153 | 
154 |     jpeg_source_mgr src;
155 |     src.init_source       = jpeg_no_op;
156 |     src.fill_input_buffer = jpeg_fill_input_buffer;
157 |     src.skip_input_data   = jpeg_skip_input_data;
158 |     src.resync_to_restart = jpeg_resync_to_restart;
159 |     src.term_source       = jpeg_no_op;
160 |     src.bytes_in_buffer   = 0;
161 |     cinfo.src = &src;
162 | 
163 |     jpeg_read_header(&cinfo, true);
164 |     jpeg_start_decompress(&cinfo);
165 |     image.width  = cinfo.output_width;
166 |     image.height = cinfo.output_height;
167 |     auto image_size = image.width * image.height * 4;
168 |     image.pixels.reset(new uint8_t[image_size]);
169 |     std::fill(image.pixels.get(), image.pixels.get() + image_size, 0);
170 |     image.channels = cinfo.output_components;
171 | 
172 |     std::unique_ptr<JSAMPLE[]> row(new JSAMPLE[image.width * image.channels]);
173 |     for (size_t y = 0; y < image.height; y++) {
174 |         auto src_ptr = row.get();
175 |         auto dst_ptr = &image.pixels[(image.height - 1 - y) * image.width * 4];
176 |         jpeg_read_scanlines(&cinfo, &src_ptr, 1);
177 |         for (size_t x = 0; x < image.width; ++x, src_ptr += image.channels, dst_ptr += 4) {
178 |             for (size_t c = 0; c < image.channels; c++)
179 |                 dst_ptr[c] = src_ptr[c];
180 |         }
181 |     }
182 | 
183 |     jpeg_finish_decompress(&cinfo);
184 |     jpeg_destroy_decompress(&cinfo);
185 |     gamma_correct(image);
186 |     return true;
187 | }
188 | 


--------------------------------------------------------------------------------
/refs/src/image.h:
--------------------------------------------------------------------------------
 1 | #ifndef IMAGE_H
 2 | #define IMAGE_H
 3 | 
 4 | #include "file_path.h"
 5 | 
 6 | struct ImageRgba32 {
 7 |     std::unique_ptr<uint8_t[]> pixels;
 8 |     size_t width, height;
 9 |     size_t channels;
10 | };
11 | 
12 | bool load_png(const FilePath&, ImageRgba32&);
13 | bool load_jpg(const FilePath&, ImageRgba32&);
14 | 
15 | #endif // IMAGE_H
16 | 


--------------------------------------------------------------------------------
/refs/src/math.isph:
--------------------------------------------------------------------------------
  1 | #ifndef QUALIFIER
  2 | #error "Please define the preprocessor symbol QUALIFIER before including this file"
  3 | #endif
  4 | 
  5 | inline struct float3_s QUALIFIER make_float3(float QUALIFIER x, float QUALIFIER y, float QUALIFIER z) {
  6 |     struct float3_s QUALIFIER v = { x, y, z };
  7 |     return v;
  8 | }
  9 | 
 10 | inline struct float2_s QUALIFIER  make_float2(float QUALIFIER x, float QUALIFIER y) {
 11 |     struct float2_s QUALIFIER v = { x, y };
 12 |     return v;
 13 | }
 14 | 
 15 | inline struct rgb_s QUALIFIER make_rgb(float QUALIFIER r, float QUALIFIER g, float QUALIFIER b) {
 16 |     struct rgb_s QUALIFIER c = { r, g, b };
 17 |     return c;
 18 | }
 19 | 
 20 | inline struct rgb_s QUALIFIER operator+ (const struct rgb_s QUALIFIER a, const struct rgb_s QUALIFIER b) {
 21 |     return make_rgb(a.r + b.r, a.g + b.g, a.b + b.b);
 22 | }
 23 | 
 24 | inline struct float3_s QUALIFIER operator+ (const struct float3_s QUALIFIER a, const struct float3_s QUALIFIER b) {
 25 |     return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
 26 | }
 27 | 
 28 | inline struct float3_s QUALIFIER operator- (const struct float3_s QUALIFIER a, const struct float3_s QUALIFIER b) {
 29 |     return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
 30 | }
 31 | 
 32 | inline struct float2_s QUALIFIER operator+ (const struct float2_s QUALIFIER a, const struct float2_s QUALIFIER b) {
 33 |     return make_float2(a.x + b.x, a.y + b.y);
 34 | }
 35 | 
 36 | inline struct rgb_s QUALIFIER operator* (const struct rgb_s QUALIFIER a, float QUALIFIER b) {
 37 |     return make_rgb(a.r * b, a.g * b, a.b * b);
 38 | }
 39 | 
 40 | inline struct float3_s QUALIFIER operator* (const struct float3_s QUALIFIER a, float QUALIFIER b) {
 41 |     return make_float3(a.x * b, a.y * b, a.z * b);
 42 | }
 43 | 
 44 | inline struct rgb_s QUALIFIER operator* (const struct rgb_s QUALIFIER a, const struct rgb_s QUALIFIER b) {
 45 |     return make_rgb(a.r * b.r, a.g * b.g, a.b * b.b);
 46 | }
 47 | 
 48 | inline struct float2_s QUALIFIER operator* (const struct float2_s QUALIFIER a, float QUALIFIER b) {
 49 |     return make_float2(a.x * b, a.y * b);
 50 | }
 51 | 
 52 | inline struct float3_s QUALIFIER negate(const struct float3_s QUALIFIER v) {
 53 |     return make_float3(-v.x, -v.y, -v.z);
 54 | }
 55 | 
 56 | inline float QUALIFIER dot(const struct float3_s QUALIFIER a, const struct float3_s QUALIFIER b) {
 57 |     return a.x * b.x + a.y * b.y + a.z * b.z;
 58 | }
 59 | 
 60 | inline float QUALIFIER length(const struct float3_s QUALIFIER a) {
 61 |     return sqrt(dot(a, a));
 62 | }
 63 | 
 64 | inline struct float3_s QUALIFIER normalize(const struct float3_s QUALIFIER v) {
 65 |     return v * (1.0f / length(v));
 66 | }
 67 | 
 68 | inline float QUALIFIER lerp(float QUALIFIER a, float QUALIFIER b, float QUALIFIER t) {
 69 |     return a * (1.0f - t) + b * t;
 70 | }
 71 | 
 72 | inline struct rgb_s QUALIFIER lerp(struct rgb_s QUALIFIER a, struct rgb_s QUALIFIER b, float QUALIFIER t) {
 73 |     return a * (1.0f - t) + b * t;
 74 | }
 75 | 
 76 | inline float QUALIFIER luminance(struct rgb_s QUALIFIER c) {
 77 |     return c.r * 0.2126f + c.g * 0.7152f + c.b * 0.0722f;
 78 | }
 79 | 
 80 | inline float QUALIFIER fastlog2(float QUALIFIER x) {
 81 |     unsigned int QUALIFIER vx = intbits(x);
 82 |     unsigned int QUALIFIER mx = (vx & 0x007FFFFFu) | 0x3f000000u;
 83 |     float QUALIFIER y = vx * 1.1920928955078125e-7f;
 84 |     float QUALIFIER z = floatbits(mx);
 85 |     return y - 124.22551499f - 1.498030302f * z - 1.72587999f / (0.3520887068f + z);
 86 | }
 87 | 
 88 | inline float QUALIFIER fastpow2(float QUALIFIER p) {
 89 |     float QUALIFIER off   = p < 0.0f    ? 1.0f : 0.0f;
 90 |     float QUALIFIER clipp = p < -126.0f ? -126.0f : p;
 91 |     int QUALIFIER w = clipp;
 92 |     float QUALIFIER z = clipp - w + off;
 93 |     int QUALIFIER v = (1u << 23u) * (clipp + 121.2740575f + 27.7280233f / (4.84252568f - z) - 1.49012907f * z);
 94 |     return floatbits(v);
 95 | }
 96 | 
 97 | inline float QUALIFIER fastpow(float QUALIFIER x, float QUALIFIER y) {
 98 |     return fastpow2(y * fastlog2(x));
 99 | }
100 | 


--------------------------------------------------------------------------------
/refs/src/obj.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOAD_OBJ_H
 2 | #define LOAD_OBJ_H
 3 | 
 4 | #include <vector>
 5 | #include <string>
 6 | #include <unordered_map>
 7 | 
 8 | #include "float3.h"
 9 | #include "color.h"
10 | #include "file_path.h"
11 | 
12 | namespace obj {
13 | 
14 | struct Index {
15 |     int v, n, t;
16 | };
17 | 
18 | struct Face {
19 |     std::vector<Index> indices;
20 |     int material;
21 | };
22 | 
23 | struct Group {
24 |     std::vector<Face> faces;
25 | };
26 | 
27 | struct Object {
28 |     std::vector<Group> groups;
29 | };
30 | 
31 | struct Material {
32 |     rgb ka;
33 |     rgb kd;
34 |     rgb ks;
35 |     rgb ke;
36 |     float ns;
37 |     float ni;
38 |     rgb tf;
39 |     float tr;
40 |     float d;
41 |     int illum;
42 |     std::string map_ka;
43 |     std::string map_kd;
44 |     std::string map_ks;
45 |     std::string map_ke;
46 |     std::string map_bump;
47 |     std::string map_d;
48 | };
49 | 
50 | struct File {
51 |     std::vector<Object>      objects;
52 |     std::vector<float3>      vertices;
53 |     std::vector<float3>      normals;
54 |     std::vector<float2>      texcoords;
55 |     std::vector<std::string> materials;
56 |     std::vector<std::string> mtl_libs;
57 | };
58 | 
59 | typedef std::unordered_map<std::string, Material> MaterialLib;
60 | 
61 | struct TriMesh {
62 |     std::vector<float3>   vertices;
63 |     std::vector<uint32_t> indices;
64 |     std::vector<float3>   normals;
65 |     std::vector<float3>   face_normals;
66 |     std::vector<float2>   texcoords;
67 | };
68 | 
69 | bool load_obj(const FilePath&, File&);
70 | bool load_mtl(const FilePath&, MaterialLib&);
71 | TriMesh compute_tri_mesh(const File&, const MaterialLib&, size_t);
72 | 
73 | } // namespace obj
74 | 
75 | #endif // LOAD_OBJ_H
76 | 


--------------------------------------------------------------------------------
/refs/src/optix_path_tracer.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPTIX_PATH_TRACER_H
 2 | #define OPTIX_PATH_TRACER_H
 3 | 
// Material record used by the OptiX path tracer.
// NOTE(review): presumably shared between host and device code, so the field
// order and types should be treated as a fixed layout — confirm.
struct Material {
    float3 kd;      // kd/ks/ke/tf: color coefficients (names match the OBJ material fields)
    float3 ks;
    float3 ke;
    float3 tf;
    int map_kd;     // presumably texture indices for kd/ks — verify the "no texture" value
    int map_ks;
    float ns;
    float ni;
    uint illum;
};
15 | 
// Light record used by the OptiX path tracer, described by three vertices
// (v0, v1, v2), a normal, an inverse area and an intensity.
// NOTE(review): presumably a triangular area light — confirm.
struct Light {
    float3 v0;
    float3 v1;
    float3 v2;
    float3 normal;
    float inv_area;
    float3 intensity;
};
24 | 
25 | #endif // OPTIX_PATH_TRACER_H
26 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | set(CPUINFO_PATH "/proc/cpuinfo" CACHE STRING "Path to the CPU info file on the system")
  2 | set(TARGET_PLATFORM "" CACHE STRING "Target platform for the converter tool. Leave empty to autodetect CPU. Use converter --help to list possible platforms.")
  3 | set(TARGET_DEVICE "" CACHE STRING "Target device on the selected platform. Leave empty to use the default.")
  4 | set(SCENE_FILE "${PROJECT_SOURCE_DIR}/testing/cornell_box.obj" CACHE FILEPATH "Absolute path to an OBJ scene")
  5 | set(MEGAKERNEL_FUSION ON CACHE BOOL "Set to true to enable simple shader fusion for megakernel mappings")
  6 | set(MAX_PATH_LEN "64" CACHE STRING "Maximum path length")
  7 | set(DISABLE_GUI OFF CACHE BOOL "Set to true to disable GUI")
  8 | set(SPP "4" CACHE STRING "Samples per pixel")
  9 | if (SCENE_FILE STREQUAL "")
 10 |     message(FATAL_ERROR "Please specify a valid OBJ scene in the SCENE_FILE variable")
 11 | endif()
 12 | set(CONVERTER_OPTIONS "")
 13 | if (NOT TARGET_PLATFORM STREQUAL "")
 14 |     set(CONVERTER_OPTIONS "--target" "${TARGET_PLATFORM}")
 15 | endif()
 16 | if (NOT TARGET_DEVICE STREQUAL "")
 17 |     set(CONVERTER_OPTIONS ${CONVERTER_OPTIONS} "--device" "${TARGET_DEVICE}")
 18 |     if (MEGAKERNEL_FUSION AND (TARGET_PLATFORM STREQUAL "nvvm-megakernel" OR TARGET_PLATFORM STREQUAL "amdgpu-megakernel"))
 19 |         set(CONVERTER_OPTIONS ${CONVERTER_OPTIONS} "--fusion")
 20 |     endif()
 21 | endif()
 22 | 
 23 | set(RODENT_SRCS
 24 |     core/color.impala
 25 |     core/common.impala
 26 |     core/cpu_common.impala
 27 |     core/matrix.impala
 28 |     core/random.impala
 29 |     core/sort.impala
 30 |     core/vector.impala
 31 |     render/image.impala
 32 |     render/camera.impala
 33 |     render/geometry.impala
 34 |     render/light.impala
 35 |     render/material.impala
 36 |     render/renderer.impala
 37 |     render/scene.impala
 38 |     render/driver.impala
 39 |     render/mapping_cpu.impala
 40 |     render/mapping_gpu.impala
 41 |     traversal/intersection.impala
 42 |     traversal/stack.impala
 43 |     traversal/mapping_cpu.impala
 44 |     traversal/mapping_gpu.impala)
 45 | 
 46 | set(DRIVER_SRCS
 47 |     driver/driver.cpp
 48 |     driver/interface.cpp
 49 |     driver/interface.h
 50 |     driver/obj.cpp
 51 |     driver/obj.h
 52 |     driver/image.cpp
 53 |     driver/image.h
 54 |     driver/bvh.h
 55 |     driver/float2.h
 56 |     driver/float3.h
 57 |     driver/float4.h
 58 |     driver/file_path.h
 59 |     driver/common.h
 60 |     driver/color.h)
 61 | 
 62 | set(CONVERTER_SRCS
 63 |     driver/converter.cpp
 64 |     driver/obj.cpp
 65 |     driver/obj.h
 66 |     driver/file_path.h
 67 |     driver/interface.h
 68 |     driver/bvh.h)
 69 | 
 70 | anydsl_runtime_wrap(RODENT_OBJS
 71 |     NAME "rodent"
 72 |     CLANG_FLAGS ${CLANG_FLAGS}
 73 |     IMPALA_FLAGS --log-level info
 74 |     FILES ${RODENT_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/main.impala)
 75 | 
 76 | anydsl_runtime_wrap(DISCARD_TMP_OBJS
 77 |     NAME "interface"
 78 |     FILES ${RODENT_SRCS} dummy_main.impala
 79 |     INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/driver/interface)
 80 | 
 81 | if (NOT DISABLE_GUI)
 82 |     find_package(SDL2 REQUIRED)
 83 | endif()
 84 | find_package(PNG REQUIRED)
 85 | find_package(JPEG REQUIRED)
 86 | find_package(LZ4 REQUIRED)
 87 | 
 88 | add_executable(converter ${CONVERTER_SRCS})
 89 | target_include_directories(converter PUBLIC ${LZ4_INCLUDE_DIR})
 90 | target_link_libraries(converter ${LZ4_LIBRARY})
 91 | target_compile_definitions(converter PUBLIC -DCPUINFO_PATH="${CPUINFO_PATH}")
 92 | if (COLORIZE)
 93 |     target_compile_definitions(converter PUBLIC -DCOLORIZE)
 94 | endif()
 95 | if (EMBREE_FOUND)
 96 |     target_include_directories(converter PUBLIC ${EMBREE_ROOT_DIR}/include ${EMBREE_ROOT_DIR} ${EMBREE_LIBRARY_DIR})
 97 |     target_link_libraries(converter ${EMBREE_DEPENDENCIES})
 98 |     target_compile_definitions(converter PUBLIC ${EMBREE_DEFINITIONS} -DENABLE_EMBREE_BVH)
 99 | endif()
100 | 
# Generate main.impala by running the converter on the selected scene.
# The converter is run in CMAKE_BINARY_DIR and the main.impala found there is
# then moved next to this directory's build files. VERBATIM makes CMake
# escape every argument itself, so scene paths or options containing spaces
# or shell metacharacters reach the converter unchanged on every platform.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main.impala
    COMMAND converter ${SCENE_FILE} ${CONVERTER_OPTIONS} --max-path-len ${MAX_PATH_LEN} --samples-per-pixel ${SPP}
    COMMAND ${CMAKE_COMMAND} -E rename ${CMAKE_BINARY_DIR}/main.impala ${CMAKE_CURRENT_BINARY_DIR}/main.impala
    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
    DEPENDS ${SCENE_FILE} converter
    VERBATIM)

# Convenience target to run only the conversion step.
add_custom_target(convert DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/main.impala)

# The file does not exist at configure time.
set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/main.impala PROPERTIES GENERATED TRUE)
110 | 
# Driver library: scene/image loading and the host side of the renderer.
add_library(driver ${DRIVER_SRCS})
# The driver sources include driver/image.cpp whether or not Embree is
# available, so the PNG/JPEG include directories are added unconditionally
# (they used to be added only inside the Embree branch).
target_include_directories(driver PUBLIC ${PNG_INCLUDE_DIRS} ${JPEG_INCLUDE_DIRS} ${LZ4_INCLUDE_DIR})
if (EMBREE_FOUND)
    target_include_directories(driver PUBLIC ${EMBREE_ROOT_DIR}/include ${EMBREE_ROOT_DIR} ${EMBREE_LIBRARY_DIR})
    target_link_libraries(driver ${EMBREE_DEPENDENCIES})
    target_compile_definitions(driver PUBLIC -DENABLE_EMBREE_DEVICE)
endif()
# GUI support needs SDL2; otherwise the driver is built with DISABLE_GUI.
if (NOT DISABLE_GUI)
    target_include_directories(driver PUBLIC ${SDL2_INCLUDE_DIR})
    target_link_libraries(driver ${SDL2_LIBRARY})
else()
    target_compile_definitions(driver PUBLIC -DDISABLE_GUI)
endif()
if (COLORIZE)
    target_compile_definitions(driver PUBLIC -DCOLORIZE)
endif()
127 | 
# Final renderer executable: compiled Impala objects linked with the driver.
add_executable(rodent ${RODENT_OBJS})
target_link_libraries(rodent
    driver
    ${AnyDSL_runtime_LIBRARIES}
    ${PNG_LIBRARIES}
    ${JPEG_LIBRARIES}
    ${LZ4_LIBRARY})

if (SCENE_FILE STREQUAL "${PROJECT_SOURCE_DIR}/testing/cornell_box.obj")
    # Test rodent when the cornell box is used
    add_test(
        NAME rodent_cornell
        COMMAND ${CMAKE_COMMAND}
            -DRODENT=$<TARGET_FILE:rodent>
            -DIM_COMPARE=${ImageMagick_compare_EXECUTABLE}
            "-DRODENT_ARGS=--eye;0;1;2.7;--dir;0;0;-1;--up;0;1;0"
            -DTESTING_DIR=${PROJECT_SOURCE_DIR}/testing
            -DRODENT_DIR=${CMAKE_BINARY_DIR}
            -DRODENT_OUTPUT=rodent-cornell-output
            -P ${PROJECT_SOURCE_DIR}/cmake/test/run_rodent.cmake)
endif()
135 | 


--------------------------------------------------------------------------------
/src/core/color.impala:
--------------------------------------------------------------------------------
 1 | struct Color {
 2 |     r: f32,
 3 |     g: f32,
 4 |     b: f32
 5 | }
 6 | 
 7 | fn @make_color(r: f32, g: f32, b: f32) -> Color {
 8 |     Color {
 9 |         r: r,
10 |         g: g,
11 |         b: b
12 |     }
13 | }
14 | 
// Channel-wise sum of two colors.
fn @color_add(a: Color, b: Color) -> Color {
    let r = a.r + b.r;
    let g = a.g + b.g;
    let bl = a.b + b.b;
    make_color(r, g, bl)
}

// Channel-wise product of two colors.
fn @color_mul(a: Color, b: Color) -> Color {
    let r = a.r * b.r;
    let g = a.g * b.g;
    let bl = a.b * b.b;
    make_color(r, g, bl)
}

// Color scaled by a scalar factor.
fn @color_mulf(c: Color, f: f32) -> Color {
    let r = c.r * f;
    let g = c.g * f;
    let bl = c.b * f;
    make_color(r, g, bl)
}
26 | 
// Linear interpolation between two colors: `a` at t = 0, `b` at t = 1.
fn @color_lerp(a: Color, b: Color, t: f32) -> Color {
    let s = 1.0f - t;
    let r = s * a.r + t * b.r;
    let g = s * a.g + t * b.g;
    let bl = s * a.b + t * b.b;
    make_color(r, g, bl)
}
32 | 
// Weighted sum of the channels (0.2126 R + 0.7152 G + 0.0722 B).
fn @color_luminance(c: Color) -> f32 {
    let red   = c.r * 0.2126f;
    let green = c.g * 0.7152f;
    let blue  = c.b * 0.0722f;
    red + green + blue
}

// True only when all three channels are exactly zero.
fn @is_black(c: Color) -> bool {
    !(c.r != 0.0f || c.g != 0.0f || c.b != 0.0f)
}
40 | 
// Reinterprets a vector as a color: x -> r, y -> g, z -> b.
fn @vec3_to_color(v: Vec3) -> Color {
    let (r, g, b) = (v.x, v.y, v.z);
    make_color(r, g, b)
}

// Reinterprets a color as a vector: r -> x, g -> y, b -> z.
fn @color_to_vec3(c: Color) -> Vec3 {
    let (x, y, z) = (c.r, c.g, c.b);
    make_vec3(x, y, z)
}
48 | 
// Predefined colors.
static black = Color {
    r: 0.0f,
    g: 0.0f,
    b: 0.0f
};
static white = Color {
    r: 1.0f,
    g: 1.0f,
    b: 1.0f
};
static pink = Color {
    r: 1.0f,
    g: 0.0f,
    b: 1.0f
};
52 | 


--------------------------------------------------------------------------------
/src/core/common.impala:
--------------------------------------------------------------------------------
  1 | // Constants -----------------------------------------------------------------------
  2 | 
static flt_eps = 1.1920928955e-07f; // 2^-23, same value as C's FLT_EPSILON
static flt_max = 3.4028234664e+38f; // largest finite f32 (C's FLT_MAX)
static flt_min = 1.1754943509e-38f; // smallest positive normalized f32 (C's FLT_MIN)
static flt_inf = 1.0f / 0.0f;       // positive infinity
static flt_pi = 3.14159265359f;     // pi
static flt_sqrt2 = 1.41421356237f;  // sqrt(2)
static flt_sqrt3 = 1.73205080757f;  // sqrt(3)
 10 | 
 11 | // Fast division -------------------------------------------------------------------
 12 | 
// Precomputed constants for dividing by a fixed divisor (built by make_fast_div,
// consumed by fast_div).
struct FastDiv {
    m: u32,  // multiplier applied to the dividend (high 32 bits of the product are kept)
    s1: u32, // first shift amount
    s2: u32  // second (final) shift amount
}
 18 | 
 19 | fn @make_fast_div(div: u32) -> FastDiv {
 20 |     let log = ilog2(div as i32) as u32;
 21 |     let max = 1u64 << 32u64;
 22 |     let m = (max << log as u64) / (div as u64) - max + 1u64;
 23 |     let s1 = select(log < 1u32, log, 1u32);
 24 |     let s2 = select(log > 1u32, log - 1u32, 0u32);
 25 |     FastDiv {
 26 |         m: m as u32,
 27 |         s1: s1,
 28 |         s2: s2
 29 |     }
 30 | }
 31 | 
 32 | fn @fast_div(fd: FastDiv, i: u32) -> u32 {
 33 |     let t = (((fd.m as u64) * (i as u64)) >> 32u64) as u32;
 34 |     (t + ((i - t) >> fd.s1)) >> fd.s2
 35 | }
 36 | 
 37 | // Fast power ----------------------------------------------------------------------
 38 | 
 39 | // Inspired from:
 40 | // http://www.machinedlearnings.com/2011/06/fast-approximate-logarithm-exponential.html
 41 | 
 42 | fn @fastlog2(x: f32) -> f32 {
 43 |     let vx = bitcast[u32](x);
 44 |     let mx = (vx & 0x007FFFFFu) | 0x3f000000u;
 45 |     let y = (vx as f32) * 1.1920928955078125e-7f;
 46 |     let z = bitcast[f32](mx);
 47 |     y - 124.22551499f - 1.498030302f * z - 1.72587999f / (0.3520887068f + z)
 48 | }
 49 | 
 50 | fn @fastpow2(p: f32) -> f32 {
 51 |     let offset = select(p < 0.0f, 1.0f, 0.0f);
 52 |     let clipp  = select(p < -126.0f, -126.0f, p);
 53 |     let w = clipp as i32;
 54 |     let z = clipp - (w as f32) + offset;
 55 |     let v = ((1u << 23u) as f32 * (clipp + 121.2740575f + 27.7280233f / (4.84252568f - z) - 1.49012907f * z)) as i32;
 56 |     bitcast[f32](v)
 57 | }
 58 | 
 59 | fn @fastpow(x: f32, p: f32) -> f32 {
 60 |     fastpow2(p * fastlog2(x))
 61 | }
 62 | 
 63 | // Likely/unlikely -----------------------------------------------------------------
 64 | 
// Binds the LLVM intrinsic `llvm.expect.i1` under the name `expect`.
// First argument: the condition; second argument: the value it is expected to have.
extern "device" {
    fn "llvm.expect.i1" expect(bool, bool) -> bool;
}
 68 | 
 69 | fn @  likely(cond: bool) -> bool { expect(cond,  true) }
 70 | fn @unlikely(cond: bool) -> bool { expect(cond, false) }
 71 | 
 72 | // Misc. ---------------------------------------------------------------------------
 73 | 
// Invokes `body` exactly once.
// NOTE(review): the `@@` prefix presumably forces partial evaluation of the call — confirm
// against the Impala documentation.
fn @once(body: fn () -> ()) -> () {
    @@body()
}
 77 | 
 78 | fn @prodsign(x: f32, y: f32) -> f32 {
 79 |     bitcast[f32](bitcast[i32](x) ^ (bitcast[i32](y) & bitcast[i32](0x80000000u)))
 80 | }
 81 | 
 82 | fn @safe_rcp(x: f32) -> f32 {
 83 |     let min_rcp = 1e-8f;
 84 |     if select(x > 0.0f, x, -x) < min_rcp { prodsign(flt_max, x) } else { 1.0f / x }
 85 | }
 86 | 
 87 | fn @round_up(n: i32, d: i32) -> i32 {
 88 |     let m = n % d;
 89 |     n + (if m != 0 { d - m } else { 0 })
 90 | }
 91 | 
 92 | fn @round_down(n: i32, d: i32) -> i32 {
 93 |     (n / d) * d
 94 | }
 95 | 
 96 | fn @sqrt_newton(y: f32, p: f32) -> f32 {
 97 |     fn @(?x) newton(x: f32) -> f32 {
 98 |         if (x * x - y) > p {
 99 |             newton(x - (x * x - y) / (2.0f * x))
100 |         } else {
101 |             x
102 |         }
103 |     }
104 |     newton(1.0f)
105 | }
106 | 
// Returns the smallest p >= 0 such that i <= 2^p (base-2 logarithm rounded up).
fn @ilog2(i: i32) -> i32 {
    fn @(?v) search(v: i32, p: i32) -> i32 {
        if v > (1 << p) {
            search(v, p + 1)
        } else {
            p
        }
    }
    search(i, 0)
}
117 | 
// Linear interpolation: `a` for k = 0, `b` for k = 1.
fn @lerp(a: f32, b: f32, k: f32) -> f32 {
    let wa = 1.0f - k;
    wa * a + k * b
}

// Interpolation between three values with weights (1 - k1 - k2), k1 and k2.
fn @lerp2(a: f32, b: f32, c: f32, k1: f32, k2: f32) -> f32 {
    let wa = 1.0f - k1 - k2;
    wa * a + k1 * b + k2 * c
}
125 | 
// Area of the triangle (v0, v1, v2): half the length of the cross product of two edges.
fn @triangle_area(math: Intrinsics, v0: Vec3, v1: Vec3, v2: Vec3) -> f32 {
    let normal = vec3_cross(vec3_sub(v1, v0), vec3_sub(v2, v0));
    0.5f * vec3_len(math, normal)
}
132 | 
// Dot product of `a` and `b`, with negative results replaced by zero.
fn @positive_cos(a: Vec3, b: Vec3) -> f32 {
    let d = vec3_dot(a, b);
    select(d >= 0.0f, d, 0.0f)
}
137 | 
// Exchanges the two f32 values referenced by `a` and `b`.
fn @swap_f32(a: &mut f32, b: &mut f32) -> () {
    let old_a = *a;
    let old_b = *b;
    *a = old_b;
    *b = old_a;
}

// Exchanges the two i32 values referenced by `a` and `b`.
fn @swap_i32(a: &mut i32, b: &mut i32) -> () {
    let old_a = *a;
    let old_b = *b;
    *a = old_b;
    *b = old_a;
}

// Exchanges the two u32 values referenced by `a` and `b`.
fn @swap_u32(a: &mut u32, b: &mut u32) -> () {
    let old_a = *a;
    let old_b = *b;
    *a = old_b;
    *b = old_a;
}
155 | 


--------------------------------------------------------------------------------
/src/core/cpu_common.impala:
--------------------------------------------------------------------------------
 1 | // Misc. ---------------------------------------------------------------------------
 2 | 
// Externally provided clock; cpu_profile uses the difference of two readings
// as the elapsed time (presumably microseconds, going by the name).
extern "C" {
    fn clock_us() -> i64;
}

// Compile-time switches for cpu_profile:
static cpu_profiling_enabled = false; // when false, cpu_profile just runs its body
static cpu_profiling_serial  = false; // when true, the counter is updated with a plain add instead of an atomic
 9 | 
// Runs `body`. When cpu_profiling_enabled is set, the time spent in `body`
// (difference of two clock_us() readings) is added to `*counter`, either with a
// plain add (cpu_profiling_serial) or through the `atomic` intrinsic.
fn @cpu_profile(counter: &mut i64, body: fn () -> ()) -> () {
    if !cpu_profiling_enabled {
        body()
    } else {
        let t0 = clock_us();
        body();
        let t1 = clock_us();
        let elapsed = t1 - t0;
        if cpu_profiling_serial {
            *counter += elapsed;
        } else {
            // NOTE(review): presumably op 1 = add and ordering 7 = seq_cst — confirm
            atomic(1u32, counter, elapsed, 7u32, "");
        }
    }
}
25 | 
// Iterates over the bits that are set in a mask (assumes that mask != 0).
// `body` is called once per set bit with the value returned by cpu_ctz32 for the
// remaining mask.
fn cpu_one_bits(mut mask: i32, @body: fn (i32) -> ()) -> () {
    // presumably the index of the lowest set bit (count of trailing zeros) — TODO confirm
    let lane = cpu_ctz32(mask, true);
    @@body(lane);
    mask &= mask - 1; // clears the lowest set bit
    if mask != 0 {
        // Recurse on the remaining bits, handing over this function's own return continuation
        cpu_one_bits(mask, body, return)
    }
}
35 | 
// Horizontal reduction over vector lanes: repeatedly combines `value` with
// rv_shuffle(value, n / 2) using `op`, halving `n` until it drops below 2.
fn @(?n) cpu_reduce(value: f32, n: i32, op: fn (f32, f32) -> f32) -> f32 {
    if n < 2 {
        value
    } else {
        let half = n / 2;
        let combined = op(value, rv_shuffle(value, half));
        cpu_reduce(combined, half, op)
    }
}
45 | 
// Issues one cpu_prefetch every 64 bytes over the first `bytes` bytes at `ptr`.
fn @cpu_prefetch_bytes(ptr: &[u8], bytes: i32) -> () {
    for offset in unroll_step(0, bytes, 64) {
        cpu_prefetch(&ptr(offset), 0 /* read */, 3 /* closest locality */, 1 /* data */);
    }
}
52 | 
// Returns the first vector lane index i for which value[i] == lane.
fn @cpu_index_of(value: f32, lane: f32) -> i32 {
    let matches = rv_ballot(value == lane);
    cpu_ctz32(matches, true)
}
57 | 
// Runs `body(index, width)` for every index in [a, b).
// With vector_width == 1 the range is walked one index at a time. Otherwise the
// largest multiple of vector_width that fits is processed inside `vectorize`
// (width = vector_width) and the remaining indices are processed with width = 1.
fn @vectorized_range(vector_width: i32, a: i32, b: i32, body: fn (i32, i32) -> ()) -> () {
    if vector_width != 1 {
        let bulk_end = a + round_down(b - a, vector_width);
        for base in range_step(a, bulk_end, vector_width) {
            for lane in vectorize(vector_width) {
                @@body(base + lane, vector_width)
            }
        }
        // Leftover indices that do not fill a whole vector
        for rest in range(bulk_end, b) {
            @@body(rest, 1)
        }
    } else {
        for i in range(a, b) {
            body(i, 1);
        }
    }
}
76 | 


--------------------------------------------------------------------------------
/src/core/matrix.impala:
--------------------------------------------------------------------------------
// Matrices are stored as arrays of column vectors.

// 2x2 matrix: two Vec2 columns.
struct Mat2x2 {
    col: [Vec2 * 2]
}

// 3x3 matrix: three Vec3 columns.
struct Mat3x3 {
    col: [Vec3 * 3]
}

// 3x4 matrix (3 rows, 4 columns): four Vec3 columns.
struct Mat3x4 {
    col: [Vec3 * 4]
}

// 4x4 matrix: four Vec4 columns.
struct Mat4x4 {
    col: [Vec4 * 4]
}
 16 | 
 17 | fn @make_mat2x2(c0: Vec2, c1: Vec2) -> Mat2x2 {
 18 |     Mat2x2 {
 19 |         col: [c0, c1]
 20 |     }
 21 | }
 22 | 
 23 | fn @make_mat3x3(c0: Vec3, c1: Vec3, c2: Vec3) -> Mat3x3 {
 24 |     Mat3x3 {
 25 |         col: [c0, c1, c2]
 26 |     }
 27 | }
 28 | 
 29 | fn @make_orthonormal_mat3x3(n: Vec3) -> Mat3x3 {
 30 |     let sign = select(n.z >= 0.0f, 1.0f, -1.0f);
 31 |     let a = -1.0f / (sign + n.z);
 32 |     let b = n.x * n.y * a;
 33 | 
 34 |     let t  = make_vec3(1.0f + sign * n.x * n.x * a, sign * b, -sign * n.x);
 35 |     let bt = make_vec3(b, sign + n.y * n.y * a, -n.y);
 36 |     Mat3x3 {
 37 |         col: [t, bt, n]
 38 |     }
 39 | }
 40 | 
 41 | fn @make_mat3x4(c0: Vec3, c1: Vec3, c2: Vec3, c3: Vec3) -> Mat3x4 {
 42 |     Mat3x4 {
 43 |         col: [c0, c1, c2, c3]
 44 |     }
 45 | }
 46 | 
 47 | fn @make_mat4x4(c0: Vec4, c1: Vec4, c2: Vec4, c3: Vec4) -> Mat4x4 {
 48 |     Mat4x4 {
 49 |         col: [c0, c1, c2, c3]
 50 |     }
 51 | }
 52 | 
 53 | fn @mat2x2_row(m: Mat2x2, i: i32) -> Vec2 {
 54 |     [make_vec2(m.col(0).x, m.col(1).x),
 55 |      make_vec2(m.col(0).y, m.col(1).y)](i)
 56 | }
 57 | 
 58 | fn @mat3x3_row(m: Mat3x3, i: i32) -> Vec3 {
 59 |     [make_vec3(m.col(0).x, m.col(1).x, m.col(2).x),
 60 |      make_vec3(m.col(0).y, m.col(1).y, m.col(2).y),
 61 |      make_vec3(m.col(0).z, m.col(1).z, m.col(2).z)](i)
 62 | } 
 63 | 
 64 | fn @mat3x4_row(m: Mat3x4, i: i32) -> Vec4 {
 65 |     [make_vec4(m.col(0).x, m.col(1).x, m.col(2).x, m.col(3).x),
 66 |      make_vec4(m.col(0).y, m.col(1).y, m.col(2).y, m.col(3).y),
 67 |      make_vec4(m.col(0).z, m.col(1).z, m.col(2).z, m.col(3).z)](i)
 68 | } 
 69 | 
 70 | fn @mat4x4_row(m: Mat4x4, i: i32) -> Vec4 {
 71 |     [make_vec4(m.col(0).x, m.col(1).x, m.col(2).x, m.col(3).x),
 72 |      make_vec4(m.col(0).y, m.col(1).y, m.col(2).y, m.col(3).y),
 73 |      make_vec4(m.col(0).z, m.col(1).z, m.col(2).z, m.col(3).z),
 74 |      make_vec4(m.col(0).w, m.col(1).w, m.col(2).w, m.col(3).w)](i)
 75 | } 
 76 | 
 77 | fn @mat2x2_identity() -> Mat2x2 {
 78 |     make_mat2x2(make_vec2(1.0f, 0.0f),
 79 |                 make_vec2(0.0f, 1.0f))
 80 | }
 81 | 
 82 | fn @mat3x3_identity() -> Mat3x3 {
 83 |     make_mat3x3(make_vec3(1.0f, 0.0f, 0.0f),
 84 |                 make_vec3(0.0f, 1.0f, 0.0f),
 85 |                 make_vec3(0.0f, 0.0f, 1.0f))
 86 | }
 87 | 
 88 | fn @mat3x4_identity() -> Mat3x4 {
 89 |     make_mat3x4(make_vec3(1.0f, 0.0f, 0.0f),
 90 |                 make_vec3(0.0f, 1.0f, 0.0f),
 91 |                 make_vec3(0.0f, 0.0f, 1.0f),
 92 |                 make_vec3(0.0f, 0.0f, 0.0f))
 93 | }
 94 | 
 95 | fn @mat4x4_identity() -> Mat4x4 {
 96 |     make_mat4x4(make_vec4(1.0f, 0.0f, 0.0f, 0.0f),
 97 |                 make_vec4(0.0f, 1.0f, 0.0f, 0.0f),
 98 |                 make_vec4(0.0f, 0.0f, 1.0f, 0.0f),
 99 |                 make_vec4(0.0f, 0.0f, 0.0f, 1.0f))
100 | }
101 | 
// Matrix-vector product: each result component is the dot product of one row with `v`.
fn @mat2x2_mul(m: Mat2x2, v: Vec2) -> Vec2 {
    let x = vec2_dot(mat2x2_row(m, 0), v);
    let y = vec2_dot(mat2x2_row(m, 1), v);
    make_vec2(x, y)
}

// 3x3 matrix times Vec3.
fn @mat3x3_mul(m: Mat3x3, v: Vec3) -> Vec3 {
    let x = vec3_dot(mat3x3_row(m, 0), v);
    let y = vec3_dot(mat3x3_row(m, 1), v);
    let z = vec3_dot(mat3x3_row(m, 2), v);
    make_vec3(x, y, z)
}

// 3x4 matrix times Vec4, producing a Vec3.
fn @mat3x4_mul(m: Mat3x4, v: Vec4) -> Vec3 {
    let x = vec4_dot(mat3x4_row(m, 0), v);
    let y = vec4_dot(mat3x4_row(m, 1), v);
    let z = vec4_dot(mat3x4_row(m, 2), v);
    make_vec3(x, y, z)
}

// 4x4 matrix times Vec4.
fn @mat4x4_mul(m: Mat4x4, v: Vec4) -> Vec4 {
    let x = vec4_dot(mat4x4_row(m, 0), v);
    let y = vec4_dot(mat4x4_row(m, 1), v);
    let z = vec4_dot(mat4x4_row(m, 2), v);
    let w = vec4_dot(mat4x4_row(m, 3), v);
    make_vec4(x, y, z, w)
}
125 | 
// Matrix-matrix product a * b: every column of the result is `a` applied to the
// matching column of `b`.
fn @mat2x2_matmul(a: Mat2x2, b: Mat2x2) -> Mat2x2 {
    Mat2x2 {
        col: [mat2x2_mul(a, b.col(0)),
              mat2x2_mul(a, b.col(1))]
    }
}

// 3x3 matrix product a * b.
fn @mat3x3_matmul(a: Mat3x3, b: Mat3x3) -> Mat3x3 {
    Mat3x3 {
        col: [mat3x3_mul(a, b.col(0)),
              mat3x3_mul(a, b.col(1)),
              mat3x3_mul(a, b.col(2))]
    }
}

// 4x4 matrix product a * b.
fn @mat4x4_matmul(a: Mat4x4, b: Mat4x4) -> Mat4x4 {
    Mat4x4 {
        col: [mat4x4_mul(a, b.col(0)),
              mat4x4_mul(a, b.col(1)),
              mat4x4_mul(a, b.col(2)),
              mat4x4_mul(a, b.col(3))]
    }
}
143 | 


--------------------------------------------------------------------------------
/src/core/random.impala:
--------------------------------------------------------------------------------
// Change these variables to use another random number generator.
// `RndState` is the type of the generator state and `randi` the function that
// advances it; the commented-out pair below selects the 64-bit MWC64X generator
// instead of the 32-bit xorshift one.
//type RndState = u64;
//static randi = mwc64x;
type RndState = u32;
static randi = xorshift;
  6 | 
  7 | fn @randf(rnd: &mut RndState) -> f32 {
  8 |     // Assumes IEEE 754 floating point format
  9 |     let x = randi(rnd) as u32;
 10 |     bitcast[f32]((127u32 << 23u32) | (x & 0x7FFFFFu32)) - 1.0f
 11 | }
 12 | 
 13 | // MWC64X: http://cas.ee.ic.ac.uk/people/dt10/research/rngs-gpu-mwc64x.html
 14 | fn @mwc64x(seed: &mut u64) -> i32 {
 15 |     let c = *seed >> 32u64;
 16 |     let x = *seed & 0xFFFFFFFFu64;
 17 |     *seed = x * 4294883355u64 + c;
 18 |     (x as i32)^(c as i32)
 19 | }
 20 | 
 21 | // 32-bit version of the xorshift random number generator
 22 | fn @xorshift(seed: &mut u32) -> i32 {
 23 |     let mut x = *seed;
 24 |     x = select(x == 0u32, 1u32, x);
 25 |     x ^= x << 13u32;
 26 |     x ^= x >> 17u32;
 27 |     x ^= x << 5u32;
 28 |     *seed = x;
 29 |     x as i32
 30 | }
 31 | 
// Result of sampling a direction
struct DirSample {
    dir: Vec3, // sampled direction
    pdf: f32   // probability density associated with `dir`
}
 37 | 
 38 | fn @make_dir_sample(math: Intrinsics, c: f32, s: f32, phi: f32, pdf: f32) -> DirSample {
 39 |     let x = s * math.cosf(phi);
 40 |     let y = s * math.sinf(phi);
 41 |     let z = c;
 42 |     DirSample {
 43 |         dir: make_vec3(x, y, z),
 44 |         pdf: pdf
 45 |     }
 46 | }
 47 | 
 48 | // Samples a point uniformly on a triangle
 49 | fn @sample_triangle(mut u: f32, mut v: f32, v0: Vec3, v1: Vec3, v2: Vec3) -> Vec3 {
 50 |     if (u + v > 1.0f) {
 51 |         u = 1.0f - u;
 52 |         v = 1.0f - v;
 53 |     }
 54 |     vec3_add(
 55 |         vec3_add(vec3_mulf(v0, 1.0f - v - u),
 56 |                  vec3_mulf(v1, u)),
 57 |         vec3_mulf(v2, v)
 58 |     )
 59 | }
 60 | 
 61 | // Probability density function for uniform sphere sampling
 62 | fn @uniform_sphere_pdf() -> f32 { 1.0f / (4.0f * flt_pi) }
 63 | 
 64 | // Samples a direction uniformly on a sphere
 65 | fn @sample_uniform_sphere(math: Intrinsics, u: f32, v: f32) -> DirSample {
 66 |     let c = 2.0f * v - 1.0f;
 67 |     let s = math.sqrtf(1.0f - c * c);
 68 |     let phi = 2.0f * flt_pi * u;
 69 |     make_dir_sample(math, c, s, phi, uniform_sphere_pdf())
 70 | }
 71 | 
 72 | // Probability density function for cosine weighted hemisphere sampling
 73 | fn @cosine_hemisphere_pdf(c: f32) -> f32 { c * (1.0f / flt_pi) }
 74 | 
 75 | // Samples a direction on a hemisphere proportionally to the cosine with the surface normal
 76 | fn @sample_cosine_hemisphere(math: Intrinsics, u: f32, v: f32) -> DirSample {
 77 |     let c = math.sqrtf(1.0f - v);
 78 |     let s = math.sqrtf(v);
 79 |     let phi = 2.0f * flt_pi * u;
 80 |     make_dir_sample(math, c, s, phi, cosine_hemisphere_pdf(c))
 81 | }
 82 | 
 83 | // Probability density function for cosine-power weighted hemisphere sampling
 84 | fn @cosine_power_hemisphere_pdf(math: Intrinsics, c: f32, k: f32) -> f32 {
 85 |     fastpow/*math.powf*/(c, k) * (k + 1.0f) * (1.0f / (2.0f * flt_pi))
 86 | }
 87 | 
 88 | // Samples a direction on a hemisphere proportionally to the power of the cosine with the surface normal
 89 | fn @sample_cosine_power_hemisphere(math: Intrinsics, k: f32, u: f32, v: f32) -> DirSample {
 90 |     let c = math.fminf(fastpow/*math.powf*/(v, 1.0f / (k + 1.0f)), 1.0f);
 91 |     let s = math.sqrtf(1.0f - c * c);
 92 |     let phi = 2.0f * flt_pi * u;
 93 |     // We have:
 94 |     // pow(c, k) = pow(pow(v, 1 / (k + 1)), k)
 95 |     //           = pow(v, k / (k + 1))
 96 |     //           = v * pow(v, -1 / (k + 1))
 97 |     //           = v / c
 98 |     let pow_c_k = select(c != 0.0f, v / c, 0.0f);
 99 |     let pdf = pow_c_k * (k + 1.0f) * (1.0f / (2.0f * flt_pi));
100 |     make_dir_sample(math, c, s, phi, pdf)
101 | }
102 | 
// Initializer for Bernstein's hash function
fn @bernstein_init() -> u32 {
    5381u32
}

// Bernstein's hash function: folds the four bytes of `d` into `h`, lowest byte
// first, multiplying by 33 and xor-ing the byte at each step.
fn @bernstein_hash(mut h: u32, d: u32) -> u32 {
    fn @step(acc: u32, bits: u32) -> u32 {
        (acc * 33u32) ^ (bits & 0xFFu32)
    }
    h = step(h, d);
    h = step(h, d >>  8u32);
    h = step(h, d >> 16u32);
    h = step(h, d >> 24u32);
    h
}
114 | 
// Initializer for the FNV hash function
fn @fnv_init() -> u32 {
    0x811C9DC5u32
}

// FNV hash function: folds the four bytes of `d` into `h`, lowest byte first,
// multiplying by 16777619 and xor-ing the byte at each step.
fn @fnv_hash(mut h: u32, d: u32) -> u32 {
    fn @step(acc: u32, bits: u32) -> u32 {
        (acc * 16777619u32) ^ (bits & 0xFFu32)
    }
    h = step(h, d);
    h = step(h, d >>  8u32);
    h = step(h, d >> 16u32);
    h = step(h, d >> 24u32);
    h
}
126 | 
// Returns the probability to continue given the contribution of a path:
// twice the luminance of `c`, capped at `clamp`.
fn @russian_roulette(c: Color, clamp: f32) -> f32 {
    let survive = 2.0f * color_luminance(c);
    select(survive > clamp, clamp, survive)
}
132 | 


--------------------------------------------------------------------------------
/src/core/sort.impala:
--------------------------------------------------------------------------------
// Signature shared by the sorting-network generators below: they take the number
// of elements and a compare-and-swap callback that receives two element indices.
type SortingNetwork = fn (i32, fn (i32, i32) -> ()) -> ();
 2 | 
// Generates a Bose-Nelson sorting network for `n` elements.
// `cmp_swap(i, j)` is called once per comparator, in the order in which the
// comparators have to be applied.
fn @bose_nelson_sort(n: i32, cmp_swap: fn (i32, i32) -> ()) -> () {
    // Handles the `len` elements starting at index `i`: both halves first, then
    // the merge of the two halves.
    fn @(?i & ?len) p_star(i: i32, len: i32) -> () {
        if len > 1 {
            let m = len / 2;
            p_star(i, m);
            p_star((i + m), (len - m));
            p_bracket(i, m, (i + m), (len - m));
        }
    }

    // Merges the run of `len1` elements at `i1` with the run of `len2` elements at `i2`.
    fn @(?i1 & ?len1 & ?i2 & ?len2) p_bracket(i1: i32, len1: i32, i2: i32, len2: i32) -> () {
        if len1 == 1 && len2 == 1 {
            cmp_swap(i1, i2);
        } else if len1 == 1 && len2 == 2 {
            cmp_swap(i1, i2 + 1);
            cmp_swap(i1, i2);
        } else if len1 == 2 && len2 == 1 {
            cmp_swap(i1, i2);
            cmp_swap(i1 + 1, i2);
        } else {
            // General case: split both runs and merge the pieces recursively
            let a = len1 / 2;
            let b = select(len1 % 2 != 0, len2 / 2, (len2 + 1) / 2);
            p_bracket(i1, a, i2, b);
            p_bracket((i1 + a), (len1 - a), (i2 + b), (len2 - b));
            p_bracket((i1 + a), (len1 - a), i2, b);
        }
    }

    p_star(0, n)
}
33 | 
// Generates a sorting network for `n` elements (presumably Batcher's odd-even
// merge sort, going by the name). The network is built for the next power of two
// and comparators touching indices >= n are dropped.
// `cmp_swap(i, j)` is called once per remaining comparator, in network order.
fn @batcher_sort(n: i32, cmp_swap: fn (i32, i32) -> ()) -> () {
    // Merge step over `len` elements starting at `i`, comparing elements `r` apart
    fn @(?i & ?len & ?r) merge(i: i32, len: i32, r: i32) -> () {
        let step = r * 2;
        if step < len {
            merge(i, len, step);
            merge(i + r, len, step);
            for j in unroll_step(i + r, i + len - r, step) @{
                // Remove comparators for non-existing elements
                if j < n && j + r < n {
                    cmp_swap(j, j + r)
                }
            }
        } else {
            // idem
            if i < n && i + r < n {
                cmp_swap(i, i + r);
            }
        }
    }

    // Sorts both halves of the `len` elements at `i`, then merges them
    fn @(?i & ?len) sort(i: i32, len: i32) -> () {
        if len > 1 {
            let m = len / 2;
            sort(i, m);
            sort(i + m, m);
            merge(i, len, 1)
        }
    }

    // Compute the smallest power of two that is >= n (ilog2 rounds up)
    let p = 1 << ilog2(n);
    sort(0, p)
}
67 | 
// Generates a bitonic sorting network for `n` elements (`n` does not have to be
// a power of two). `cmp_swap(i, j)` is called once per comparator, in network
// order; the direction flag decides which index is passed first.
fn @bitonic_sort(n: i32, cmp_swap: fn (i32, i32) -> ()) -> () {
    // Merges the `len` elements starting at `i` in direction `dir`
    fn @(?i & ?len) merge(i: i32, len: i32, dir: bool) -> () {
        if len > 1 {
            // Compute greatest power of two lower than len
            let m = 1 << (ilog2(len) - 1);

            for j in unroll(i, i + len - m) @{
                // Argument order is swapped when `dir` is false
                cmp_swap(select(dir, j, j + m), select(dir, j + m, j));
            }

            merge(i, m, dir);
            merge(i + m, len - m, dir);
        }
    }

    // Sorts the two halves in opposite directions, then merges them in direction `dir`
    fn @(?i & ?len) sort(i: i32, len: i32, dir: bool) -> () {
        if len > 1 {
            let m = len / 2;
            sort(i, m, !dir);
            sort(i + m, len - m, dir);
            merge(i, len, dir);
        }
    }

    sort(0, n, true)
}
94 | 


--------------------------------------------------------------------------------
/src/core/vector.impala:
--------------------------------------------------------------------------------
// 2-component f32 vector.
struct Vec2 {
    x: f32,
    y: f32
}

// 3-component f32 vector.
struct Vec3 {
    x: f32,
    y: f32,
    z: f32
}

// 4-component f32 vector.
struct Vec4 {
    x: f32,
    y: f32,
    z: f32,
    w: f32
}
 18 | 
 19 | fn @make_vec2(x: f32, y: f32) -> Vec2 { Vec2 { x: x, y: y } }
 20 | fn @make_vec3(x: f32, y: f32, z: f32) -> Vec3 { Vec3 { x: x, y: y, z: z } }
 21 | fn @make_vec4(x: f32, y: f32, z: f32, w: f32) -> Vec4 { Vec4 { x: x, y: y, z: z, w: w } }
 22 | 
 23 | fn @vec2_to_3(v: Vec2, z: f32) -> Vec3 { make_vec3(v.x, v.y, z) }
 24 | fn @vec2_to_4(v: Vec2, z: f32, w: f32) -> Vec4 { make_vec4(v.x, v.y, z, w) }
 25 | fn @vec3_to_2(v: Vec3) -> Vec2 { make_vec2(v.x, v.y) }
 26 | fn @vec3_to_4(v: Vec3, w: f32) -> Vec4 { make_vec4(v.x, v.y, v.z, w) }
 27 | fn @vec4_to_3(v: Vec4) -> Vec3 { make_vec3(v.x, v.y, v.z) }
 28 | fn @vec4_to_2(v: Vec4) -> Vec2 { make_vec2(v.x, v.y) }
 29 | 
 30 | fn @vec2_map(v: Vec2, f: fn (f32) -> f32) -> Vec2 { make_vec2(@@f(v.x), @@f(v.y)) }
 31 | fn @vec3_map(v: Vec3, f: fn (f32) -> f32) -> Vec3 { make_vec3(@@f(v.x), @@f(v.y), @@f(v.z)) }
 32 | fn @vec4_map(v: Vec4, f: fn (f32) -> f32) -> Vec4 { make_vec4(@@f(v.x), @@f(v.y), @@f(v.z), @@f(v.w)) }
 33 | 
 34 | fn @vec2_zip(a: Vec2, b: Vec2, f: fn (f32, f32) -> f32) -> Vec2 { make_vec2(@@f(a.x, b.x), @@f(a.y, b.y)) }
 35 | fn @vec3_zip(a: Vec3, b: Vec3, f: fn (f32, f32) -> f32) -> Vec3 { make_vec3(@@f(a.x, b.x), @@f(a.y, b.y), @@f(a.z, b.z)) }
 36 | fn @vec4_zip(a: Vec4, b: Vec4, f: fn (f32, f32) -> f32) -> Vec4 { make_vec4(@@f(a.x, b.x), @@f(a.y, b.y), @@f(a.z, b.z), @@f(a.w, b.w)) }
 37 | 
 38 | fn @vec2_add(a: Vec2, b: Vec2) -> Vec2 { vec2_zip(a, b, |x, y| x + y) }
 39 | fn @vec3_add(a: Vec3, b: Vec3) -> Vec3 { vec3_zip(a, b, |x, y| x + y) }
 40 | fn @vec4_add(a: Vec4, b: Vec4) -> Vec4 { vec4_zip(a, b, |x, y| x + y) }
 41 | fn @vec2_sub(a: Vec2, b: Vec2) -> Vec2 { vec2_zip(a, b, |x, y| x - y) }
 42 | fn @vec3_sub(a: Vec3, b: Vec3) -> Vec3 { vec3_zip(a, b, |x, y| x - y) }
 43 | fn @vec4_sub(a: Vec4, b: Vec4) -> Vec4 { vec4_zip(a, b, |x, y| x - y) }
 44 | fn @vec2_mul(a: Vec2, b: Vec2) -> Vec2 { vec2_zip(a, b, |x, y| x * y) }
 45 | fn @vec3_mul(a: Vec3, b: Vec3) -> Vec3 { vec3_zip(a, b, |x, y| x * y) }
 46 | fn @vec4_mul(a: Vec4, b: Vec4) -> Vec4 { vec4_zip(a, b, |x, y| x * y) }
 47 | fn @vec2_div(a: Vec2, b: Vec2) -> Vec2 { vec2_zip(a, b, |x, y| x / y) }
 48 | fn @vec3_div(a: Vec3, b: Vec3) -> Vec3 { vec3_zip(a, b, |x, y| x / y) }
 49 | fn @vec4_div(a: Vec4, b: Vec4) -> Vec4 { vec4_zip(a, b, |x, y| x / y) }
 50 | 
 51 | fn @vec2_neg(v: Vec2) -> Vec2 { vec2_map(v, |x| -x) }
 52 | fn @vec3_neg(v: Vec3) -> Vec3 { vec3_map(v, |x| -x) }
 53 | fn @vec4_neg(v: Vec4) -> Vec4 { vec4_map(v, |x| -x) }
 54 | 
 55 | fn @vec2_mulf(v: Vec2, t: f32) -> Vec2 { vec2_mul(v, make_vec2(t, t)) }
 56 | fn @vec3_mulf(v: Vec3, t: f32) -> Vec3 { vec3_mul(v, make_vec3(t, t, t)) }
 57 | fn @vec4_mulf(v: Vec4, t: f32) -> Vec4 { vec4_mul(v, make_vec4(t, t, t, t)) }
 58 | 
 59 | fn @vec2_dot(a: Vec2, b: Vec2) -> f32 { a.x * b.x + a.y * b.y }
 60 | fn @vec3_dot(a: Vec3, b: Vec3) -> f32 { a.x * b.x + a.y * b.y + a.z * b.z }
 61 | fn @vec4_dot(a: Vec4, b: Vec4) -> f32 { a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w }
 62 | 
 63 | fn @vec3_cross(a: Vec3, b: Vec3) -> Vec3 {
 64 |     make_vec3(a.y * b.z - a.z * b.y,
 65 |               a.z * b.x - a.x * b.z,
 66 |               a.x * b.y - a.y * b.x)
 67 | }
 68 | 
 69 | fn @vec2_len2(v: Vec2) -> f32 { vec2_dot(v, v) }
 70 | fn @vec3_len2(v: Vec3) -> f32 { vec3_dot(v, v) }
 71 | fn @vec4_len2(v: Vec4) -> f32 { vec4_dot(v, v) }
 72 | 
 73 | fn @vec2_reflect(v: Vec2, n: Vec2) -> Vec2 { vec2_sub(vec2_mulf(n, 2.0f * vec2_dot(n, v)), v) }
 74 | fn @vec3_reflect(v: Vec3, n: Vec3) -> Vec3 { vec3_sub(vec3_mulf(n, 2.0f * vec3_dot(n, v)), v) }
 75 | fn @vec4_reflect(v: Vec4, n: Vec4) -> Vec4 { vec4_sub(vec4_mulf(n, 2.0f * vec4_dot(n, v)), v) }
 76 | 
 77 | fn @vec2_len(math: Intrinsics, v: Vec2) -> f32 { math.sqrtf(vec2_len2(v)) }
 78 | fn @vec3_len(math: Intrinsics, v: Vec3) -> f32 { math.sqrtf(vec3_len2(v)) }
 79 | fn @vec4_len(math: Intrinsics, v: Vec4) -> f32 { math.sqrtf(vec4_len2(v)) }
 80 | 
 81 | fn @vec2_normalize(math: Intrinsics, v: Vec2) -> Vec2 { vec2_mulf(v, 1.0f / vec2_len(math, v)) }
 82 | fn @vec3_normalize(math: Intrinsics, v: Vec3) -> Vec3 { vec3_mulf(v, 1.0f / vec3_len(math, v)) }
 83 | fn @vec4_normalize(math: Intrinsics, v: Vec4) -> Vec4 { vec4_mulf(v, 1.0f / vec4_len(math, v)) }
 84 | 
 85 | fn @vec2_lerp(a: Vec2, b: Vec2, k: f32) -> Vec2 { vec2_zip(a, b, |x, y| lerp(x, y, k)) }
 86 | fn @vec3_lerp(a: Vec3, b: Vec3, k: f32) -> Vec3 { vec3_zip(a, b, |x, y| lerp(x, y, k)) }
 87 | fn @vec4_lerp(a: Vec4, b: Vec4, k: f32) -> Vec4 { vec4_zip(a, b, |x, y| lerp(x, y, k)) }
 88 | 
 89 | fn @vec2_lerp2(a: Vec2, b: Vec2, c: Vec2, u: f32, v: f32) -> Vec2 {
 90 |     Vec2 {
 91 |         x: lerp2(a.x, b.x, c.x, u, v),
 92 |         y: lerp2(a.y, b.y, c.y, u, v)
 93 |     }
 94 | }
 95 | fn @vec3_lerp2(a: Vec3, b: Vec3, c: Vec3, u: f32, v: f32) -> Vec3 {
 96 |     Vec3 {
 97 |         x: lerp2(a.x, b.x, c.x, u, v),
 98 |         y: lerp2(a.y, b.y, c.y, u, v),
 99 |         z: lerp2(a.z, b.z, c.z, u, v)
100 |     }
101 | }
102 | fn @vec4_lerp2(a: Vec4, b: Vec4, c: Vec4, u: f32, v: f32) -> Vec4 {
103 |     Vec4 {
104 |         x: lerp2(a.x, b.x, c.x, u, v),
105 |         y: lerp2(a.y, b.y, c.y, u, v),
106 |         z: lerp2(a.z, b.z, c.z, u, v),
107 |         w: lerp2(a.w, b.w, c.w, u, v)
108 |     }
109 | }
110 | 
// Component-wise minimum (the component of `b` is taken unless a < b).
fn @vec2_min(a: Vec2, b: Vec2) -> Vec2 {
    make_vec2(select(a.x < b.x, a.x, b.x), select(a.y < b.y, a.y, b.y))
}
fn @vec3_min(a: Vec3, b: Vec3) -> Vec3 {
    make_vec3(select(a.x < b.x, a.x, b.x), select(a.y < b.y, a.y, b.y), select(a.z < b.z, a.z, b.z))
}
fn @vec4_min(a: Vec4, b: Vec4) -> Vec4 {
    make_vec4(select(a.x < b.x, a.x, b.x), select(a.y < b.y, a.y, b.y),
              select(a.z < b.z, a.z, b.z), select(a.w < b.w, a.w, b.w))
}

// Component-wise maximum (the component of `b` is taken unless a > b).
fn @vec2_max(a: Vec2, b: Vec2) -> Vec2 {
    make_vec2(select(a.x > b.x, a.x, b.x), select(a.y > b.y, a.y, b.y))
}
fn @vec3_max(a: Vec3, b: Vec3) -> Vec3 {
    make_vec3(select(a.x > b.x, a.x, b.x), select(a.y > b.y, a.y, b.y), select(a.z > b.z, a.z, b.z))
}
fn @vec4_max(a: Vec4, b: Vec4) -> Vec4 {
    make_vec4(select(a.x > b.x, a.x, b.x), select(a.y > b.y, a.y, b.y),
              select(a.z > b.z, a.z, b.z), select(a.w > b.w, a.w, b.w))
}
118 | 


--------------------------------------------------------------------------------
/src/driver/bbox.h:
--------------------------------------------------------------------------------
 1 | #ifndef BBOX_H
 2 | #define BBOX_H
 3 | 
 4 | #include <cfloat>
 5 | #include <algorithm>
 6 | #include "float3.h"
 7 | 
 8 | /// Bounding box represented by its two extreme points.
 9 | struct BBox {
10 |     float3 min, max;
11 | 
12 |     BBox() {}
13 |     BBox(const float3& f) : min(f), max(f) {}
14 |     BBox(const float3& min, const float3& max) : min(min), max(max) {}
15 | 
16 |     BBox& extend(const BBox& bb) {
17 |         min = ::min(min, bb.min);
18 |         max = ::max(max, bb.max);
19 |         return *this;
20 |     }
21 | 
22 |     BBox& extend(const float3& v) {
23 |         min = ::min(min, v);
24 |         max = ::max(max, v);
25 |         return *this;
26 |     }
27 | 
28 |     float half_area() const {
29 |         const float3 len = max - min;
30 |         const float kx = std::max(len.x, 0.0f);
31 |         const float ky = std::max(len.y, 0.0f);
32 |         const float kz = std::max(len.z, 0.0f);
33 |         return kx * (ky + kz) + ky * kz;
34 |     }
35 | 
36 |     BBox& overlap(const BBox& bb) {
37 |         min = ::max(min, bb.min);
38 |         max = ::min(max, bb.max);
39 |         return *this;
40 |     }
41 | 
42 |     bool is_empty() const {
43 |         return min.x > max.x ||
44 |                min.y > max.y ||
45 |                min.z > max.z;
46 |     }
47 | 
48 | 
49 |     bool is_inside(const float3& v) const {
50 |         return v.x >= min.x && v.y >= min.y && v.z >= min.z &&
51 |                v.x <= max.x && v.y <= max.y && v.z <= max.z;
52 |     }
53 | 
54 |     bool is_overlapping(const BBox& bb) const {
55 |         return min.x <= bb.max.x && max.x >= bb.min.x &&
56 |                min.y <= bb.max.y && max.y >= bb.min.y &&
57 |                min.z <= bb.max.z && max.z >= bb.min.z;
58 |     }
59 | 
60 |     static BBox empty() { return BBox(float3(FLT_MAX), float3(-FLT_MAX)); }
61 |     static BBox full() { return BBox(float3(-FLT_MAX), float3(FLT_MAX)); }
62 | };
63 | 
64 | #endif // BBOX_H
65 | 


--------------------------------------------------------------------------------
/src/driver/buffer.h:
--------------------------------------------------------------------------------
 1 | #ifndef BUFFER_H
 2 | #define BUFFER_H
 3 | 
#include <cstdint>
#include <fstream>
#include <istream>
#include <ostream>
#include <string>
#include <vector>

#include <lz4.h>
 9 | 
/// Skips over one serialized buffer in `is` without decoding it.
/// A buffer starts with two 32-bit size fields followed by the payload; the
/// second field holds the number of payload bytes to skip.
static void skip_buffer(std::istream& is) {
    // Read each 4-byte field into a 4-byte integer. Reading into a wider size_t
    // only yields the right value on little-endian hosts.
    uint32_t in_size = 0, out_size = 0;
    is.read((char*)&in_size,  sizeof(in_size));   // first field: read only to advance the stream
    is.read((char*)&out_size, sizeof(out_size));
    is.seekg(out_size, std::ios::cur);
}
16 | 
17 | template <typename Array>
18 | static void decompress(const std::vector<char>& in, Array& out) {
19 |     LZ4_decompress_safe(in.data(), (char*)out.data(), in.size(), out.size() * sizeof(out[0]));
20 | }
21 | 
/// Reads one serialized buffer from `is` into `array`.
/// Layout: 32-bit byte size of the decompressed data, 32-bit byte size of the
/// compressed payload, then the LZ4-compressed payload itself.
/// `array` is replaced by a new Array holding in_size / sizeof(element) elements.
template <typename Array>
static void read_buffer(std::istream& is, Array& array) {
    // Read each 4-byte field into a 4-byte integer. Reading into a wider size_t
    // only yields the right value on little-endian hosts.
    uint32_t in_size = 0, out_size = 0;
    is.read((char*)&in_size,  sizeof(in_size));   // decompressed size in bytes
    is.read((char*)&out_size, sizeof(out_size));  // compressed size in bytes
    std::vector<char> in(out_size);
    is.read(in.data(), in.size());
    // The temporary is already an rvalue, so no std::move is needed
    array = Array(in_size / sizeof(array[0]));
    decompress(in, array);
}
32 | 
/// Opens `file_name` in binary mode and reads one serialized buffer from it into `array`.
template <typename Array>
static void read_buffer(const std::string& file_name, Array& array) {
    std::ifstream input(file_name, std::ios::binary);
    read_buffer(input, array);
}
38 | 
39 | template <typename Array>
40 | static void compress(const Array& in, std::vector<char>& out) {
41 |     size_t in_size = sizeof(in[0]) * in.size();
42 |     out.resize(LZ4_compressBound(in_size));
43 |     out.resize(LZ4_compress_default((const char*)in.data(), out.data(), in_size, out.size()));
44 | }
45 | 
/// Compresses `array` and writes it to `os` as one serialized buffer.
/// Layout: 32-bit byte size of the raw data, 32-bit byte size of the compressed
/// payload, then the payload. Sizes are stored in 32 bits, so larger buffers
/// cannot be represented.
template <typename Array>
static void write_buffer(std::ostream& os, const Array& array) {
    std::vector<char> out;
    compress(array, out);
    // Store the header in explicit 32-bit integers. Writing the first four bytes
    // of a size_t only emits the low half on little-endian hosts.
    const uint32_t in_size  = static_cast<uint32_t>(sizeof(array[0]) * array.size());
    const uint32_t out_size = static_cast<uint32_t>(out.size());
    os.write((const char*)&in_size,  sizeof(in_size));
    os.write((const char*)&out_size, sizeof(out_size));
    os.write(out.data(), out.size());
}
56 | 
/// Creates `file_name` in binary mode and writes `array` to it as one serialized buffer.
template <typename Array>
static void write_buffer(const std::string& file_name, const Array& array) {
    std::ofstream output(file_name, std::ios::binary);
    write_buffer(output, array);
}
62 | 
63 | #endif // BUFFER_H
64 | 


--------------------------------------------------------------------------------
/src/driver/color.h:
--------------------------------------------------------------------------------
 1 | #ifndef COLOR_H
 2 | #define COLOR_H
 3 | 
 4 | #include "float3.h"
 5 | #include "float4.h"
 6 | 
 7 | struct rgba;
 8 | 
/// RGB color stored in a float3 (the constructor forwards r, g, b to float3).
struct rgb : public float3 {
    rgb() {}
    /// Implicit conversion from a plain float3.
    rgb(const float3& rgb) : float3(rgb) {}
    rgb(float r, float g, float b) : float3(r, g, b) {}
    /// Forwards the single value `x` to float3's one-argument constructor.
    explicit rgb(float x) : float3(x) {}
    /// Conversion from rgba; defined after rgba is complete.
    explicit rgb(const rgba& rgba);

    /// Adds `p` to this color using float3's operator +.
    rgb& operator += (const rgb& p) {
        *this = *this + p;
        return *this;
    }
};
21 | 
22 | struct rgba : public float4 {
23 |     rgba() {}
24 |     rgba(const float4& rgba) : float4(rgba) {}
25 |     rgba(float r, float g, float b, float a) : float4(r, g, b, a) {}
26 |     explicit rgba(float x) : float4(x) {}
27 |     explicit rgba(const rgb& rgb, float a) : float4(rgb, a) {}
28 | 
29 |     rgba& operator += (const rgba& p) {
30 |         *this = *this + p;
31 |         return *this;
32 |     }
33 | };
34 | 
35 | inline rgb::rgb(const rgba& rgba) : float3(rgba) {}
36 | 
37 | inline rgb gamma(const rgb& c, float g = 0.5f) {
38 |     return rgb(std::pow(c.x, g), std::pow(c.y, g), std::pow(c.z, g));
39 | }
40 | 
41 | inline rgba gamma(const rgba& c, float g = 0.5f) {
42 |     return rgba(std::pow(c.x, g), std::pow(c.y, g), std::pow(c.z, g), c.w);
43 | }
44 | 
45 | inline rgb clamp(const rgb& val, const rgb& min, const rgb& max) {
46 |     return rgb(clamp(val.x, min.x, max.x),
47 |                clamp(val.y, min.y, max.y),
48 |                clamp(val.z, min.z, max.z));
49 | }
50 | 
51 | inline rgba clamp(const rgba& val, const rgba& min, const rgba& max) {
52 |     return rgba(clamp(val.x, min.x, max.x),
53 |                 clamp(val.y, min.y, max.y),
54 |                 clamp(val.z, min.z, max.z),
55 |                 clamp(val.w, min.w, max.w));
56 | }
57 | 
58 | #endif // COLOR_H
59 | 


--------------------------------------------------------------------------------
/src/driver/common.h:
--------------------------------------------------------------------------------
 1 | #ifndef COMMON_H
 2 | #define COMMON_H
 3 | 
 4 | #include <iostream>
 5 | #include <cstdlib>
 6 | #include <cstdint>
 7 | #include <random>
 8 | 
 9 | // Round to the integer above the division.
10 | inline uint32_t round_up(uint32_t val, uint32_t div) {
11 |     auto mod = val % div;
12 |     return val + (mod ? div - mod : 0);
13 | }
14 | 
/// Restricts `a` to the range [b, c]: returns b when a < b, c when a > c,
/// and a otherwise.
template <typename T>
inline T clamp(T a, T b, T c) {
    if (a < b)
        return b;
    if (a > c)
        return c;
    return a;
}
20 | 
/// Returns the logarithm base 2 of `i`, rounded up: the smallest q such that
/// 2^q >= i. Arguments less than or equal to 1 yield 0.
template <typename T>
inline T closest_log2(T i) {
    if (i <= T(1))
        return T(0);
    // For i > 1 the result equals the number of significant bits of (i - 1).
    // Shifting the argument right, rather than growing a power of two with
    // left shifts, cannot overflow, so the loop also terminates when i is
    // larger than the biggest power of two representable in T.
    T q = 0;
    for (T rest = i - 1; rest > 0; rest >>= 1)
        q++;
    return q;
}
28 | 
/// Returns the bit pattern of the float `f` as a 32-bit signed integer.
inline int32_t float_as_int(float f) {
    // The union is initialized through its first member (the float).
    union { float as_float; int32_t as_int; } bits = { f };
    return bits.as_int;
}
35 | 
/// Returns the float whose bit pattern is the 32-bit integer `i`.
inline float int_as_float(int32_t i) {
    // The union is initialized through its first member (the integer).
    union { int32_t as_int; float as_float; } bits = { i };
    return bits.as_float;
}
42 | 
/// Ends the current error line and aborts the program.
/// This overload terminates the recursion of the variadic version.
[[noreturn]] inline void error() {
    std::cerr << std::endl;
    std::abort();
}

/// Prints every argument to the standard error stream (wrapped in the
/// "\033[1;31m" ... "\033[0m" escape sequences when COLORIZE is enabled),
/// ends the line, and aborts the program.
template <typename Head, typename... Tail>
[[noreturn]] inline void error(Head head, Tail... tail) {
#if COLORIZE
    std::cerr << "\033[1;31m";
    std::cerr << head;
    std::cerr << "\033[0m";
#else
    std::cerr << head;
#endif
    error(tail...);
}
60 | 
/// Ends the current information line.
/// This overload terminates the recursion of the variadic version.
inline void info() {
    std::cout << std::endl;
}

/// Prints every argument to the standard output stream, then ends the line.
template <typename Head, typename... Tail>
inline void info(Head head, Tail... tail) {
    std::cout << head;
    info(tail...);
}
71 | 
/// Ends the current warning line.
/// This overload terminates the recursion of the variadic version.
inline void warn() {
    std::clog << std::endl;
}

/// Prints every argument to std::clog (wrapped in the "\033[1;33m" ...
/// "\033[0m" escape sequences when COLORIZE is enabled), then ends the line.
template <typename Head, typename... Tail>
inline void warn(Head head, Tail... tail) {
#if COLORIZE
    std::clog << "\033[1;33m";
    std::clog << head;
    std::clog << "\033[0m";
#else
    std::clog << head;
#endif
    warn(tail...);
}
88 | 
89 | #endif // COMMON_H
90 | 


--------------------------------------------------------------------------------
/src/driver/file_path.h:
--------------------------------------------------------------------------------
 1 | #ifndef FILE_PATH_H
 2 | #define FILE_PATH_H
 3 | 
 4 | #include <string>
 5 | #include <algorithm>
 6 | 
 7 | /// Represents a path in the file system.
 8 | class FilePath {
 9 | public:
10 |     FilePath(const std::string& path)
11 |         : path_(path)
12 |     {
13 |         std::replace(path_.begin(), path_.end(), '\\', '/');
14 |         auto pos = path_.rfind('/');
15 |         base_ = (pos != std::string::npos) ? path_.substr(0, pos)  : ".";
16 |         file_ = (pos != std::string::npos) ? path_.substr(pos + 1) : path_;
17 |     }
18 | 
19 |     const std::string& path() const { return path_; }
20 |     const std::string& base_name() const { return base_; }
21 |     const std::string& file_name() const { return file_; }
22 | 
23 |     std::string extension() const {
24 |         auto pos = file_.rfind('.');
25 |         return (pos != std::string::npos) ? file_.substr(pos + 1) : std::string();
26 |     }
27 | 
28 |     std::string remove_extension() const {
29 |         auto pos = file_.rfind('.');
30 |         return (pos != std::string::npos) ? file_.substr(0, pos) : file_;
31 |     }
32 | 
33 |     operator const std::string& () const {
34 |         return path();
35 |     }
36 | 
37 | private:
38 |     std::string path_;
39 |     std::string base_;
40 |     std::string file_;
41 | };
42 | 
43 | #endif // FILE_PATH_H
44 | 


--------------------------------------------------------------------------------
/src/driver/float2.h:
--------------------------------------------------------------------------------
  1 | #ifndef FLOAT2_H
  2 | #define FLOAT2_H
  3 | 
  4 | #include <cmath>
  5 | #include "common.h"
  6 | 
  7 | struct float3;
  8 | struct float4;
  9 | 
 10 | struct float2 {
 11 |     union {
 12 |         struct { float x, y; };
 13 |         float values[2];
 14 |     };
 15 | 
 16 |     float2() {}
 17 |     explicit float2(float x) : x(x), y(x) {}
 18 |     explicit float2(const float3& xy);
 19 |     explicit float2(const float4& xy);
 20 |     float2(float x, float y) : x(x), y(y) {}
 21 | 
 22 |     bool operator == (const float2& other) const {
 23 |         return x == other.x && y == other.y;
 24 |     }
 25 | 
 26 |     bool operator != (const float2& other) const {
 27 |         return x != other.x || y != other.y;
 28 |     }
 29 | 
 30 |     float operator [] (size_t i) const { return values[i]; }
 31 |     float& operator [] (size_t i) { return values[i]; }
 32 | 
 33 |     float2& operator += (const float2& a) {
 34 |         x += a.x; y += a.y;
 35 |         return *this;
 36 |     }
 37 | 
 38 |     float2& operator -= (const float2& a) {
 39 |         x -= a.x; y -= a.y;
 40 |         return *this;
 41 |     }
 42 | 
 43 |     float2& operator *= (float a) {
 44 |         x *= a; y *= a;
 45 |         return *this;
 46 |     }
 47 | 
 48 |     float2& operator *= (const float2& a) {
 49 |         x *= a.x; y *= a.y;
 50 |         return *this;
 51 |     }
 52 | };
 53 | 
 54 | inline float2 operator * (float a, const float2& b) {
 55 |     return float2(a * b.x, a * b.y);
 56 | }
 57 | 
 58 | inline float2 operator * (const float2& a, float b) {
 59 |     return float2(a.x * b, a.y * b);
 60 | }
 61 | 
 62 | inline float2 operator / (const float2& a, float b) {
 63 |     return a * (1.0f / b);
 64 | }
 65 | 
 66 | inline float2 operator - (const float2& a, const float2& b) {
 67 |     return float2(a.x - b.x, a.y - b.y);
 68 | }
 69 | 
 70 | inline float2 operator + (const float2& a, const float2& b) {
 71 |     return float2(a.x + b.x, a.y + b.y);
 72 | }
 73 | 
 74 | inline float2 operator * (const float2& a, const float2& b) {
 75 |     return float2(a.x * b.x, a.y * b.y);
 76 | }
 77 | 
 78 | inline float2 min(const float2& a, const float2& b) {
 79 |     return float2(a.x < b.x ? a.x : b.x,
 80 |                   a.y < b.y ? a.y : b.y);
 81 | }
 82 | 
 83 | inline float2 max(const float2& a, const float2& b) {
 84 |     return float2(a.x > b.x ? a.x : b.x,
 85 |                   a.y > b.y ? a.y : b.y);
 86 | }
 87 | 
 88 | inline float dot(const float2& a, const float2& b) {
 89 |     return a.x * b.x + a.y * b.y;
 90 | }
 91 | 
 92 | inline float lensqr(const float2& a) {
 93 |     return dot(a, a);
 94 | }
 95 | 
 96 | inline float length(const float2& a) {
 97 |     return std::sqrt(dot(a, a));
 98 | }
 99 | 
100 | inline float2 normalize(const float2& a) {
101 |     return a * (1.0f / length(a));
102 | }
103 | 
104 | #endif // FLOAT2_H
105 | 


--------------------------------------------------------------------------------
/src/driver/float3.h:
--------------------------------------------------------------------------------
  1 | #ifndef FLOAT3_H
  2 | #define FLOAT3_H
  3 | 
  4 | #include <cmath>
  5 | #include "common.h"
  6 | #include "float2.h"
  7 | 
  8 | struct float4;
  9 | 
 10 | struct float3 {
 11 |     union {
 12 |         struct { float x, y, z; };
 13 |         float values[3];
 14 |     };
 15 | 
 16 |     float3() {}
 17 |     explicit float3(float x) : x(x), y(x), z(x) {}
 18 |     explicit float3(const float4& xyz);
 19 |     float3(float x, float y, float z) : x(x), y(y), z(z) {}
 20 |     float3(const float2& xy, float z) : x(xy.x), y(xy.y), z(z) {}
 21 |     float3(float x, const float2& yz) : x(x), y(yz.x), z(yz.y) {}
 22 | 
 23 |     bool operator == (const float3& other) const {
 24 |         return x == other.x && y == other.y && z == other.z;
 25 |     }
 26 | 
 27 |     bool operator != (const float3& other) const {
 28 |         return x != other.x || y != other.y || z != other.z;
 29 |     }
 30 | 
 31 |     float operator [] (size_t i) const { return values[i]; }
 32 |     float& operator [] (size_t i) { return values[i]; }
 33 | 
 34 |     float3& operator += (const float3& a) {
 35 |         x += a.x; y += a.y; z += a.z;
 36 |         return *this;
 37 |     }
 38 | 
 39 |     float3& operator -= (const float3& a) {
 40 |         x -= a.x; y -= a.y; z -= a.z;
 41 |         return *this;
 42 |     }
 43 | 
 44 |     float3& operator *= (float a) {
 45 |         x *= a; y *= a; z *= a;
 46 |         return *this;
 47 |     }
 48 | 
 49 |     float3& operator *= (const float3& a) {
 50 |         x *= a.x; y *= a.y; z *= a.z;
 51 |         return *this;
 52 |     }
 53 | };
 54 | 
 55 | inline float2::float2(const float3& xy)
 56 |     : x(xy.x), y(xy.y)
 57 | {}
 58 | 
 59 | inline float3 operator * (float a, const float3& b) {
 60 |     return float3(a * b.x, a * b.y, a * b.z);
 61 | }
 62 | 
 63 | inline float3 operator * (const float3& a, float b) {
 64 |     return float3(a.x * b, a.y * b, a.z * b);
 65 | }
 66 | 
 67 | inline float3 operator / (const float3& a, float b) {
 68 |     return a * (1.0f / b);
 69 | }
 70 | 
 71 | inline float3 operator - (const float3& a, const float3& b) {
 72 |     return float3(a.x - b.x, a.y - b.y, a.z - b.z);
 73 | }
 74 | 
 75 | inline float3 operator - (const float3& a) {
 76 |     return float3(-a.x, -a.y, -a.z);
 77 | }
 78 | 
 79 | inline float3 operator + (const float3& a, const float3& b) {
 80 |     return float3(a.x + b.x, a.y + b.y, a.z + b.z);
 81 | }
 82 | 
 83 | inline float3 operator * (const float3& a, const float3& b) {
 84 |     return float3(a.x * b.x, a.y * b.y, a.z * b.z);
 85 | }
 86 | 
 87 | inline float3 operator / (const float3& a, const float3& b) {
 88 |     return float3(a.x / b.x, a.y / b.y, a.z / b.z);
 89 | }
 90 | 
 91 | inline float3 cross(const float3& a, const float3& b) {
 92 |     return float3(a.y * b.z - a.z * b.y,
 93 |                   a.z * b.x - a.x * b.z,
 94 |                   a.x * b.y - a.y * b.x);
 95 | }
 96 | 
 97 | inline float3 rotate(const float3& v, const float3& axis, float angle) {
 98 |     float q[4];
 99 |     q[0] = axis.x * sinf(angle / 2);
100 |     q[1] = axis.y * sinf(angle / 2);
101 |     q[2] = axis.z * sinf(angle / 2);
102 |     q[3] = std::cos(angle / 2);
103 | 
104 |     float p[4];
105 |     p[0] = q[3] * v.x + q[1] * v.z - q[2] * v.y;
106 |     p[1] = q[3] * v.y - q[0] * v.z + q[2] * v.x;
107 |     p[2] = q[3] * v.z + q[0] * v.y - q[1] * v.x;
108 |     p[3] = -(q[0] * v.x + q[1] * v.y + q[2] * v.z);
109 | 
110 |     return float3(p[3] * -q[0] + p[0] * q[3] + p[1] * -q[2] - p[2] * -q[1],
111 |                   p[3] * -q[1] - p[0] * -q[2] + p[1] * q[3] + p[2] * -q[0],
112 |                   p[3] * -q[2] + p[0] * -q[1] - p[1] * -q[0] + p[2] * q[3]);
113 | }
114 | 
115 | inline float3 min(const float3& a, const float3& b) {
116 |     return float3(a.x < b.x ? a.x : b.x,
117 |                   a.y < b.y ? a.y : b.y,
118 |                   a.z < b.z ? a.z : b.z);
119 | }
120 | 
121 | inline float3 max(const float3& a, const float3& b) {
122 |     return float3(a.x > b.x ? a.x : b.x,
123 |                   a.y > b.y ? a.y : b.y,
124 |                   a.z > b.z ? a.z : b.z);
125 | }
126 | 
127 | inline float dot(const float3& a, const float3& b) {
128 |     return a.x * b.x + a.y * b.y + a.z * b.z;
129 | }
130 | 
131 | inline float lensqr(const float3& a) {
132 |     return dot(a, a);
133 | }
134 | 
135 | inline float length(const float3& a) {
136 |     return std::sqrt(dot(a, a));
137 | }
138 | 
139 | inline float3 normalize(const float3& a) {
140 |     return a * (1.0f / length(a));
141 | }
142 | 
143 | #endif // FLOAT3_H
144 | 


--------------------------------------------------------------------------------
/src/driver/float4.h:
--------------------------------------------------------------------------------
  1 | #ifndef FLOAT4_H
  2 | #define FLOAT4_H
  3 | 
  4 | #include <cmath>
  5 | #include "common.h"
  6 | #include "float2.h"
  7 | #include "float3.h"
  8 | 
  9 | struct float4 {
 10 |     union {
 11 |         struct { float x, y, z, w; };
 12 |         float values[4];
 13 |     };
 14 | 
 15 |     float4() {}
 16 |     explicit float4(float x) : x(x), y(x), z(x), w(x) {}
 17 |     float4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
 18 |     float4(const float3& xyz, float w) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {}
 19 |     float4(float x, const float3& yzw) : x(x), y(yzw.x), z(yzw.y), w(yzw.z) {}
 20 |     float4(const float2& xy, float z, float w) : x(xy.x), y(xy.y), z(z), w(w) {}
 21 |     float4(float x, const float2& yz, float w) : x(x), y(yz.x), z(yz.y), w(w) {}
 22 |     float4(float x, float y, const float2& zw) : x(x), y(y), z(zw.x), w(zw.y) {}
 23 |     float4(const float2& xy, const float2& zw) : x(xy.x), y(xy.y), z(zw.x), w(zw.y) {}
 24 | 
 25 |     bool operator == (const float4& other) const {
 26 |         return x == other.x && y == other.y && z == other.z && w != other.w;
 27 |     }
 28 | 
 29 |     bool operator != (const float4& other) const {
 30 |         return x != other.x || y != other.y || z != other.z || w != other.w;
 31 |     }
 32 | 
 33 |     float operator [] (size_t i) const { return values[i]; }
 34 |     float& operator [] (size_t i) { return values[i]; }
 35 | 
 36 |     float4& operator += (const float4& a) {
 37 |         x += a.x; y += a.y; z += a.z; w += a.w;
 38 |         return *this;
 39 |     }
 40 | 
 41 |     float4& operator -= (const float4& a) {
 42 |         x -= a.x; y -= a.y; z -= a.z; w -= a.w;
 43 |         return *this;
 44 |     }
 45 | 
 46 |     float4& operator *= (float a) {
 47 |         x *= a; y *= a; z *= a; w *= a;
 48 |         return *this;
 49 |     }
 50 | 
 51 |     float4& operator *= (const float4& a) {
 52 |         x *= a.x; y *= a.y; z *= a.z; w *= a.w;
 53 |         return *this;
 54 |     }
 55 | };
 56 | 
 57 | inline float2::float2(const float4& xy)
 58 |     : x(xy.x), y(xy.y)
 59 | {}
 60 | 
 61 | inline float3::float3(const float4& xyz)
 62 |     : x(xyz.x), y(xyz.y), z(xyz.z)
 63 | {}
 64 | 
 65 | inline float4 operator * (float a, const float4& b) {
 66 |     return float4(a * b.x, a * b.y, a * b.z, a * b.w);
 67 | }
 68 | 
 69 | inline float4 operator * (const float4& a, float b) {
 70 |     return float4(a.x * b, a.y * b, a.z * b, a.w * b);
 71 | }
 72 | 
 73 | inline float4 operator / (const float4& a, float b) {
 74 |     return a * (1.0f / b);
 75 | }
 76 | 
 77 | inline float4 operator - (const float4& a, const float4& b) {
 78 |     return float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
 79 | }
 80 | 
 81 | inline float4 operator - (const float4& a) {
 82 |     return float4(-a.x, -a.y, -a.z, -a.w);
 83 | }
 84 | 
 85 | inline float4 operator + (const float4& a, const float4& b) {
 86 |     return float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
 87 | }
 88 | 
 89 | inline float4 operator * (const float4& a, const float4& b) {
 90 |     return float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
 91 | }
 92 | 
 93 | inline float4 abs(const float4& a) {
 94 |     return float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
 95 | }
 96 | 
 97 | inline float4 min(const float4& a, const float4& b) {
 98 |     return float4(a.x < b.x ? a.x : b.x,
 99 |                   a.y < b.y ? a.y : b.y,
100 |                   a.z < b.z ? a.z : b.z,
101 |                   a.w < b.w ? a.w : b.w);
102 | }
103 | 
104 | inline float4 max(const float4& a, const float4& b) {
105 |     return float4(a.x > b.x ? a.x : b.x,
106 |                   a.y > b.y ? a.y : b.y,
107 |                   a.z > b.z ? a.z : b.z,
108 |                   a.w > b.w ? a.w : b.w);
109 | }
110 | 
111 | inline float dot(const float4& a, const float4& b) {
112 |     return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
113 | }
114 | 
115 | inline float lensqr(const float4& a) {
116 |     return dot(a, a);
117 | }
118 | 
119 | inline float length(const float4& a) {
120 |     return std::sqrt(dot(a, a));
121 | }
122 | 
123 | inline float4 normalize(const float4& a) {
124 |     return a * (1.0f / length(a));
125 | }
126 | 
127 | inline float4 clamp(const float4& val, const float4& min, const float4& max) {
128 |     return float4(clamp(val.x, min.x, max.x),
129 |                   clamp(val.y, min.y, max.y),
130 |                   clamp(val.z, min.z, max.z),
131 |                   clamp(val.w, min.w, max.w));
132 | }
133 | 
134 | #endif // FLOAT4_H
135 | 


--------------------------------------------------------------------------------
/src/driver/image.h:
--------------------------------------------------------------------------------
 1 | #ifndef IMAGE_H
 2 | #define IMAGE_H
 3 | 
#include <cstddef>
#include <cstdint>
#include <memory>

#include "file_path.h"
 5 | 
 6 | struct ImageRgba32 {
 7 |     std::unique_ptr<uint8_t[]> pixels;
 8 |     size_t width, height;
 9 | };
10 | 
11 | bool load_png(const FilePath&, ImageRgba32&);
12 | bool load_jpg(const FilePath&, ImageRgba32&);
13 | bool save_png(const FilePath&, const ImageRgba32&);
14 | 
15 | #endif // IMAGE_H
16 | 


--------------------------------------------------------------------------------
/src/driver/obj.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOAD_OBJ_H
 2 | #define LOAD_OBJ_H
 3 | 
 4 | #include <vector>
 5 | #include <string>
 6 | #include <unordered_map>
 7 | 
 8 | #include "float3.h"
 9 | #include "color.h"
10 | #include "file_path.h"
11 | 
12 | namespace obj {
13 | 
14 | struct Index {
15 |     int v, n, t;
16 | };
17 | 
18 | struct Face {
19 |     std::vector<Index> indices;
20 |     int material;
21 | };
22 | 
23 | struct Group {
24 |     std::vector<Face> faces;
25 | };
26 | 
27 | struct Object {
28 |     std::vector<Group> groups;
29 | };
30 | 
31 | struct Material {
32 |     rgb ka;
33 |     rgb kd;
34 |     rgb ks;
35 |     rgb ke;
36 |     float ns;
37 |     float ni;
38 |     rgb tf;
39 |     float tr;
40 |     float d;
41 |     int illum;
42 |     std::string map_ka;
43 |     std::string map_kd;
44 |     std::string map_ks;
45 |     std::string map_ke;
46 |     std::string map_bump;
47 |     std::string map_d;
48 | };
49 | 
50 | struct File {
51 |     std::vector<Object>      objects;
52 |     std::vector<float3>      vertices;
53 |     std::vector<float3>      normals;
54 |     std::vector<float2>      texcoords;
55 |     std::vector<std::string> materials;
56 |     std::vector<std::string> mtl_libs;
57 | };
58 | 
59 | typedef std::unordered_map<std::string, Material> MaterialLib;
60 | 
61 | struct TriMesh {
62 |     std::vector<float3>   vertices;
63 |     std::vector<uint32_t> indices;
64 |     std::vector<float3>   normals;
65 |     std::vector<float3>   face_normals;
66 |     std::vector<float2>   texcoords;
67 | };
68 | 
69 | bool load_obj(const FilePath&, File&);
70 | bool load_mtl(const FilePath&, MaterialLib&);
71 | TriMesh compute_tri_mesh(const File&, size_t);
72 | 
73 | } // namespace obj
74 | 
75 | #endif // LOAD_OBJ_H
76 | 


--------------------------------------------------------------------------------
/src/driver/tri.h:
--------------------------------------------------------------------------------
 1 | #ifndef TRI_H
 2 | #define TRI_H
 3 | 
 4 | #include "float3.h"
 5 | #include "bbox.h"
 6 | 
 7 | struct Tri {
 8 |     float3 v0, v1, v2;
 9 | 
10 |     Tri() {}
11 |     Tri(const float3& v0, const float3& v1, const float3& v2)
12 |         : v0(v0), v1(v1), v2(v2)
13 |     {}
14 | 
15 |     float3& operator[] (int i) { return i == 0 ? v0 : (i == 1 ? v1 : v2); }
16 |     const float3& operator[] (int i) const { return i == 0 ? v0 : (i == 1 ? v1 : v2); }
17 | 
18 |     float area() const { return length(cross(v1 - v0, v2 - v0)) / 2; }
19 | 
20 |     /// Computes the triangle bounding box.
21 |     void compute_bbox(BBox& bb) const {
22 |         bb.min = min(v0, min(v1, v2));
23 |         bb.max = max(v0, max(v1, v2));
24 |     }
25 | 
26 |     /// Splits the triangle along one axis and returns the resulting two bounding boxes.
27 |     void compute_split(BBox& left_bb, BBox& right_bb, int axis, float split) const {
28 |         left_bb = BBox::empty();
29 |         right_bb = BBox::empty();
30 | 
31 |         const float3& e0 = v1 - v0;
32 |         const float3& e1 = v2 - v1;
33 |         const float3& e2 = v0 - v2;
34 | 
35 |         const bool left0 = v0[axis] <= split;
36 |         const bool left1 = v1[axis] <= split;
37 |         const bool left2 = v2[axis] <= split;
38 | 
39 |         if (left0) left_bb.extend(v0);
40 |         if (left1) left_bb.extend(v1);
41 |         if (left2) left_bb.extend(v2);
42 | 
43 |         if (!left0) right_bb.extend(v0);
44 |         if (!left1) right_bb.extend(v1);
45 |         if (!left2) right_bb.extend(v2);
46 | 
47 |         if (left0 ^ left1) {
48 |             const float3& p = clip_edge(axis, split, v0, e0);
49 |             left_bb.extend(p);
50 |             right_bb.extend(p);
51 |         }
52 |         if (left1 ^ left2) {
53 |             const float3& p = clip_edge(axis, split, v1, e1);
54 |             left_bb.extend(p);
55 |             right_bb.extend(p);
56 |         }
57 |         if (left2 ^ left0) {
58 |             const float3& p = clip_edge(axis, split, v2, e2);
59 |             left_bb.extend(p);
60 |             right_bb.extend(p);
61 |         }
62 |     }
63 | 
64 | private:
65 |     static float3 clip_edge(int axis, float plane, const float3& p, const float3& edge) {
66 |         const float t = (plane - p[axis]) / (edge[axis]);
67 |         return p + t * edge;
68 |     }
69 | };
70 | 
71 | #endif // TRI_H
72 | 


--------------------------------------------------------------------------------
/src/dummy_main.impala:
--------------------------------------------------------------------------------
 1 | // Dummy file used to generate a C interface for the renderer
 2 | 
// Settings passed by reference to render().
// NOTE(review): eye/dir/up/right presumably hold the camera position and basis
// vectors, and width/height the extent of the image plane — confirm with the driver.
struct Settings {
    eye: Vec3,
    dir: Vec3,
    up: Vec3,
    right: Vec3,
    width: f32,
    height: f32
}
11 | 
// Dummy definition: always returns 1.
extern fn get_spp() -> i32 { 1 }
// Dummy definition with an empty body.
extern fn render(settings: &Settings, iter: i32) -> () {}
14 | 


--------------------------------------------------------------------------------
/src/render/camera.impala:
--------------------------------------------------------------------------------
// Opaque camera object: a record of functions that describe the camera
struct Camera {
    // Generates a ray for a point on the image plane (in [-1, 1]^2)
    generate_ray: fn (f32, f32) -> Ray,
    // Projects a 3D point on the image plane
    project: fn (Vec3) -> Vec3,
    // Unprojects a point on the image plane
    unproject: fn (Vec3) -> Vec3,
    // Computes the local camera geometry for a point on the image plane
    geometry: fn (f32, f32) -> CameraGeometry
}
12 | 
// Local geometry of the camera lens, as returned by Camera.geometry
struct CameraGeometry {
    cos_dir: f32,    // Cosine between the ray direction and the camera normal
    dist:    f32,    // Distance between the camera origin and the point on the image plane
    area:    f32     // Local pixel area (relative to total lens area)
}
19 | 
// Packs the three values into a CameraGeometry record
fn @make_camera_geometry(cos_dir: f32, dist: f32, area: f32) -> CameraGeometry {
    let geometry = CameraGeometry {
        cos_dir: cos_dir,
        dist:    dist,
        area:    area
    };
    geometry
}
27 | 
// Creates a perspective camera located at `eye`.
// The columns of `view` are used as the right (0), up (1) and viewing
// direction (2) vectors; `w` and `h` scale image-plane coordinates along
// the right and up vectors.
fn @make_perspective_camera(math: Intrinsics, eye: Vec3, view: Mat3x3, w: f32, h: f32) -> Camera {
    let right = view.col(0);
    let up    = view.col(1);
    let dir   = view.col(2);

    Camera {
        generate_ray: @ |x, y| {
            // Offset on the image plane, then the unnormalized ray direction
            let offset_x     = vec3_mulf(right, w * x);
            let offset_y     = vec3_mulf(up, h * y);
            let unnormalized = vec3_add(vec3_add(offset_x, offset_y), dir);
            make_ray(eye, vec3_normalize(math, unnormalized), 0.0f, flt_max)
        },
        project: @ |p| {
            let to_point = vec3_normalize(math, vec3_sub(p, eye));
            let px = vec3_dot(to_point, right) / w;
            let py = vec3_dot(to_point, up) / h;
            let pz = -vec3_dot(to_point, dir);
            make_vec3(px, py, pz)
        },
        // NOTE(review): ignores its argument and always returns the eye position
        unproject: @ |p| eye,
        geometry: @ |x, y| {
            let length = math.sqrtf(1.0f + x * x * w * w + y * y * h * h);
            make_camera_geometry(1.0f / length, length, 1.0f / (4.0f * w * h))
        }
    }
}
58 | 


--------------------------------------------------------------------------------
/src/render/driver.impala:
--------------------------------------------------------------------------------
  1 | // Driver functions ----------------------------------------------------------------
  2 | 
// Functions imported with C linkage; their definitions are not part of this file.
// NOTE(review): the leading i32 argument of most functions presumably selects
// the device — confirm against the driver implementation.
extern "C" {
    fn rodent_get_film_data(i32, &mut &mut [f32], &mut i32, &mut i32) -> ();
    fn rodent_cpu_get_primary_stream(&mut PrimaryStream, i32) -> ();
    fn rodent_cpu_get_secondary_stream(&mut SecondaryStream, i32) -> ();
    fn rodent_gpu_get_first_primary_stream(i32, &mut PrimaryStream, i32) -> ();
    fn rodent_gpu_get_second_primary_stream(i32, &mut PrimaryStream, i32) -> ();
    fn rodent_gpu_get_secondary_stream(i32, &mut SecondaryStream, i32) -> ();
    fn rodent_gpu_get_tmp_buffer(i32, &mut &mut [i32], i32) -> ();
    fn rodent_load_buffer(i32, &[u8]) -> &[i8];
    fn rodent_load_bvh2_tri1(i32, &[u8], &mut &[Node2], &mut &[Tri1]) -> ();
    fn rodent_load_bvh4_tri4(i32, &[u8], &mut &[Node4], &mut &[Tri4]) -> ();
    fn rodent_load_bvh8_tri4(i32, &[u8], &mut &[Node8], &mut &[Tri4]) -> ();
    fn rodent_load_png(i32, &[u8], &mut &[u8], &mut i32, &mut i32) -> ();
    fn rodent_load_jpg(i32, &[u8], &mut &[u8], &mut i32, &mut i32) -> ();
    fn rodent_cpu_intersect_primary_embree(&PrimaryStream, i32, i32) -> ();
    fn rodent_cpu_intersect_secondary_embree(&SecondaryStream) -> ();
    fn rodent_present(i32) -> ();
}
 21 | 
 22 | // Ray streams ---------------------------------------------------------------------
 23 | 
// Rays stored with one array per component: origin, direction and the
// [tmin, tmax] range of each ray live at the same index in every array.
struct RayStream {
    id: &mut [i32], // this field is also used to indicate if the ray is alive
    org_x: &mut [f32],
    org_y: &mut [f32],
    org_z: &mut [f32],
    dir_x: &mut [f32],
    dir_y: &mut [f32],
    dir_z: &mut [f32],
    tmin: &mut [f32],
    tmax: &mut [f32],
}
 35 | 
// Ray stream together with per-ray hit data (geom_id, prim_id, t, u, v)
// and per-ray state (rnd, mis, contrib_r/g/b, depth), one array per field.
struct PrimaryStream {
    rays: RayStream,
    geom_id: &mut [i32],
    prim_id: &mut [i32],
    t: &mut [f32],
    u: &mut [f32],
    v: &mut [f32],
    rnd: &mut [RndState],
    mis: &mut [f32],
    contrib_r: &mut [f32],
    contrib_g: &mut [f32],
    contrib_b: &mut [f32],
    depth: &mut [i32],
    size: i32,
    pad: i32 // TODO: Needed for AMDGPU backend
}
 52 | 
// Ray stream together with a per-ray primitive index and a per-ray color
// (color_r/g/b), one array per field.
struct SecondaryStream {
    rays: RayStream,
    prim_id: &mut [i32],
    color_r: &mut [f32],
    color_g: &mut [f32],
    color_b: &mut [f32],
    size: i32,
    pad: i32 // TODO: Needed for AMDGPU backend
}
 62 | 
 63 | fn @make_ray_stream_reader(rays: RayStream, vector_width: i32) -> fn (i32, i32) -> Ray {
 64 |     @ |i, j| {
 65 |         let k = i * vector_width + j;
 66 |         make_ray(
 67 |             make_vec3(rays.org_x(k),
 68 |                       rays.org_y(k),
 69 |                       rays.org_z(k)),
 70 |             make_vec3(rays.dir_x(k),
 71 |                       rays.dir_y(k),
 72 |                       rays.dir_z(k)),
 73 |             rays.tmin(k),
 74 |             rays.tmax(k)
 75 |         )
 76 |     }
 77 | }
 78 | 
 79 | fn @make_ray_stream_writer(rays: RayStream, vector_width: i32) -> fn (i32, i32, Ray) -> () {
 80 |     @ |i, j, ray| {
 81 |         let k = i * vector_width + j;
 82 |         rays.org_x(k) = ray.org.x;
 83 |         rays.org_y(k) = ray.org.y;
 84 |         rays.org_z(k) = ray.org.z;
 85 |         rays.dir_x(k) = ray.dir.x;
 86 |         rays.dir_y(k) = ray.dir.y;
 87 |         rays.dir_z(k) = ray.dir.z;
 88 |         rays.tmin(k)  = ray.tmin;
 89 |         rays.tmax(k)  = ray.tmax;
 90 |     }
 91 | }
 92 | 
 93 | fn @make_primary_stream_hit_reader(primary: PrimaryStream, vector_width: i32) -> fn (i32, i32) -> Hit {
 94 |     @ |i, j| {
 95 |         let k = i * vector_width + j;
 96 |         make_hit(
 97 |             primary.geom_id(k),
 98 |             primary.prim_id(k),
 99 |             primary.t(k),
100 |             make_vec2(primary.u(k),
101 |                       primary.v(k))
102 |         )
103 |     }
104 | }
105 | 
// Returns a function that stores a hit at index i * vector_width + j.
// A geometry id of -1 is replaced by `invalid_geom_id` before it is stored.
fn @make_primary_stream_hit_writer(primary: PrimaryStream, vector_width: i32, invalid_geom_id: i32) -> fn (i32, i32, Hit) -> () {
    @ |i, j, hit| {
        let idx     = i * vector_width + j;
        let geom_id = if hit.geom_id != -1 { hit.geom_id } else { invalid_geom_id };
        let uv      = hit.uv_coords;
        primary.geom_id(idx) = geom_id;
        primary.prim_id(idx) = hit.prim_id;
        primary.t(idx)       = hit.distance;
        primary.u(idx)       = uv.x;
        primary.v(idx)       = uv.y;
    }
}
116 | 
// Returns a function that stores the primitive index of a hit at index
// i * vector_width + j; the other hit fields are not stored.
fn @make_secondary_stream_hit_writer(secondary: SecondaryStream, vector_width: i32) -> fn (i32, i32, Hit) -> () {
    @ |i, j, hit| {
        let idx = i * vector_width + j;
        let prim_id = hit.prim_id;
        secondary.prim_id(idx) = prim_id;
    }
}
123 | 
// Returns a function that reads the ray state stored at index i * vector_width + j
fn @make_primary_stream_state_reader(primary: PrimaryStream, vector_width: i32) -> fn (i32, i32) -> RayState {
    @ |i, j| {
        let idx     = i * vector_width + j;
        let contrib = make_color(primary.contrib_r(idx), primary.contrib_g(idx), primary.contrib_b(idx));
        RayState {
            rnd:     primary.rnd(idx),
            contrib: contrib,
            mis:     primary.mis(idx),
            depth:   primary.depth(idx)
        }
    }
}
135 | 
// Returns a function that stores a ray state at index i * vector_width + j
fn @make_primary_stream_state_writer(primary: PrimaryStream, vector_width: i32) -> fn (i32, i32, RayState) -> () {
    @ |i, j, state| {
        let idx     = i * vector_width + j;
        let contrib = state.contrib;
        primary.rnd(idx)       = state.rnd;
        primary.contrib_r(idx) = contrib.r;
        primary.contrib_g(idx) = contrib.g;
        primary.contrib_b(idx) = contrib.b;
        primary.mis(idx)       = state.mis;
        primary.depth(idx)     = state.depth;
    }
}
147 | 


--------------------------------------------------------------------------------
/src/render/geometry.impala:
--------------------------------------------------------------------------------
 1 | // Abstract geometry object (tied to one device)
 2 | struct Geometry {
 3 |     // Computes the surface element (point, normals, local frame, attributes) for a hit on this geometry
 4 |     surface_element: fn (Ray, Hit) -> SurfaceElement,
 5 |     // Shader attached to this geometry
 6 |     shader: Shader
 7 | }
 8 | 
 9 | // Triangle mesh with per-vertex/per-face attributes
10 | struct TriMesh {
11 |     vertices:     fn (i32) -> Vec3, // presumably the vertex positions, by vertex index (not read in this file)
12 |     normals:      fn (i32) -> Vec3, // per-vertex normal, looked up with the vertex indices of a triangle
13 |     face_normals: fn (i32) -> Vec3, // per-face normal, looked up with the primitive id
14 |     triangles:    fn (i32) -> (i32, i32, i32), // the three vertex indices of a triangle, by primitive id
15 |     attrs:        fn (i32) -> (bool, fn (i32) -> Vec4), // attribute i: (true if per-face, value accessor)
16 |     num_attrs:    i32, // attribute indices at or above this count read as zero
17 |     num_tris:     i32  // presumably the triangle count (not read in this file)
18 | }
19 | 
20 | // Builds the geometry of a triangle mesh: hits are turned into surface elements using the mesh data
21 | fn @make_tri_mesh_geometry(math: Intrinsics, tri_mesh: TriMesh, shader: Shader) -> Geometry {
22 |     Geometry {
23 |         surface_element: @ |ray, hit| {
24 |             let (a, b, c) = tri_mesh.triangles(hit.prim_id);
25 |             let (u, v) = (hit.uv_coords.x, hit.uv_coords.y);
26 | 
27 |             // Normal of the face, and normal interpolated from the three vertices
28 |             let geom_n = tri_mesh.face_normals(hit.prim_id);
29 |             let shade_n = vec3_normalize(math, vec3_lerp2(tri_mesh.normals(a), tri_mesh.normals(b), tri_mesh.normals(c), u, v));
30 |             let entering = vec3_dot(ray.dir, geom_n) <= 0.0f;
31 | 
32 |             // Attribute lookup: zero past `num_attrs`, otherwise per-face or interpolated per-vertex
33 |             fn @attr(i: i32) -> Vec4 {
34 |                 if i < tri_mesh.num_attrs {
35 |                     let (per_face, values) = tri_mesh.attrs(i);
36 |                     if per_face { values(hit.prim_id) } else { vec4_lerp2(values(a), values(b), values(c), u, v) }
37 |                 } else {
38 |                     make_vec4(0.0f, 0.0f, 0.0f, 0.0f)
39 |                 }
40 |             }
41 | 
42 |             SurfaceElement {
43 |                 is_entering: entering,
44 |                 point:       vec3_add(ray.org, vec3_mulf(ray.dir, hit.distance)),
45 |                 // Both normals are flipped, if needed, so that they face against the ray direction
46 |                 face_normal: if entering { geom_n } else { vec3_neg(geom_n) },
47 |                 uv_coords:   hit.uv_coords,
48 |                 local:       make_orthonormal_mat3x3(if vec3_dot(ray.dir, shade_n) <= 0.0f { shade_n } else { vec3_neg(shade_n) }),
49 |                 attr:        attr
50 |             }
51 |         },
52 |         shader: shader
53 |     }
54 | }
55 | 


--------------------------------------------------------------------------------
/src/render/image.impala:
--------------------------------------------------------------------------------
 1 | // Images are discrete collections of pixels with a fixed number of channels
 2 | struct Image {
 3 |     pixels:   fn (i32, i32) -> Color, // pixel accessor, called as pixels(x, y)
 4 |     width:    i32,                    // number of pixels along x
 5 |     height:   i32                     // number of pixels along y
 6 | }
 7 | 
 8 | struct BorderHandling { // per-axis remapping applied to a texture coordinate before filtering
 9 |     horz: fn (Intrinsics, f32) -> f32, // applied to the x component of the coordinate
10 |     vert: fn (Intrinsics, f32) -> f32  // applied to the y component of the coordinate
11 | }
12 | 
13 | type Texture = fn (Vec2) -> Color; // maps a texture coordinate to a color
14 | type ImageFilter = fn (Intrinsics, Image, Vec2) -> Color; // looks up an image at a coordinate that has already been border-handled
15 | 
16 | fn @make_image(pixels: fn (i32, i32) -> Color, width: i32, height: i32) -> Image {
17 |     // Bundles a pixel accessor with the image dimensions.
18 |     // The accessor is stored as-is: no bounds checking is added here.
19 |     let w = width;
20 |     let h = height;
21 |     Image { pixels: pixels, width: w, height: h }
22 | }
23 | 
24 | fn @make_image_rgba32(pixels: fn (i32, i32) -> u32, width: i32, height: i32) -> Image {
25 |     // Converts an 8-bit channel value to a float by scaling with 1/255
26 |     fn @to_unit(c: u32) -> f32 { (c as f32) * (1.0f / 255.0f) }
27 |     Image {
28 |         pixels: @ |x, y| {
29 |             // Red is read from bits 0-7, green from bits 8-15, blue from bits 16-23; bits 24-31 are not read
30 |             let packed = pixels(x, y);
31 |             make_color(to_unit(packed & 0xFFu),
32 |                        to_unit((packed >> 8u) & 0xFFu),
33 |                        to_unit((packed >> 16u) & 0xFFu))
34 |         },
35 |         width:    width,
36 |         height:   height
37 |     }
38 | }
39 | 
40 | fn @make_clamp_border() -> BorderHandling {
41 |     // Coordinates are clamped to [0, 1] along both axes
42 |     BorderHandling {
43 |         horz: @ |math, x| math.fminf(1.0f, math.fmaxf(0.0f, x)),
44 |         vert: @ |math, x| math.fminf(1.0f, math.fmaxf(0.0f, x))
45 |     }
46 | }
47 | 
48 | fn @make_repeat_border() -> BorderHandling {
49 |     // Keeps x - floor(x) along both axes, so that the coordinate wraps around
50 |     BorderHandling {
51 |         horz: @ |math, x| x - math.floorf(x),
52 |         vert: @ |math, x| x - math.floorf(x)
53 |     }
54 | }
55 | 
56 | fn @make_nearest_filter() -> ImageFilter {
57 |     @ |math, img, uv| { // single pixel lookup; the index is capped at the last column/row
58 |         let (w, h) = (img.width, img.height);
59 |         img.pixels(math.min((uv.x * (w as f32)) as i32, w - 1), math.min((uv.y * (h as f32)) as i32, h - 1))
60 |     }
61 | }
62 | 
63 | fn @make_bilinear_filter() -> ImageFilter {
64 |     @ |math, img, uv| {
65 |         // Coordinates scaled to pixel units
66 |         let (w, h) = (img.width, img.height);
67 |         let u = uv.x * (w as f32);
68 |         let v = uv.y * (h as f32);
69 |         // Corners of the 2x2 footprint, capped at the last column/row
70 |         let (iu, iv) = (u as i32, v as i32);
71 |         let (x0, y0) = (math.min(iu, w - 1), math.min(iv, h - 1));
72 |         let (x1, y1) = (math.min(x0 + 1, w - 1), math.min(y0 + 1, h - 1));
73 |         // Interpolation weights: each coordinate minus its integer truncation
74 |         let (kx, ky) = (u - (iu as f32), v - (iv as f32));
75 |         let (p00, p10) = (img.pixels(x0, y0), img.pixels(x1, y0));
76 |         let (p01, p11) = (img.pixels(x0, y1), img.pixels(x1, y1));
77 | 
78 |         // Interpolate along x on both rows, then along y
79 |         let bilerp = @ |c00, c10, c01, c11| lerp(lerp(c00, c10, kx), lerp(c01, c11, kx), ky);
80 |         make_color(bilerp(p00.r, p10.r, p01.r, p11.r),
81 |                    bilerp(p00.g, p10.g, p01.g, p11.g),
82 |                    bilerp(p00.b, p10.b, p01.b, p11.b))
83 |     }
84 | }
85 | 
86 | fn @make_texture(math: Intrinsics, border: BorderHandling, filter: ImageFilter, image: Image) -> Texture {
87 |     // Border handling is applied per axis, then the filter looks up the image
88 |     @ |uv| {
89 |         let wrapped = make_vec2(border.horz(math, uv.x), border.vert(math, uv.y));
90 |         filter(math, image, wrapped)
91 |     }
92 | }
93 | 


--------------------------------------------------------------------------------
/src/render/light.impala:
--------------------------------------------------------------------------------
  1 | // Result from sampling a light source (position and outgoing direction)
  2 | struct EmissionSample {
  3 |     pos: Vec3,          // Position on the light source
  4 |     dir: Vec3,          // Direction of the light going outwards
  5 |     intensity: Color,   // Intensity along the direction
  6 |     pdf_area: f32,      // Probability density of sampling the point on the light
  7 |     pdf_dir: f32,       // Probability density of sampling the direction, conditioned on the point on the light source
  8 |     cos: f32            // Cosine between the direction and the light source geometry
  9 | }
 10 | 
 11 | // Result from sampling direct lighting from a light source
 12 | struct DirectLightSample {
 13 |     pos: Vec3,          // Position on the light source
 14 |     intensity: Color,   // Intensity along the direction
 15 |     pdf_area: f32,      // Probability density of sampling the point on the light
 16 |     pdf_dir: f32,       // Probability density of sampling the direction using emission sampling
 17 |     cos: f32            // Cosine between the direction and the light source geometry
 18 | }
 19 | 
 20 | // Emission properties of a light source
 21 | struct EmissionValue {
 22 |     intensity: Color,   // Intensity along the direction
 23 |     pdf_area: f32,      // Probability density of sampling the point on the light
 24 |     pdf_dir: f32        // Probability density of sampling the direction using emission sampling
 25 | }
 26 | 
 27 | // Surface that emits light
 28 | struct AreaEmitter {
 29 |     sample: fn (Vec2) -> (Vec3, Vec3, f32), // maps two sample coordinates to (position, normal, area pdf)
 30 |     normal: fn (Vec2) -> Vec3,              // normal of the emitter at the given coordinates
 31 |     pdf:    fn (Vec2) -> f32                // area pdf at the given coordinates
 32 | }
 33 | 
 34 | // Opaque light structure
 35 | struct Light {
 36 |     // Samples direct illumination from this light source at the given point on a surface
 37 |     sample_direct: fn (&mut RndState, Vec3) -> DirectLightSample,
 38 |     // Samples a position on the light together with an outgoing direction
 39 |     sample_emission: fn (&mut RndState) -> EmissionSample,
 40 |     // Returns the emission properties of the light for a direction and surface coordinates on the light
 41 |     emission: fn (Vec3, Vec2) -> EmissionValue,
 42 |     // true if the light has an area (can be hit by a ray)
 43 |     has_area: bool
 44 | }
 45 | 
 46 | fn @make_emission_sample(pos: Vec3, dir: Vec3, intensity: Color, pdf_area: f32, pdf_dir: f32, cos: f32) -> EmissionSample {
 47 |     // Usable only when both densities and the cosine are strictly positive
 48 |     let valid = pdf_area > 0.0f && pdf_dir > 0.0f && cos > 0.0f;
 49 |     if !valid {
 50 |         // Degenerate sample: black intensity, every other scalar term set to one
 51 |         EmissionSample {
 52 |             pos: pos,
 53 |             dir: dir,
 54 |             intensity: black,
 55 |             pdf_area: 1.0f,
 56 |             pdf_dir: 1.0f,
 57 |             cos: 1.0f
 58 |         }
 59 |     } else {
 60 |         // Valid sample: the arguments are stored unchanged
 61 |         EmissionSample {
 62 |             pos: pos, dir: dir, intensity: intensity,
 63 |             pdf_area: pdf_area, pdf_dir: pdf_dir, cos: cos
 64 |         }
 65 |     }
 66 | }
 67 | 
 68 | fn @make_direct_sample(pos: Vec3, intensity: Color, pdf_area: f32, pdf_dir: f32, cos: f32) -> DirectLightSample {
 69 |     // Usable only when both densities and the cosine are strictly positive
 70 |     let valid = pdf_area > 0.0f && pdf_dir > 0.0f && cos > 0.0f;
 71 |     if !valid {
 72 |         // Degenerate sample: black intensity, unit densities and a zero cosine
 73 |         DirectLightSample {
 74 |             pos: pos,
 75 |             intensity: black,
 76 |             pdf_area: 1.0f,
 77 |             pdf_dir: 1.0f,
 78 |             cos: 0.0f
 79 |         }
 80 |     } else {
 81 |         DirectLightSample {
 82 |             pos: pos, intensity: intensity,
 83 |             pdf_area: pdf_area, pdf_dir: pdf_dir, cos: cos
 84 |         }
 85 |     }
 86 | }
 87 | 
 88 | fn @make_emission_value(intensity: Color, pdf_area: f32, pdf_dir: f32) -> EmissionValue {
 89 |     // Only the directional density is checked here
 90 |     if !(pdf_dir > 0.0f) {
 91 |         // Nothing is emitted: black, with both densities set to one
 92 |         EmissionValue {
 93 |             intensity: black,
 94 |             pdf_area: 1.0f, pdf_dir: 1.0f
 95 |         }
 96 |     } else {
 97 |         EmissionValue {
 98 |             intensity: intensity,
 99 |             pdf_area: pdf_area, pdf_dir: pdf_dir
100 |         }
101 |     }
102 | }
103 | 
104 | fn @make_point_light(math: Intrinsics, pos: Vec3, color: Color) -> Light {
105 |     // The color is divided by 4*pi; emission directions are sampled uniformly on the sphere
106 |     let intensity = color_mulf(color, 1.0f / (4.0f * flt_pi));
107 |     Light {
108 |         // The position is fixed, so direct sampling draws no random numbers
109 |         sample_direct: @ |rnd, from| make_direct_sample(pos, intensity, 1.0f, uniform_sphere_pdf(), 1.0f),
110 |         sample_emission: @ |rnd| {
111 |             let u = randf(rnd);
112 |             let v = randf(rnd);
113 |             let dir_sample = sample_uniform_sphere(math, u, v);
114 |             make_emission_sample(pos, dir_sample.dir, intensity, 1.0f, dir_sample.pdf, 1.0f)
115 |         },
116 |         // No area (see `has_area`): the emission query always yields black
117 |         emission: @ |_, _| make_emission_value(black, 1.0f, 1.0f),
118 |         has_area: false
119 |     }
120 | }
121 | 
122 | fn @make_area_light(math: Intrinsics, area: AreaEmitter, color: Color) -> Light {
123 |     Light {
124 |         sample_direct: @ |rnd, from| {
125 |             let (light_pos, light_n, pdf_a) = area.sample(make_vec2(randf(rnd), randf(rnd)));
126 |             let to_surface = vec3_sub(from, light_pos); // points from the light towards `from`
127 |             let cos_l = vec3_dot(to_surface, light_n) / vec3_len(math, to_surface);
128 |             make_direct_sample(light_pos, color, pdf_a, cosine_hemisphere_pdf(cos_l), cos_l)
129 |         },
130 |         sample_emission: @ |rnd| {
131 |             let (light_pos, light_n, pdf_a) = area.sample(make_vec2(randf(rnd), randf(rnd)));
132 |             let local = sample_cosine_hemisphere(math, randf(rnd), randf(rnd)); // its z component is passed on as the cosine
133 |             make_emission_sample(light_pos, mat3x3_mul(make_orthonormal_mat3x3(light_n), local.dir), color, pdf_a, local.pdf, local.dir.z)
134 |         },
135 |         emission: @ |out_dir, uv| make_emission_value(color, area.pdf(uv), cosine_hemisphere_pdf(vec3_dot(area.normal(uv), out_dir))),
136 |         has_area: true
137 |     }
138 | }
139 | 
140 | fn @make_triangle_light(math: Intrinsics, v0: Vec3, v1: Vec3, v2: Vec3, color: Color) -> Light {
141 |     // The cross product of two edges has a length of twice the triangle area
142 |     let edge_cross = vec3_cross(vec3_sub(v1, v0), vec3_sub(v2, v0));
143 |     let inv_area = 1.0f / (0.5f * sqrt_newton(vec3_len2(edge_cross), 1e-5f));
144 |     make_precomputed_triangle_light(math, v0, v1, v2, vec3_mulf(edge_cross, 0.5f * inv_area), inv_area, color)
145 | }
146 | 
147 | fn @make_precomputed_triangle_light(math: Intrinsics, v0: Vec3, v1: Vec3, v2: Vec3, n: Vec3, inv_area: f32, color: Color) -> Light {
148 |     // Area light over the triangle, with a constant normal `n` and a constant area pdf `inv_area`
149 |     make_area_light(math, AreaEmitter {
150 |         sample: @ |uv| (sample_triangle(uv.x, uv.y, v0, v1, v2), n, inv_area),
151 |         normal: @ |_| n,
152 |         pdf:    @ |_| inv_area
153 |     }, color)
154 | }
155 | 


--------------------------------------------------------------------------------
/src/render/renderer.impala:
--------------------------------------------------------------------------------
  1 | struct PathTracer { // set of callbacks handed to `Device.trace`
  2 |     on_emit:   RayEmitter, // generates a ray and its initial state
  3 |     on_hit:    fn (Ray, Hit, &mut RayState, SurfaceElement, Material, fn (Color) -> !) -> (), // may pass a color to the continuation
  4 |     on_shadow: fn (Ray, Hit, &mut RayState, SurfaceElement, Material, fn (Ray, Color) -> !) -> (), // may pass a shadow ray and its color to the continuation
  5 |     on_bounce: fn (Ray, Hit, &mut RayState, SurfaceElement, Material, fn (Ray, RayState) -> !) -> (), // may pass the next ray and state to the continuation
  6 | }
  7 | 
  8 | struct RayState { // state carried along with each ray
  9 |     rnd:     RndState, // random generator state
 10 |     contrib: Color,    // color factor multiplied into everything the ray contributes
 11 |     mis:     f32,      // term used for the weight of directly hit lights (camera rays start with 0)
 12 |     depth:   i32       // number of bounces so far (camera rays start with 0)
 13 | }
 14 | 
 15 | type RayEmitter = fn (i32, i32, i32, i32, i32) -> (Ray, RayState); // arguments: (sample, x, y, width, height)
 16 | 
 17 | fn @make_ray_state(rnd: RndState, contrib: Color, mis: f32, depth: i32) -> RayState {
 18 |     // Plain constructor: each argument is stored
 19 |     // in the field of the same name
 20 |     RayState {
 21 |         rnd: rnd, contrib: contrib,
 22 |         mis: mis, depth:   depth
 23 |     }
 24 | }
 25 | 
 26 | fn @make_camera_emitter(scene: Scene, device: Device, iter: i32) -> RayEmitter {
 27 |     @ |sample, x, y, width, height| {
 28 |         // The generator is seeded by hashing the sample index, the iteration and the pixel coordinates
 29 |         let seed = fnv_hash(fnv_hash(fnv_hash(fnv_hash(fnv_init(), sample as u32), iter as u32), x as u32), y as u32);
 30 |         let mut rnd = seed as RndState;
 31 |         // Random offset inside the pixel, then a mapping of the image to [-1, 1] (ky decreases as y grows)
 32 |         let jx = x as f32 + randf(&mut rnd);
 33 |         let jy = y as f32 + randf(&mut rnd);
 34 |         let kx = 2.0f * jx / (width  as f32) - 1.0f;
 35 |         let ky = 1.0f - 2.0f * jy / (height as f32);
 36 |         // Camera rays start with a white contribution, a zero MIS term and depth 0
 37 |         let ray = scene.camera.generate_ray(kx, ky);
 38 |         (ray, make_ray_state(rnd, white, 0.0f, 0))
 39 |     }
 40 | }
 41 | 
 42 | fn @make_debug_renderer() -> Renderer {
 43 |     @ |scene, device, iter| {
 44 |         // Renderer for debugging: one sample per pixel,
 45 |         // camera rays only, and a shading that depends on the hit alone
 46 |         let path_tracer = PathTracer {
 47 |             on_emit:   make_camera_emitter(scene, device, iter),
 48 |             // White scaled by -dot(ray direction, third column of the local frame)
 49 |             on_hit:    @ |ray, hit, state, surf, mat, accumulate| {
 50 |                 accumulate(color_mulf(white, -vec3_dot(ray.dir, surf.local.col(2))))
 51 |             },
 52 |             // Neither shadow rays nor bounces are generated
 53 |             on_shadow: @ |_, _, _, _, _, _| (),
 54 |             on_bounce: @ |_, _, _, _, _, _| ()
 55 |         };
 56 | 
 57 |         // The last argument is the number of samples
 58 |         device.trace(scene, path_tracer, 1);
 59 |     }
 60 | }
 61 | 
 62 | fn @make_path_tracing_renderer(max_path_len: i32, spp: i32)-> Renderer {
 63 |     @ |scene, device, iter| {
 64 |         let offset = 0.001f; // Margin applied to the ray range, presumably to avoid self-intersections
 65 |         let pdf_lightpick = 1.0f / (scene.num_lights as f32); // Every light is picked with the same probability
 66 |         // Camera rays are generated from the scene camera
 67 |         let on_emit = make_camera_emitter(scene, device, iter);
 68 |         // Connects the hit point to a point sampled on one randomly picked light
 69 |         fn @on_shadow( ray: Ray
 70 |                      , hit: Hit
 71 |                      , state: &mut RayState
 72 |                      , surf: SurfaceElement
 73 |                      , mat: Material
 74 |                      , emit: fn (Ray, Color) -> !
 75 |                      ) -> () {
 76 |             // No shadow rays for specular materials, nor when there is no light to pick (avoids a modulo by zero below)
 77 |             if mat.bsdf.is_specular || scene.num_lights == 0 {
 78 |                 return()
 79 |             }
 80 | 
 81 |             let rnd = &mut state.rnd;
 82 |             // Note: randi() returns random integers, but we only want non-negative integers here
 83 |             let light_id = (randi(rnd) & 0x7FFFFFFF) % scene.num_lights;
 84 |             let light = @@(scene.lights)(light_id);
 85 |             let light_sample = @@(light.sample_direct)(rnd, surf.point);
 86 |             let light_dir = vec3_sub(light_sample.pos, surf.point); // Not normalized: its length is the distance to the light
 87 |             let vis = vec3_dot(light_dir, surf.local.col(2));
 88 | 
 89 |             if vis > 0.0f && light_sample.cos > 0.0f {
 90 |                 let inv_d = 1.0f / vec3_len(device.intrinsics, light_dir);
 91 |                 let inv_d2 = inv_d * inv_d;
 92 |                 let in_dir = vec3_mulf(light_dir, inv_d);
 93 |                 let out_dir = vec3_neg(ray.dir);
 94 | 
 95 |                 let pdf_e = if light.has_area { mat.bsdf.pdf(in_dir, out_dir) } else { 0.0f };
 96 |                 let pdf_l = light_sample.pdf_area * pdf_lightpick;
 97 |                 let inv_pdf_l = 1.0f / pdf_l;
 98 | 
 99 |                 let cos_e = vis * inv_d;
100 |                 let cos_l = light_sample.cos;
101 | 
102 |                 let mis = if light.has_area { 1.0f / (1.0f + pdf_e * cos_l * inv_d2 * inv_pdf_l) } else { 1.0f }; // Lights without area always get full weight
103 |                 let geom_factor = cos_e * cos_l * inv_d2 * inv_pdf_l;
104 | 
105 |                 let contrib = color_mul(light_sample.intensity, color_mul(state.contrib, mat.bsdf.eval(in_dir, out_dir)));
106 |                 emit(
107 |                     make_ray(surf.point, light_dir, offset, 1.0f - offset), // The range [offset, 1 - offset] stops just short of the light
108 |                     color_mulf(contrib, geom_factor * mis)
109 |                 )
110 |             }
111 |         }
112 |         // Adds the emission of emissive surfaces that the ray enters
113 |         fn @on_hit( ray: Ray
114 |                   , hit: Hit
115 |                   , state: &mut RayState
116 |                   , surf: SurfaceElement
117 |                   , mat: Material
118 |                   , accumulate: fn (Color) -> !
119 |                   ) -> () {
120 |             // Hits on a light source
121 |             if mat.is_emissive && surf.is_entering {
122 |                 let out_dir = vec3_neg(ray.dir);
123 |                 let emit = mat.emission(out_dir);
124 |                 let next_mis = state.mis * hit.distance * hit.distance / vec3_dot(out_dir, surf.local.col(2)); // state.mis is set by the previous bounce (0 for camera rays and specular bounces)
125 |                 let mis = 1.0f / (1.0f + next_mis * pdf_lightpick * emit.pdf_area);
126 |                 accumulate(color_mulf(color_mul(state.contrib, emit.intensity), mis))
127 |             }
128 |         }
129 |         // Continues the path with a direction sampled from the BSDF
130 |         fn @on_bounce( ray: Ray
131 |                      , hit: Hit
132 |                      , state: &mut RayState
133 |                      , surf: SurfaceElement
134 |                      , mat: Material
135 |                      , bounce: fn (Ray, RayState) -> !
136 |                      ) -> () {
137 |             // Russian roulette and maximum depth
138 |             let rr_prob = russian_roulette(state.contrib, 0.75f);
139 |             if state.depth >= max_path_len || randf(&mut state.rnd) >= rr_prob {
140 |                 return()
141 |             }
142 | 
143 |             // Bounce
144 |             let out_dir = vec3_neg(ray.dir);
145 |             let mat_sample = mat.bsdf.sample(&mut state.rnd, out_dir, false);
146 |             let contrib = color_mul(state.contrib, mat_sample.color);
147 |             let mis = if mat.bsdf.is_specular { 0.0f } else { 1.0f / mat_sample.pdf }; // Read back by on_hit if the next hit is a light
148 |             bounce(
149 |                 make_ray(surf.point, mat_sample.in_dir, offset, flt_max),
150 |                 make_ray_state(state.rnd, color_mulf(contrib, mat_sample.cos / (mat_sample.pdf * rr_prob)), mis, state.depth + 1) // The division by rr_prob compensates for the roulette
151 |             )
152 |         }
153 | 
154 |         let path_tracer = PathTracer {
155 |             on_emit:   on_emit,
156 |             on_hit:    on_hit,
157 |             on_shadow: on_shadow,
158 |             on_bounce: on_bounce
159 |         };
160 | 
161 |         device.trace(scene, path_tracer, spp);
162 |     }
163 | }
164 | 


--------------------------------------------------------------------------------
/src/render/scene.impala:
--------------------------------------------------------------------------------
 1 | // Compile-time scene data
 2 | struct Scene {
 3 |     num_geometries: i32, // presumably the number of valid indices for `geometries`
 4 |     num_lights:     i32, // number of valid indices for `lights`
 5 | 
 6 |     geometries: fn (i32) -> Geometry, // geometry lookup by index
 7 |     lights:     fn (i32) -> Light,    // light lookup by index
 8 |     camera:     Camera,               // generates the camera rays
 9 |     bvh:        Bvh    
10 | }
11 | 
12 | // Rendering device
13 | struct Device {
14 |     intrinsics: Intrinsics, // math functions to use on this device
15 | 
16 |     trace: fn (Scene, PathTracer, i32) -> (), // renders the scene with the given callbacks; the last argument is the number of samples
17 |     present: fn () -> (), // NOTE(review): presumably makes the rendered result available for display; confirm in the device code
18 | 
19 |     // General formats (NOTE(review): the byte-string arguments are presumably file names; confirm against the callers)
20 |     load_buffer: fn (&[u8]) -> DeviceBuffer,
21 |     load_bvh: fn (&[u8]) -> Bvh,
22 |     load_png: fn (&[u8]) -> Image,
23 |     load_jpg: fn (&[u8]) -> Image
24 | }
25 | 
26 | struct DeviceBuffer { // typed read accessors over a buffer returned by `Device.load_buffer`
27 |     load_i32:  fn (i32) -> i32,  // each accessor takes an index; NOTE(review): the index unit is not visible here
28 |     load_f32:  fn (i32) -> f32,
29 |     load_vec2: fn (i32) -> Vec2,
30 |     load_vec3: fn (i32) -> Vec3,
31 |     load_vec4: fn (i32) -> Vec4,
32 |     load_int2: fn (i32) -> (i32, i32),
33 |     load_int3: fn (i32) -> (i32, i32, i32),
34 |     load_int4: fn (i32) -> (i32, i32, i32, i32),
35 | }
36 | 
37 | type Renderer = fn (Scene, Device, i32) -> (); // renders one iteration; the last argument is the iteration index
38 | type Shader   = fn (Ray, Hit, SurfaceElement) -> Material; // produces the material for a hit
39 | 


--------------------------------------------------------------------------------
/src/traversal/stack.impala:
--------------------------------------------------------------------------------
  1 | struct NodeRef { // stack entry: a node id together with a distance
  2 |     node: i32, // negative: leaf, positive: inner node, 0: marks the empty stack
  3 |     tmin: f32  // distance stored with the node (used as the sorting key)
  4 | }
  5 | 
  6 | struct Stack { // traversal stack of (node, tmin) entries
  7 |     push:       fn (i32, f32) -> (), // makes the given entry the new top
  8 |     push_after: fn (i32, f32) -> (), // inserts the given entry right below the current top
  9 |     set_top:    fn (i32, f32) -> (), // overwrites the current top
 10 |     sort_n:     fn (i32, fn (f32, f32) -> bool, SortingNetwork, bool) -> (), // sorts n entries below the top; the last argument selects the branchless variant
 11 |     pop:        fn () -> NodeRef,    // removes and returns the top
 12 |     top:        fn () -> NodeRef,    // returns the top without removing it
 13 |     is_empty:   fn () -> bool,
 14 |     size:       fn () -> i32
 15 | }
 16 | 
 17 | struct SmallStack { // small indexed storage of (node, tmin) pairs
 18 |     write: fn (i32, (i32, f32)) -> (), // stores a pair at the given index
 19 |     read:  fn (i32) -> (i32, f32)      // returns the pair stored at the given index
 20 | }
 21 | 
 22 | fn @is_leaf (ref: NodeRef) -> bool { 0 > ref.node } // negative node ids denote leaves
 23 | fn @is_inner(ref: NodeRef) -> bool { 0 < ref.node } // positive node ids denote inner nodes; 0 is neither
 24 | 
 25 | fn @make_small_stack(n: i32) -> SmallStack { // storage for n pairs, built as a binary tree of single-value cells
 26 |     fn @(?begin & ?end) make_small_stack_helper(begin: i32, end: i32) -> SmallStack { // covers the indices begin..end-1
 27 |         if begin == end {
 28 |             SmallStack { // empty range: writes are dropped, reads are undefined
 29 |                 write: @ |_, _| (),
 30 |                 read:  @ |_| undef[(i32, f32)]()
 31 |             }
 32 |         } else if begin + 1 == end {
 33 |             let mut val : (i32, f32); // single cell: the index is ignored
 34 |             SmallStack {
 35 |                 write: @ |i, v| val = v,
 36 |                 read:  @ |i| val
 37 |             }
 38 |         } else {
 39 |             let m = (begin + end) / 2; // split the range in two halves
 40 |             let left  = make_small_stack_helper(begin, m);
 41 |             let right = make_small_stack_helper(m, end);
 42 |             SmallStack { // dispatch on the index: below m goes left, the rest goes right
 43 |                 write: @ |i, v| if i < m { left.write(i, v) } else { right.write(i, v) },
 44 |                 read:  @ |i|    if i < m { left.read(i)     } else { right.read(i)     }
 45 |             }
 46 |         }
 47 |     }
 48 | 
 49 |     make_small_stack_helper(0, n)
 50 | }
 51 | 
 52 | fn @alloc_stack() -> Stack { // creates an empty stack; the top entry lives in `node`/`tmin`, the rest in the arrays
 53 |     let mut nodes : [i32 * 64]; // at most 64 stored entries; no overflow check is done
 54 |     let mut tmins : [f32 * 64];
 55 |     let mut node = 0;           // current top; 0 means that the stack is empty
 56 |     let mut tmin = flt_max;
 57 |     let mut ptr = -1;           // index of the last used array slot (-1: none)
 58 | 
 59 |     let vals_accessor = @ |off| (@ |i| nodes(i + off), @ |i, v| nodes(i + off) = v); // (read, write) pair shifted by `off`
 60 |     let keys_accessor = @ |off| (@ |i| tmins(i + off), @ |i, k| tmins(i + off) = k);
 61 | 
 62 |     Stack {
 63 |         push: @ |n, t| {
 64 |             ptr++;
 65 |             nodes(ptr) = node; // the previous top is moved into the arrays
 66 |             tmins(ptr) = tmin;
 67 |             node = n;
 68 |             tmin = t;
 69 |         },
 70 |         push_after: @ |n, t| {
 71 |             ptr++;
 72 |             nodes(ptr) = n; // stored directly below the top, which is left untouched
 73 |             tmins(ptr) = t;
 74 |         },
 75 |         set_top: @ |n, t| {
 76 |             node = n;
 77 |             tmin = t;
 78 |         },
 79 |         sort_n: @ |n, cmp, sorting_network, branchless| { // reorders array slots ptr-n+1..ptr; `node`/`tmin` are not touched
 80 |             let (read_val, write_val) = vals_accessor(ptr - n + 1);
 81 |             let (read_key, write_key) = keys_accessor(ptr - n + 1);
 82 |             if branchless {
 83 |                 let tmp = make_small_stack(n); // copy the entries out, sort the copy with selects, then copy back
 84 |                 for i in range(0, n) @{
 85 |                     tmp.write(i, (read_val(i), read_key(i)))
 86 |                 }
 87 |                 sorting_network(n, @ |i, j| {
 88 |                     let (v0, k0) = tmp.read(i);
 89 |                     let (v1, k1) = tmp.read(j);
 90 |                     let swp = cmp(k0, k1); // the entries i and j are exchanged when cmp holds
 91 |                     tmp.write(i, select(swp, (v1, k1), (v0, k0)));
 92 |                     tmp.write(j, select(swp, (v0, k0), (v1, k1)));
 93 |                 });
 94 |                 for i in range(0, n) @{
 95 |                     let (v, k) = tmp.read(i);
 96 |                     write_val(i, v);
 97 |                     write_key(i, k);
 98 |                 }
 99 |             } else {
100 |                 sorting_network(n, @ |i, j| { // in-place variant: exchange inside the arrays under a branch
101 |                     let (k0, k1) = (read_key(i), read_key(j));
102 |                     if cmp(k0, k1) {
103 |                         let (v0, v1) = (read_val(i), read_val(j));
104 |                         write_key(i, k1);
105 |                         write_key(j, k0);
106 |                         write_val(i, v1);
107 |                         write_val(j, v0);
108 |                     }
109 |                 });
110 |             }
111 |         },
112 |         pop: @ || { // no emptiness check: the caller must not pop more entries than were pushed
113 |             let old = NodeRef { node: node, tmin: tmin };
114 |             node = nodes(ptr); // the entry below becomes the new top
115 |             tmin = tmins(ptr);
116 |             ptr--;
117 |             old
118 |         },
119 |         top: @ || NodeRef { node: node, tmin: tmin },
120 |         is_empty: @ || node == 0,
121 |         size: @ || ptr // index of the last used array slot, not an entry count (-1 when no slot is used)
122 |     }
123 | }
124 | 


--------------------------------------------------------------------------------
/testing/cornell_box.mtl:
--------------------------------------------------------------------------------
 1 | # The original Cornell Box in OBJ format.
 2 | # Note that the real box is not a perfect cube, so
 3 | # the faces are imperfect in this data set.
 4 | #
 5 | # Created by Guedis Cardenas and Morgan McGuire at Williams College, 2011
 6 | # Released into the Public Domain.
 7 | #
 8 | # http://graphics.cs.williams.edu/data
 9 | # http://www.graphics.cornell.edu/online/box/data.html
10 | #
11 | 
12 | newmtl leftWall
13 |   Ns 10.0000
14 |   Ni 1.5000
15 |   illum 2
16 |   Ka 0.63 0.065 0.05 # Red
17 |   Kd 0.63 0.065 0.05
18 |   Ks 0 0 0
19 |   Ke 0 0 0
20 | 
21 | 
22 | newmtl rightWall
23 |   Ns 10.0000
24 |   Ni 1.5000
25 |   illum 2
26 |   Ka 0.14 0.45 0.091 # Green
27 |   Kd 0.14 0.45 0.091
28 |   Ks 0 0 0
29 |   Ke 0 0 0
30 | 
31 | 
32 | newmtl floor
33 |   Ns 10.0000
34 |   Ni 1.0000
35 |   illum 2
36 |   Ka 0.725 0.71 0.68 # White
37 |   Kd 0.725 0.71 0.68
38 |   Ks 0 0 0
39 |   Ke 0 0 0
40 | 
41 | 
42 | newmtl ceiling
43 |   Ns 10.0000
44 |   Ni 1.0000
45 |   illum 2
46 |   Ka 0.725 0.71 0.68 # White
47 |   Kd 0.725 0.71 0.68
48 |   Ks 0     0    0
49 |   Ke 0     0    0
50 | 
51 | 
52 | newmtl backWall
53 |   Ns 10.0000
54 |   Ni 1.0000
55 |   illum 2
56 |   Ka 0.725 0.71 0.68 # White
57 |   Kd 0.725 0.71 0.68
58 |   Ks 0 0 0
59 |   Ke 0 0 0
60 | 
61 | 
62 | newmtl shortBox
63 |   Ns 10.0000
64 |   Ni 1.0000
65 |   illum 2
66 |   Ka 0.725 0.71 0.68 # White
67 |   Kd 0.725 0.71 0.68
68 |   Ks 0 0 0
69 |   Ke 0 0 0
70 | 
71 | 
72 | newmtl tallBox
73 |   Ns 10.0000
74 |   Ni 1.0000
75 |   illum 2
76 |   Ka 0.725 0.71 0.68 # White
77 |   Kd 0.725 0.71 0.68
78 |   Ks 0 0 0
79 |   Ke 0 0 0
80 | 
81 | newmtl light
82 |   Ns 10.0000
83 |   Ni 1.0000
84 |   illum 2
85 |   Ka 0.78 0.78 0.78 # White
86 |   Kd 0.78 0.78 0.78
87 |   Ks 0 0 0
88 |   Ke 17 12 4
89 | 


--------------------------------------------------------------------------------
/testing/cornell_box.obj:
--------------------------------------------------------------------------------
  1 | # The original Cornell Box in OBJ format.
  2 | # Note that the real box is not a perfect cube, so
  3 | # the faces are imperfect in this data set.
  4 | #
  5 | # Created by Guedis Cardenas and Morgan McGuire at Williams College, 2011
  6 | # Released into the Public Domain.
  7 | #
  8 | # http://graphics.cs.williams.edu/data
  9 | # http://www.graphics.cornell.edu/online/box/data.html
 10 | #
 11 | 
 12 | mtllib cornell_box.mtl
 13 | 
 14 | ## Object floor
 15 | v  -1.01  0.00   0.99
 16 | v   1.00  0.00   0.99
 17 | v   1.00  0.00  -1.04
 18 | v  -0.99  0.00  -1.04
 19 | 
 20 | g floor
 21 | usemtl floor
 22 | f -4 -3 -2 -1
 23 | 
 24 | ## Object ceiling 
 25 | v  -1.02  1.99   0.99  
 26 | v  -1.02  1.99  -1.04
 27 | v   1.00  1.99  -1.04
 28 | v   1.00  1.99   0.99
 29 | 
 30 | g ceiling
 31 | usemtl ceiling
 32 | f -4 -3 -2 -1
 33 | 
 34 | ## Object backwall  
 35 | v  -0.99  0.00  -1.04 
 36 | v   1.00  0.00  -1.04
 37 | v   1.00  1.99  -1.04
 38 | v  -1.02  1.99  -1.04
 39 | 
 40 | g backWall
 41 | usemtl backWall
 42 | f -4 -3 -2 -1
 43 | 
 44 | ## Object rightwall 
 45 | v	1.00  0.00  -1.04   
 46 | v	1.00  0.00   0.99
 47 | v	1.00  1.99   0.99
 48 | v	1.00  1.99  -1.04
 49 | 
 50 | g rightWall
 51 | usemtl rightWall
 52 | f -4 -3 -2 -1
 53 | 
 54 | ## Object leftWall   
 55 | v  -1.01  0.00   0.99
 56 | v  -0.99  0.00  -1.04
 57 | v  -1.02  1.99  -1.04
 58 | v  -1.02  1.99   0.99
 59 | 
 60 | g leftWall
 61 | usemtl leftWall
 62 | f -4 -3 -2 -1
 63 | 
 64 | ## Object shortBox 
 65 | usemtl shortBox
 66 | 
 67 | # Top Face 
 68 | v	0.53  0.60   0.75 
 69 | v	0.70  0.60   0.17  
 70 | v	0.13  0.60   0.00
 71 | v  -0.05  0.60   0.57
 72 | f -4 -3 -2 -1
 73 | 
 74 | # Left Face 
 75 | v  -0.05  0.00   0.57
 76 | v  -0.05  0.60   0.57
 77 | v   0.13  0.60   0.00 
 78 | v   0.13  0.00   0.00
 79 | f -4 -3 -2 -1
 80 | 
 81 | # Front Face
 82 | v	0.53  0.00   0.75
 83 | v	0.53  0.60   0.75
 84 | v  -0.05  0.60   0.57
 85 | v  -0.05  0.00   0.57
 86 | f -4 -3 -2 -1
 87 | 
 88 | # Right Face
 89 | v	0.70  0.00   0.17
 90 | v	0.70  0.60   0.17
 91 | v	0.53  0.60   0.75
 92 | v	0.53  0.00   0.75
 93 | f -4 -3 -2 -1
 94 | 
 95 | # Back Face
 96 | v	0.13  0.00   0.00
 97 | v	0.13  0.60   0.00
 98 | v	0.70  0.60   0.17
 99 | v	0.70  0.00   0.17
100 | f -4 -3 -2 -1 
101 | 
102 | # Bottom Face
103 | v	0.53  0.00   0.75 
104 | v	0.70  0.00   0.17  
105 | v	0.13  0.00   0.00
106 | v  -0.05  0.00   0.57
107 | f -12 -11 -10 -9
108 | 
109 | g shortBox
110 | usemtl shortBox
111 | 
112 | ## Object tallBox 
113 | usemtl tallBox
114 | 
115 | # Top Face
116 | v	-0.53  1.20   0.09
117 | v	 0.04  1.20  -0.09
118 | v	-0.14  1.20  -0.67
119 | v	-0.71  1.20  -0.49
120 | f -4 -3 -2 -1
121 | 
122 | # Left Face
123 | v	-0.53  0.00   0.09
124 | v	-0.53  1.20   0.09
125 | v	-0.71  1.20  -0.49
126 | v	-0.71  0.00  -0.49
127 | f -4 -3 -2 -1
128 | 
129 | # Back Face
130 | v	-0.71  0.00  -0.49
131 | v	-0.71  1.20  -0.49
132 | v	-0.14  1.20  -0.67
133 | v	-0.14  0.00  -0.67
134 | f -4 -3 -2 -1
135 | 
136 | # Right Face
137 | v	-0.14  0.00  -0.67
138 | v	-0.14  1.20  -0.67
139 | v	 0.04  1.20  -0.09
140 | v	 0.04  0.00  -0.09
141 | f -4 -3 -2 -1
142 | 
143 | # Front Face
144 | v	 0.04  0.00  -0.09
145 | v	 0.04  1.20  -0.09
146 | v	-0.53  1.20   0.09
147 | v	-0.53  0.00   0.09
148 | f -4 -3 -2 -1
149 | 
150 | # Bottom Face
151 | v	-0.53  0.00   0.09
152 | v	 0.04  0.00  -0.09
153 | v	-0.14  0.00  -0.67
154 | v	-0.71  0.00  -0.49
155 | f -8 -7 -6 -5
156 | 
157 | g tallBox
158 | usemtl tallBox
159 | 
160 | ## Object light 
161 | v	-0.24  1.98   0.16
162 | v	-0.24  1.98  -0.22
163 | v	 0.23  1.98  -0.22
164 | v	 0.23  1.98   0.16
165 | 
166 | g light
167 | usemtl light
168 | f -4 -3 -2 -1
169 | 


--------------------------------------------------------------------------------
/testing/ref-cornell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AnyDSL/rodent/f24b885cc9bf0a8b336f4c5de2f12e07a468f798/testing/ref-cornell.png


--------------------------------------------------------------------------------
/testing/ref-primary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AnyDSL/rodent/f24b885cc9bf0a8b336f4c5de2f12e07a468f798/testing/ref-primary.png


--------------------------------------------------------------------------------
/testing/ref-random.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AnyDSL/rodent/f24b885cc9bf0a8b336f4c5de2f12e07a468f798/testing/ref-random.png


--------------------------------------------------------------------------------
/testing/sponza-primary.rays:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AnyDSL/rodent/f24b885cc9bf0a8b336f4c5de2f12e07a468f798/testing/sponza-primary.rays


--------------------------------------------------------------------------------
/testing/sponza-random.rays:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AnyDSL/rodent/f24b885cc9bf0a8b336f4c5de2f12e07a468f798/testing/sponza-random.rays


--------------------------------------------------------------------------------
/testing/sponza.bvh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AnyDSL/rodent/f24b885cc9bf0a8b336f4c5de2f12e07a468f798/testing/sponza.bvh


--------------------------------------------------------------------------------
/tools/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Benchmarking and conversion tools.
 2 | # bench_traversal generates the traversal benchmark utility and the
 3 | # ../common/traversal.h interface.
 4 | add_subdirectory(bench_traversal)
 5 | add_subdirectory(bench_shading)
 6 | add_subdirectory(bench_interface)
 7 | 
 8 | # bench_aila is only built when CUDA is found
 9 | find_package(CUDA QUIET)
10 | if (CUDA_FOUND)
11 |     add_subdirectory(bench_aila)
12 | endif()
13 | 
14 | add_subdirectory(ray_gen)
15 | 
16 | # fbuf2png is only built when PNG is found
17 | find_package(PNG QUIET)
18 | if (PNG_FOUND)
19 |     add_subdirectory(fbuf2png)
20 | endif()
21 | 
22 | add_subdirectory(bvh_extractor)
23 | 
24 | # EMBREE_FOUND is not set in this file; it has to come from an enclosing scope
25 | if (EMBREE_FOUND)
26 |     add_subdirectory(bench_embree)
27 | endif()
28 | 
29 | # Registers a CTest test that runs cmake/test/run_traversal.cmake for one
30 | # bench_traversal configuration.
31 | #   test_name  Name of the test; the output prefix passed to the script is the
32 | #              name with '_' replaced by '-', followed by "-output"
33 | #              (single_bvh4 -> single-bvh4-output)
34 | #   ARGN       bench_traversal options, forwarded as one ;-separated list
35 | function(rodent_add_traversal_test test_name)
36 |     string(REPLACE "_" "-" output_prefix "${test_name}")
37 |     add_test(
38 |         NAME ${test_name}
39 |         COMMAND ${CMAKE_COMMAND}
40 |             -DBENCH_TRAVERSAL=$<TARGET_FILE:bench_traversal>
41 |             -DFBUF2PNG=$<TARGET_FILE:fbuf2png>
42 |             -DIM_COMPARE=${ImageMagick_compare_EXECUTABLE}
43 |             "-DBENCH_TRAVERSAL_ARGS=${ARGN}"
44 |             -DTESTING_DIR=${PROJECT_SOURCE_DIR}/testing
45 |             -DTRAVERSAL_OUTPUT=${output_prefix}-output
46 |             -P ${PROJECT_SOURCE_DIR}/cmake/test/run_traversal.cmake)
47 | endfunction()
48 | 
49 | # The tests need both the ImageMagick compare tool and the fbuf2png target
50 | if (ImageMagick_FOUND AND PNG_FOUND)
51 |     # Only test the primary rays, as the random rays are often too close
52 |     # to surfaces and often give slightly different results for each algorithm
53 |     rodent_add_traversal_test(single_bvh4 --single --bvh-width 4)
54 |     rodent_add_traversal_test(packet_bvh4 --packet --ray-width 4 --bvh-width 4)
55 |     rodent_add_traversal_test(hybrid_bvh4 --ray-width 4 --bvh-width 4)
56 |     rodent_add_traversal_test(single_bvh8 --single --bvh-width 8)
57 |     rodent_add_traversal_test(packet_bvh8 --packet --ray-width 8 --bvh-width 8)
58 |     rodent_add_traversal_test(hybrid_bvh8 --ray-width 8 --bvh-width 8)
59 | endif()
60 | 


--------------------------------------------------------------------------------
/tools/bench_aila/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # bench_aila: benchmark executable built from bench_aila.cpp and the CUDA
 2 | # sources compiled below.
 3 | set(CUDA_HOST_COMPILER ${CMAKE_C_COMPILER} CACHE FILEPATH "Compiler to use with CUDA")
 4 | 
 5 | # GPU architecture handed to nvcc; the default matches the previously hard-coded value
 6 | set(RODENT_AILA_CUDA_ARCH "sm_52" CACHE STRING "CUDA architecture used to compile bench_aila (e.g. sm_52, sm_61)")
 7 | 
 8 | cuda_compile(AILA_TRAVERSAL
 9 |     CudaTracerKernels.hpp
10 |     kepler_dynamic_fetch.cu
11 |     OPTIONS "-O3;-std=c++11;--expt-extended-lambda;-arch=${RODENT_AILA_CUDA_ARCH};-I${CMAKE_CURRENT_SOURCE_DIR}/../common")
12 | 
13 | add_executable(bench_aila bench_aila.cpp ${AILA_TRAVERSAL})
14 | target_include_directories(bench_aila PRIVATE ../common ../../src)
15 | target_link_libraries(bench_aila PRIVATE ${CUDA_LIBRARIES} ${AnyDSL_runtime_LIBRARIES})
16 | # Needs the interface file generated by bench_traversal
17 | add_dependencies(bench_aila bench_traversal)
18 | 


--------------------------------------------------------------------------------
/tools/bench_aila/bench_aila.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <fstream>
  3 | #include <chrono>
  4 | #include <numeric>
  5 | #include <algorithm>
  6 | #include <string>
  7 | #include <cstring>
  8 | #include <functional>
  9 | #include <vector>
 10 | 
 11 | #include "traversal.h"
 12 | #include "load_bvh.h"
 13 | #include "load_rays.h"
 14 | 
 15 | void setup_traversal(const Node2* nodes, size_t num_nodes, const Tri1* tris, size_t num_tris);
 16 | void shutdown_traversal();
 17 | void bench_traversal(const Ray1* rays, Hit1* hits, int num_rays, double* timings, int ntimes, bool any);
 18 | 
 19 | // Exits with status 1 when the option at argv[i] is not followed by a value.
 20 | inline void check_argument(int i, int argc, char** argv) {
 21 |     if (i + 1 < argc) return;
 22 |     std::cerr << "Missing argument for " << argv[i] << std::endl;
 23 |     exit(1);
 24 | }
 25 | 
 26 | inline void usage() {  // Prints the command-line help to standard output
 27 |     std::cout << "Usage: bench_aila [options]\n"
 28 |               << "Available options:\n"
 29 |               << "  -bvh     --bvh-file        Sets the BVH file to use\n"
 30 |               << "  -ray     --ray-file        Sets the ray file to use\n"
 31 |               << "           --tmin            Sets the minimum distance along the rays (default: 0)\n"
 32 |               << "           --tmax            Sets the maximum distance along the rays (default: 1e9)\n"
 33 |               << "           --bench           Sets the number of benchmark iterations (default: 1)\n"
 34 |               << "           --warmup          Sets the number of warmup iterations (default: 0)\n"
 35 |               << "  -any                       Exit at the first intersection (disabled by default)\n"
 36 |               << "  -o       --output          Sets the output file name (no file is generated by default)\n";
 37 | }
 38 | 
 39 | // Permutes each node's bounds in place: slots 6..9 move to 4..7, slots 4..5 move to 8..9.
 40 | static void transform_nodes(Node2* nodes, size_t size) {
 41 |     for (Node2* node = nodes, *end = nodes + size; node != end; ++node) {
 42 |         auto& b = node->bounds;
 43 |         const auto b4 = b[4], b5 = b[5];  // saved before slots 4 and 5 are overwritten
 44 |         for (int k = 4; k <= 7; ++k)
 45 |             b[k] = b[k + 2];              // reads slot k + 2 before it is written
 46 |         b[8] = b4;
 47 |         b[9] = b5;
 48 |     }
 49 | }
 50 | 
 51 | int main(int argc, char** argv) {  // Parses the options, runs the benchmark and prints timing statistics
 52 |     std::string ray_file, bvh_file, out_file;
 53 |     float tmin = 0.0f, tmax = 1e9f;
 54 |     int iters = 1, warmup = 0;
 55 |     bool any_hit = false;
 56 | 
 57 |     for (int i = 1; i < argc; i++) {
 58 |         auto arg = argv[i];
 59 |         if (arg[0] == '-') {
 60 |             if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
 61 |                 usage();
 62 |                 return 0;
 63 |             } else if (!strcmp(arg, "-bvh") || !strcmp(arg, "--bvh-file")) {
 64 |                 check_argument(i, argc, argv);
 65 |                 bvh_file = argv[++i];
 66 |             } else if (!strcmp(arg, "-ray") || !strcmp(arg, "--ray-file")) {
 67 |                 check_argument(i, argc, argv);
 68 |                 ray_file = argv[++i];
 69 |             } else if (!strcmp(arg, "--tmin")) {
 70 |                 check_argument(i, argc, argv);
 71 |                 tmin = strtof(argv[++i], nullptr);
 72 |             } else if (!strcmp(arg, "--tmax")) {
 73 |                 check_argument(i, argc, argv);
 74 |                 tmax = strtof(argv[++i], nullptr);
 75 |             } else if (!strcmp(arg, "--bench")) {
 76 |                 check_argument(i, argc, argv);
 77 |                 iters = strtol(argv[++i], nullptr, 10);
 78 |             } else if (!strcmp(arg, "--warmup")) {
 79 |                 check_argument(i, argc, argv);
 80 |                 warmup = strtol(argv[++i], nullptr, 10);
 81 |             } else if (!strcmp(arg, "-any")) {
 82 |                 any_hit = true;
 83 |             } else if (!strcmp(arg, "-o") || !strcmp(arg, "--output")) {
 84 |                 check_argument(i, argc, argv);
 85 |                 out_file = argv[++i];
 86 |             } else {
 87 |                 std::cerr << "Unknown option '" << arg << "'" << std::endl;
 88 |                 return 1;
 89 |             }
 90 |         } else {
 91 |             std::cerr << "Invalid argument '" << arg << "'" << std::endl;
 92 |             return 1;
 93 |         }
 94 |     }
 95 | 
 96 |     if (bvh_file == "") {
 97 |         std::cerr << "No BVH file specified" << std::endl;
 98 |         return 1;
 99 |     }
100 |     if (ray_file == "") {
101 |         std::cerr << "No ray file specified" << std::endl;
102 |         return 1;
103 |     }
104 |     if (iters <= 0 || warmup < 0) {  // timings[] is sized by iters and indexed unconditionally below
105 |         std::cerr << "Invalid number of benchmark or warmup iterations" << std::endl;
106 |         return 1;
107 |     }
108 | 
109 |     anydsl::Array<Node2> nodes;
110 |     anydsl::Array<Tri1>  tris;
111 |     if (!load_bvh(bvh_file, nodes, tris, BvhType::BVH2_TRI1, anydsl::Platform::Host, anydsl::Device(0))) {
112 |         std::cerr << "Cannot load BVH file" << std::endl;
113 |         return 1;
114 |     }
115 |     transform_nodes(nodes.data(), nodes.size());
116 | 
117 |     anydsl::Array<Ray1> rays;
118 |     if (!load_rays(ray_file, rays, tmin, tmax, anydsl::Platform::Host, anydsl::Device(0))) {
119 |         std::cerr << "Cannot load rays" << std::endl;
120 |         return 1;
121 |     }
122 | 
123 |     std::vector<Hit1> hits(rays.size());
124 |     std::vector<double> timings(iters);
125 | 
126 |     setup_traversal(nodes.data(), nodes.size(), tris.data(), tris.size());
127 |     bench_traversal(rays.data(), hits.data(), rays.size(), nullptr, warmup, any_hit);  // warmup runs record no timings
128 |     bench_traversal(rays.data(), hits.data(), rays.size(), timings.data(), iters, any_hit);
129 |     shutdown_traversal();
130 | 
131 |     size_t intr = 0;
132 |     for (auto& hit : hits) intr += (hit.tri_id >= 0);
133 | 
134 |     if (out_file != "") {
135 |         std::ofstream of(out_file, std::ofstream::binary);
136 |         for (auto& hit : hits)
137 |             of.write((char*)&hit.t, sizeof(float));
138 |     }
139 | 
140 |     std::sort(timings.begin(), timings.end());
141 |     auto sum = std::accumulate(timings.begin(), timings.end(), 0.0);
142 |     auto avg = sum / timings.size();
143 |     auto med = timings[timings.size() / 2];
144 |     auto min = *std::min_element(timings.begin(), timings.end());
145 |     std::cout << sum << "ms for " << iters << " iteration(s)" << std::endl;
146 |     std::cout << rays.size() * iters / (1000.0 * sum) << " Mrays/sec" << std::endl;
147 |     std::cout << "# Average: " << avg << " ms" << std::endl;
148 |     std::cout << "# Median: " << med  << " ms" << std::endl;
149 |     std::cout << "# Min: " << min << " ms" << std::endl;
150 |     std::cout << intr << " intersection(s)" << std::endl;
151 |     return 0;
152 | }
153 | 


--------------------------------------------------------------------------------
/tools/bench_embree/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # bench_embree: benchmark executable that also compiles the driver's OBJ loader.
 2 | add_executable(bench_embree
 3 |     bench_embree.cpp
 4 |     ../../src/driver/obj.h
 5 |     ../../src/driver/obj.cpp)
 6 | target_include_directories(bench_embree PRIVATE
 7 |     ../common
 8 |     ../../src
 9 |     ${EMBREE_ROOT_DIR}/include
10 |     ${EMBREE_ROOT_DIR}
11 |     ${EMBREE_LIBRARY_DIR})
12 | target_compile_definitions(bench_embree PRIVATE ${EMBREE_DEFINITIONS})
13 | target_link_libraries(bench_embree PRIVATE ${EMBREE_DEPENDENCIES} ${AnyDSL_runtime_LIBRARIES})
14 | # Needs the interface file generated by bench_traversal
15 | add_dependencies(bench_embree bench_traversal)
16 | 


--------------------------------------------------------------------------------
/tools/bench_interface/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Impala sources that are compiled into the bench_interface objects
 2 | set(INTERFACE_SRCS
 3 |     bench_interface.impala
 4 |     ../../src/render/material.impala
 5 |     ../../src/render/light.impala
 6 |     ../../src/render/image.impala
 7 |     ../../src/core/common.impala
 8 |     ../../src/core/color.impala
 9 |     ../../src/core/random.impala
10 |     ../../src/core/matrix.impala
11 |     ../../src/core/vector.impala)
12 | 
13 | # Note: the INTERFACE prefix points into the source tree (../common), which is
14 | # where the interface.h header listed in add_executable() below is expected
15 | anydsl_runtime_wrap(INTERFACE_OBJS
16 |     NAME "bench_interface"
17 |     CLANG_FLAGS ${CLANG_FLAGS}
18 |     FILES ${INTERFACE_SRCS}
19 |     INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../common/interface)
20 | 
21 | add_executable(bench_interface
22 |     ${INTERFACE_OBJS}
23 |     bench_interface.cpp
24 |     ${CMAKE_CURRENT_SOURCE_DIR}/../common/interface.h)
25 | target_include_directories(bench_interface PRIVATE ../common ../../src/driver)
26 | target_link_libraries(bench_interface PRIVATE ${AnyDSL_runtime_LIBRARIES})
27 | 


--------------------------------------------------------------------------------
/tools/bench_interface/bench_interface.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <chrono>
  3 | #include <cstdint>
  4 | #include <vector>
  5 | #include <random>
  6 | #include <limits>
  7 | #include <memory>
  8 | #include <algorithm>
  9 | 
 10 | #if defined(__x86_64__) || defined(__amd64__) || defined(_M_X64)
 11 | #include <x86intrin.h>
 12 | #endif
 13 | 
 14 | #include <anydsl_runtime.hpp>
 15 | 
 16 | #include "float2.h"
 17 | #include "float3.h"
 18 | #include "interface.h"
 19 | 
 20 | #define BENCH_CUDA // When defined, main() uses the CUDA platform and 1000 iterations instead of Host and 100
 21 | 
 22 | #ifdef BENCH_CUDA
 23 | // Some external functions will refer to those symbols when compiled on the GPU
 24 | extern "C" float __nv_fminf(float a, float b) { return fminf(a, b); } // each shim forwards to the C math function of the same name
 25 | extern "C" float __nv_fmaxf(float a, float b) { return fmaxf(a, b); }
 26 | extern "C" float __nv_sqrtf(float x) { return sqrtf(x); }
 27 | extern "C" float __nv_floorf(float x) { return floorf(x); }
 28 | #endif
 29 | 
 30 | template <typename T>  // Sets every element of `array` to `val` through a temporary array of the same size
 31 | void fill(anydsl::Array<T>& array, T val) {
 32 |     anydsl::Array<T> staging(array.size());
 33 |     for (auto it = staging.begin(); it != staging.end(); ++it) *it = val;
 34 |     anydsl::copy(staging, array);
 35 | }
 36 | 
 37 | template <typename T>  // Copies `vals` into `array` through a temporary array sized to `vals`
 38 | void set(anydsl::Array<T>& array, const std::vector<T>& vals) {
 39 |     anydsl::Array<T> host_array(vals.size());
 40 |     std::copy(vals.begin(), vals.end(), host_array.begin());
 41 |     anydsl::copy(host_array, array);  // qualified as in fill() instead of relying on ADL
 42 | }
 43 | 
 44 | int main(int argc, char** argv) { // Builds a textured quad, shades N random hits per iteration and prints the median throughput
 45 | #if defined(__x86_64__) || defined(__amd64__) || defined(_M_X64)
 46 |     _mm_setcsr(_mm_getcsr() | (_MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON)); // turn on flush-to-zero and denormals-are-zero
 47 | #endif
 48 | 
 49 | #ifdef BENCH_CUDA
 50 |     auto plt = anydsl::Platform::Cuda;
 51 |     auto dev = anydsl::Device(0);
 52 | #else
 53 |     auto plt = anydsl::Platform::Host;
 54 |     auto dev = anydsl::Device(0);
 55 | #endif
 56 | 
 57 |     auto num_vertices = 4;
 58 |     auto num_triangles = 2;
 59 | 
 60 |     anydsl::Array<float3>  vertices (plt, dev, num_vertices);
 61 |     anydsl::Array<float3>  normals  (plt, dev, num_vertices);
 62 |     anydsl::Array<float2>  texcoords(plt, dev, num_vertices);
 63 |     anydsl::Array<int32_t> indices  (plt, dev, num_triangles * 4); // 4 entries per triangle: 3 vertex indices + 1 filler (-1)
 64 | 
 65 |     // Create a quad
 66 |     set(vertices, {
 67 |         float3(-1.0f,  1.0f, 0.0f),
 68 |         float3(-1.0f, -1.0f, 0.0f),
 69 |         float3( 1.0f, -1.0f, 0.0f),
 70 |         float3( 1.0f,  1.0f, 0.0f)
 71 |     });
 72 |     set(normals, {
 73 |         float3(0.0f, 0.0f, 1.0f),
 74 |         float3(0.0f, 0.0f, 1.0f),
 75 |         float3(0.0f, 0.0f, 1.0f),
 76 |         float3(0.0f, 0.0f, 1.0f)
 77 |     });
 78 |     set(texcoords, {
 79 |         float2(-1.0f,  1.0f),
 80 |         float2(-1.0f, -1.0f),
 81 |         float2( 1.0f, -1.0f),
 82 |         float2( 1.0f,  1.0f)
 83 |     });
 84 |     set(indices, {
 85 |         0,
 86 |         1,
 87 |         2,
 88 |         -1,
 89 | 
 90 |         2,
 91 |         3,
 92 |         0,
 93 |         -1
 94 |     });
 95 | 
 96 |     int width  = 1024;
 97 |     int height = 1024;
 98 |     anydsl::Array<Color> pixels_kd(plt, dev, width * height);
 99 |     anydsl::Array<Color> pixels_ks(plt, dev, width * height);
100 |     anydsl::Array<Color> pixels_ns(plt, dev, width * height);
101 | 
102 |     fill(pixels_kd, Color { 0.1f, 0.2f, 0.3f }); // each texture is a single constant color
103 |     fill(pixels_ks, Color { 1.0f, 0.5f, 0.1f });
104 |     fill(pixels_ns, Color { 0.1f, 0.5f, 1.0f });
105 | 
106 |     Tex tex_kd { // initializer order presumably follows the Tex struct of bench_interface.impala — confirm against interface.h
107 |         pixels_kd.data(),
108 |         Color { 0.0f, 0.0f, 0.0f },
109 |         0, // presumably border = border_clamp
110 |         1, // presumably sampler = sampler_bilinear
111 |         width,
112 |         height
113 |     };
114 | 
115 |     Tex tex_ks {
116 |         pixels_ks.data(),
117 |         Color { 0.5f, 1.0f, 0.2f },
118 |         2, // presumably border = border_constant
119 |         0, // presumably sampler = sampler_nearest
120 |         width,
121 |         height
122 |     };
123 | 
124 |     Tex tex_ns {
125 |         pixels_ns.data(),
126 |         Color { 0.0f, 0.0f, 0.0f },
127 |         1, // presumably border = border_repeat
128 |         1, // presumably sampler = sampler_bilinear
129 |         width,
130 |         height
131 |     };
132 | 
133 |     ShadedMesh mesh {
134 |         reinterpret_cast<Vec3*>(vertices.data()),
135 |         reinterpret_cast<unsigned int*>(indices.data()),
136 |         reinterpret_cast<Vec3*>(normals.data()),
137 |         reinterpret_cast<Vec2*>(texcoords.data()),
138 |         tex_kd,
139 |         tex_ks,
140 |         tex_ns
141 |     };
142 | 
143 |     size_t N = 1024*1024; // number of hits shaded per iteration
144 |     anydsl::Array<Vec3> host_in_dirs(N);
145 |     anydsl::Array<Vec3> host_out_dirs(N);
146 |     anydsl::Array<TriHit> host_tri_hits(N);
147 | 
148 |     uint32_t seed = 42; // fixed seed: the generated inputs are the same on every run
149 |     std::mt19937 gen(seed);
150 |     std::uniform_real_distribution<float> rnd(0.0f, 1.0f);
151 |     for (size_t i = 0; i < N; ++i) {
152 |         host_tri_hits[i].id = i % 2; // alternate between the two triangles of the quad
153 |         host_tri_hits[i].uv.x = rnd(gen);
154 |         host_tri_hits[i].uv.y = rnd(gen);
155 | 
156 |         auto in  = normalize(float3(rnd(gen), rnd(gen), rnd(gen)));
157 |         auto out = normalize(float3(rnd(gen), rnd(gen), rnd(gen)));
158 | 
159 |         host_in_dirs[i].x = in.x;
160 |         host_in_dirs[i].y = in.y;
161 |         host_in_dirs[i].z = in.z;
162 | 
163 |         host_out_dirs[i].x = out.x;
164 |         host_out_dirs[i].y = out.y;
165 |         host_out_dirs[i].z = out.z;
166 |     }
167 |     anydsl::Array<Color>  colors  (plt, dev, N);
168 |     anydsl::Array<Vec3>   in_dirs (plt, dev, N);
169 |     anydsl::Array<Vec3>   out_dirs(plt, dev, N);
170 |     anydsl::Array<TriHit> tri_hits(plt, dev, N);
171 |     anydsl::copy(host_in_dirs,  in_dirs); // transfer the generated inputs to the arrays on `plt`/`dev`
172 |     anydsl::copy(host_out_dirs, out_dirs);
173 |     anydsl::copy(host_tri_hits, tri_hits);
174 | 
175 | #ifdef BENCH_CUDA
176 |     size_t iters = 1000;
177 | #else
178 |     size_t iters = 100;
179 | #endif
180 |     std::vector<double> times; // one wall-clock duration per iteration, in microseconds
181 |     for (size_t i = 0; i < iters; ++i) {
182 |         auto t0 = std::chrono::high_resolution_clock::now();
183 |         bench_interface(&mesh, tri_hits.data(), in_dirs.data(), out_dirs.data(), colors.data(), N);
184 |         auto t1 = std::chrono::high_resolution_clock::now();
185 |         times.push_back(std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count());
186 |     }
187 |     std::sort(times.begin(), times.end());
188 |     std::cout << N / times[iters/2] << " Mrays/s" << std::endl; // N divided by the median time in microseconds
189 | 
190 |     return 0;
191 | }
192 | 


--------------------------------------------------------------------------------
/tools/bench_interface/bench_interface.impala:
--------------------------------------------------------------------------------
  1 | static border_clamp    = 0u32; // Values of Tex.border; lookup_tex branches on them
  2 | static border_repeat   = 1u32;
  3 | static border_constant = 2u32; // lookup_tex treats any value other than clamp/repeat as constant
  4 | 
  5 | static sampler_nearest  = 0u32; // Values of Tex.sampler; lookup_tex branches on them
  6 | static sampler_bilinear = 1u32; // lookup_tex treats any value other than nearest as bilinear
  7 | 
  8 | struct Tex { // Texture description consumed by lookup_tex
  9 |     pixels:  &[Color], // addressed as x + y * width in lookup_tex
 10 |     border_color: Color, // returned for coordinates outside [0, 1] when border is not clamp/repeat
 11 |     border:  u32, // one of the border_* constants
 12 |     sampler: u32, // one of the sampler_* constants
 13 |     width:   i32,
 14 |     height:  i32
 15 | }
 16 | 
 17 | struct ShadedMesh { // Mesh buffers plus the three textures read by compute_shader_input
 18 |     vertices:  &[Vec3],
 19 |     indices:   &[u32], // 4 entries per triangle; only the first three are read
 20 |     normals:   &[Vec3], // indexed with the same vertex indices as `vertices`
 21 |     texcoords: &[Vec2], // indexed with the same vertex indices as `vertices`
 22 | 
 23 |     tex_kd:    Tex,
 24 |     tex_ks:    Tex,
 25 |     tex_ns:    Tex // only the red channel of the lookup is used (as `ns`)
 26 | }
 27 | 
 28 | struct TriHit { // One hit record processed by bench_interface
 29 |     id: i32, // triangle index; multiplied by 4 to address ShadedMesh.indices
 30 |     uv: Vec2     // interpolation parameters passed to vec3_lerp2 / vec2_lerp2
 31 | }
 32 | 
 33 | struct ShaderInput { // Result of compute_shader_input, consumed by shade
 34 |     point:       Vec3,
 35 |     face_normal: Vec3, // normalized cross product of two triangle edges
 36 |     normal:      Vec3, // normalized interpolation of the vertex normals
 37 |     uv_coords:   Vec2,
 38 |     local:       Mat3x3, // built from `normal` with make_orthonormal_mat3x3
 39 |     kd:          Color,
 40 |     ks:          Color,
 41 |     ns:          f32
 42 | }
 43 | 
 44 | static iterate = gpu_iterate; // loop driver used by bench_interface (cpu_iterate is the alternative defined below)
 45 | static math = nvvm_intrinsics; // intrinsics set passed to the vector, texture and BSDF helpers
 46 | static opt_interface = false; // passed as `opt` to compute_shader_input and shade
 47 | static opt_tex = false; // passed as `opt` to lookup_tex
 48 | 
 49 | fn cpu_iterate(n: i32, body: fn (i32) -> ()) -> () { // Calls body(i) for each i in [0, n) through `parallel`
 50 |     let num_cores = 0; // autodetect
 51 |     for i in parallel(num_cores, 0, n) {
 52 |         body(i);
 53 |     }
 54 | }
 55 | 
 56 | fn gpu_iterate(n: i32, body: fn (i32) -> ()) -> () { // Runs body(gidx) on NVVM device 0, then waits for completion
 57 |     let dev = 0;
 58 |     let grid  = (n, 1, 1);
 59 |     let block = (64, 1, 1);
 60 |     let acc   = nvvm_accelerator(dev);
 61 |     for work_item in acc.exec(grid, block) {
 62 |         body(work_item.gidx()); // NOTE(review): no `gidx < n` guard; presumably n must be a multiple of the block size (64) — confirm
 63 |     }
 64 |     acc.sync();
 65 | }
 66 | 
 67 | extern fn @(opt) lookup_tex(opt: bool, tex: Tex, mut uv: Vec2) -> Color { // Samples `tex` at `uv`; `opt` is only used by the @(opt) annotation
 68 |     if tex.border == border_clamp {
 69 |         let border = make_clamp_border();
 70 |         uv.x = border.horz(math, uv.x);
 71 |         uv.y = border.vert(math, uv.y);
 72 |     } else if tex.border == border_repeat {
 73 |         let border = make_repeat_border();
 74 |         uv.x = border.horz(math, uv.x);
 75 |         uv.y = border.vert(math, uv.y);
 76 |     } else /* if tex.border == border_constant */ {
 77 |         if uv.x < 0.0f || uv.x > 1.0f ||
 78 |            uv.y < 0.0f || uv.y > 1.0f {
 79 |             return(tex.border_color) // outside [0, 1] on either axis: no pixel is read
 80 |         }
 81 |     }
 82 |     let image = make_image(|x, y| tex.pixels(x + y * tex.width), tex.width, tex.height); // pixel (x, y) is stored at x + y * width
 83 |     if tex.sampler == sampler_nearest {
 84 |         let filter = make_nearest_filter();
 85 |         filter(math, image, uv)
 86 |     } else /* if tex.sampler == sampler_bilinear */ {
 87 |         let filter = make_bilinear_filter();
 88 |         filter(math, image, uv)
 89 |     }
 90 | }
 91 | 
 92 | extern fn @(opt) compute_shader_input(opt: bool, mesh: ShadedMesh, tri_hit: TriHit) -> ShaderInput { // Builds the shading inputs for one hit; `opt` is only used by the @(opt) annotation
 93 |     let i0 = mesh.indices(tri_hit.id * 4 + 0); // 4 index entries per triangle; the fourth is not read
 94 |     let i1 = mesh.indices(tri_hit.id * 4 + 1);
 95 |     let i2 = mesh.indices(tri_hit.id * 4 + 2);
 96 |     let v0 = mesh.vertices(i0);
 97 |     let v1 = mesh.vertices(i1);
 98 |     let v2 = mesh.vertices(i2);
 99 |     let uv = tri_hit.uv;
100 | 
101 |     let point       = vec3_lerp2(v0, v1, v2, uv.x, uv.y);
102 |     let face_normal = vec3_normalize(math, vec3_cross(vec3_sub(v1, v0), vec3_sub(v2, v0)));
103 |     let normal      = vec3_normalize(math, vec3_lerp2(mesh.normals(i0), mesh.normals(i1), mesh.normals(i2), uv.x, uv.y));
104 |     let texcoord    = vec2_lerp2(mesh.texcoords(i0), mesh.texcoords(i1), mesh.texcoords(i2), uv.x, uv.y);
105 | 
106 |     let kd = lookup_tex(opt_tex, mesh.tex_kd, texcoord); // all three textures are sampled at the interpolated texcoord
107 |     let ks = lookup_tex(opt_tex, mesh.tex_ks, texcoord);
108 |     let ns = lookup_tex(opt_tex, mesh.tex_ns, texcoord).r;
109 | 
110 |     let local = make_orthonormal_mat3x3(normal);
111 | 
112 |     ShaderInput {
113 |         point:       point,
114 |         face_normal: face_normal,
115 |         normal:      normal,
116 |         uv_coords:   uv, // NOTE(review): this is the hit's uv, not the interpolated `texcoord` — confirm that is intended
117 |         local:       local,
118 |         kd:          kd,
119 |         ks:          ks,
120 |         ns:          ns
121 |     }
122 | }
123 | 
124 | extern fn @(opt) shade(opt: bool, input: ShaderInput, in_dir: Vec3, out_dir: Vec3) -> Color { // Evaluates a diffuse BSDF for the given directions; `opt` is only used by the @(opt) annotation
125 |     let surf = SurfaceElement {
126 |         is_entering: true,
127 |         point:       input.point,
128 |         face_normal: input.face_normal,
129 |         uv_coords:   input.uv_coords,
130 |         attr:        @ |_| make_vec4(0.0f, 0.0f, 0.0f, 0.0f), // every attribute reads as zero
131 |         local:       input.local
132 |     };
133 |     let bsdf = make_diffuse_bsdf(math, surf, input.kd); // input.ks and input.ns are not used here
134 |     bsdf.eval(in_dir, out_dir)
135 | }
136 | 
137 | extern fn bench_interface(mesh_ptr: &ShadedMesh, tri_hits: &[TriHit], in_dirs: &[Vec3], out_dirs: &[Vec3], colors: &mut [Color], n: i32) -> () { // Entry point: writes colors(i) for each index i visited by `iterate(n)`
138 |     let mesh = *mesh_ptr; // copy the mesh description out of the pointer before entering the loop
139 |     for i in iterate(n) {
140 |         let input = compute_shader_input(opt_interface, mesh, tri_hits(i));
141 |         colors(i) = shade(opt_interface, input, in_dirs(i), out_dirs(i));
142 |     }
143 | }
144 | 


--------------------------------------------------------------------------------
/tools/bench_shading/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Impala sources that are compiled into the bench_shading objects
 2 | set(SHADING_SRCS
 3 |     bench_shading.impala
 4 |     ../../src/render/mapping_cpu.impala
 5 |     ../../src/render/material.impala
 6 |     ../../src/render/geometry.impala
 7 |     ../../src/render/light.impala
 8 |     ../../src/render/image.impala
 9 |     ../../src/render/camera.impala
10 |     ../../src/render/scene.impala
11 |     ../../src/render/renderer.impala
12 |     ../../src/render/driver.impala
13 |     ../../src/traversal/intersection.impala
14 |     ../../src/traversal/mapping_cpu.impala
15 |     ../../src/traversal/mapping_gpu.impala
16 |     ../../src/traversal/stack.impala
17 |     ../../src/core/common.impala
18 |     ../../src/core/cpu_common.impala
19 |     ../../src/core/sort.impala
20 |     ../../src/core/color.impala
21 |     ../../src/core/random.impala
22 |     ../../src/core/matrix.impala
23 |     ../../src/core/vector.impala)
24 | 
25 | # Note: the INTERFACE prefix points into the source tree (../common), which is
26 | # where the shading.h header listed in add_executable() below is expected
27 | anydsl_runtime_wrap(SHADING_OBJS
28 |     NAME "bench_shading"
29 |     CLANG_FLAGS ${CLANG_FLAGS}
30 |     FILES ${SHADING_SRCS}
31 |     INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../common/shading)
32 | 
33 | add_executable(bench_shading
34 |     ${SHADING_OBJS}
35 |     bench_shading.cpp
36 |     ${CMAKE_CURRENT_SOURCE_DIR}/../common/shading.h)
37 | target_include_directories(bench_shading PRIVATE ../common ../../src/driver)
38 | target_link_libraries(bench_shading PRIVATE ${AnyDSL_runtime_LIBRARIES})
39 | 


--------------------------------------------------------------------------------
/tools/bench_shading/bench_shading.impala:
--------------------------------------------------------------------------------
  1 | static vector_width = 8; // width passed to vectorized_range in iterate_rays
  2 | static num_geoms = 4; // number of geometries passed to iterate_rays by cpu_bench_shading
  3 | static offset = 0.0001f; // passed to make_ray for the rays created after shading
  4 | static sorted = true; // together with `specialized`, enables the per-geometry loop in iterate_rays
  5 | static specialized = true;
  6 | 
  7 | fn @iterate_rays(primary: &PrimaryStream, begins: &[i32], ends: &[i32], num_geoms: i32, body: fn(i32, i32) -> ()) -> () { // Calls body(ray index, geometry id); the num_geoms parameter shadows the static of the same name
  8 |     if ?num_geoms & sorted & specialized { // presumably `?num_geoms` tests whether the value is known at compile time — confirm
  9 |         for geom_id in unroll(0, num_geoms) {
 10 |             let (begin, end) = (begins(geom_id), ends(geom_id)); // range of this geometry, taken from begins/ends
 11 |             for i, vector_width in vectorized_range(vector_width, begin, end) {
 12 |                 @@body(i, geom_id);
 13 |             }
 14 |         }
 15 |     } else {
 16 |         for i, vector_width in vectorized_range(vector_width, begins(0), ends(num_geoms - 1)) { // one pass from the first begin to the last end
 17 |             @@body(i, primary.geom_id(i)) // the geometry id is read from the stream for each ray
 18 |         }
 19 |     }
 20 | }
 21 | 
 22 | extern fn cpu_bench_shading( primary_in: &PrimaryStream // Shading benchmark: for each ray of primary_in, evaluates the material, samples its BSDF and writes the bounced ray and state to primary_out
 23 |                            , primary_out: &PrimaryStream
 24 |                            , vertices: &[Vec3]
 25 |                            , normals: &[Vec3]
 26 |                            , face_normals: &[Vec3]
 27 |                            , texcoords: &[Vec2]
 28 |                            , indices: &[i32] // read with a stride of 4 per triangle, only the first 3 entries are used (see tri_mesh below)
 29 |                            , pixels: &[u32] // texture texels, addressed as pixels(j * width + i)
 30 |                            , width: i32
 31 |                            , height: i32
 32 |                            , begins: &[i32]
 33 |                            , ends: &[i32]
 34 |                            , num_tris: i32
 35 |                            , num_iters: i32) -> () { // num_iters: number of times the whole shading pass is repeated
 36 |     let read_primary_ray    = make_ray_stream_reader(primary_in.rays, 1); // NOTE(review): the meaning of the trailing 1 is defined by the reader/writer factories, which are not visible here
 37 |     let read_primary_hit    = make_primary_stream_hit_reader(*primary_in, 1);
 38 |     let read_primary_state  = make_primary_stream_state_reader(*primary_in, 1);
 39 |     let write_primary_ray   = make_ray_stream_writer(primary_out.rays, 1);
 40 |     let write_primary_state = make_primary_stream_state_writer(*primary_out, 1);
 41 | 
 42 |     let math = cpu_intrinsics;
 43 | 
 44 |     let image = make_image_rgba32(@ |i, j| pixels(j * width + i), width, height);
 45 | 
 46 |     fn @shader(geom_id: i32, surf: SurfaceElement) -> Material { // Material = mix of a diffuse and a Phong BSDF whose parameters depend on the two low bits of geom_id
 47 |         let texture = make_texture(math, make_repeat_border(), make_bilinear_filter(), image);
 48 |         let kd = if geom_id & 1 == 0 { // bit 0 clear: constant color (0, 1, 0); bit 0 set: texture lookup
 49 |             make_color(0.0f, 1.0f, 0.0f)
 50 |         } else {
 51 |             texture(vec4_to_2(surf.attr(0)))
 52 |         };
 53 |         let diffuse = make_diffuse_bsdf(math, surf, kd);
 54 |         let (ks, ns) = if geom_id & 2 == 0 { // bit 1 clear: constant color with exponent 96; bit 1 set: texture lookup with exponent 12
 55 |             (make_color(0.0f, 1.0f, 0.0f), 96.0f)
 56 |         } else {
 57 |             (texture(vec4_to_2(surf.attr(0))), 12.0f)
 58 |         };
 59 |         let specular = make_phong_bsdf(math, surf, ks, ns);
 60 |         let lum_ks = color_luminance(ks);
 61 |         let lum_kd = color_luminance(kd);
 62 |         let k = select(lum_ks + lum_kd == 0.0f, 0.0f, lum_ks / (lum_ks + lum_kd)); // mix weight: share of the specular luminance, forced to 0 when both luminances are 0 to avoid a division by zero
 63 |         let bsdf = make_mix_bsdf(diffuse, specular, k);
 64 |         make_material(bsdf)
 65 |     }
 66 | 
 67 |     let tri_mesh = TriMesh { // accessors over the raw buffers received as arguments
 68 |         vertices:     @ |i| vertices(i),
 69 |         normals:      @ |i| normals(i),
 70 |         face_normals: @ |i| face_normals(i),
 71 |         triangles:    @ |i| (indices(i * 4 + 0), indices(i * 4 + 1), indices(i * 4 + 2)),
 72 |         attrs:        @ |_| (false, @ |i| vec2_to_4(texcoords(i), 0.0f, 0.0f)), // the single attribute is the texture coordinate padded with zeros
 73 |         num_attrs:    1,
 74 |         num_tris:     num_tris
 75 |     };
 76 | 
 77 |     for iter in range(0, num_iters) { // iter itself is unused: every iteration performs the same work
 78 |         for i, geom_id in iterate_rays(primary_in, begins, ends, num_geoms) { // NOTE(review): num_geoms is neither a parameter nor a local here, presumably declared elsewhere in the file; confirm
 79 |             let ray       = read_primary_ray(i, 0);
 80 |             let hit       = read_primary_hit(i, 0);
 81 |             let mut state = read_primary_state(i, 0);
 82 | 
 83 |             let geom = make_tri_mesh_geometry(math, tri_mesh, @ |_, _, surf| shader(geom_id, surf));
 84 |             let surf = geom.surface_element(ray, hit);
 85 |             let mat  = geom.shader(ray, hit, surf);
 86 | 
 87 |             let out_dir = vec3_neg(ray.dir); // the outgoing direction is the reversed ray direction
 88 |             let sample = mat.bsdf.sample(&mut state.rnd, out_dir, false);
 89 | 
 90 |             let contrib = color_mulf(color_mul(state.contrib, sample.color), sample.cos / sample.pdf);
 91 |             let mis = if mat.bsdf.is_specular { 0.0f } else { 1.0f / sample.pdf }; // specular BSDFs get a zero MIS term, the others 1 / pdf
 92 |             let new_ray = make_ray(surf.point, sample.in_dir, offset, flt_max); // NOTE(review): offset is not defined in this function, presumably a file-level constant; confirm
 93 |             let new_state = make_ray_state(
 94 |                 state.rnd,
 95 |                 contrib,
 96 |                 mis,
 97 |                 state.depth + 1
 98 |             );
 99 | 
100 |             write_primary_ray(i, 0, new_ray);
101 |             write_primary_state(i, 0, new_state);
102 |         }
103 |     }
104 | }
105 | 


--------------------------------------------------------------------------------
/tools/bench_traversal/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(TRAVERSAL_SRCS # Impala sources compiled into the bench_traversal kernel objects
 2 |     bench_traversal.impala
 3 |     ../../src/traversal/intersection.impala
 4 |     ../../src/traversal/stack.impala
 5 |     ../../src/traversal/mapping_cpu.impala
 6 |     ../../src/traversal/mapping_gpu.impala
 7 |     ../../src/core/common.impala
 8 |     ../../src/core/cpu_common.impala
 9 |     ../../src/core/sort.impala
10 |     ../../src/core/matrix.impala
11 |     ../../src/core/vector.impala)
12 | 
13 | anydsl_runtime_wrap(TRAVERSAL_OBJS # INTERFACE names the generated interface file that the C++ tools include
14 |     NAME "bench_traversal"
15 |     CLANG_FLAGS ${CLANG_FLAGS}
16 |     FILES ${TRAVERSAL_SRCS}
17 |     INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/../common/traversal")
18 | 
19 | add_executable(bench_traversal
20 |     ${TRAVERSAL_OBJS}
21 |     bench_traversal.cpp
22 |     "${CMAKE_CURRENT_SOURCE_DIR}/../common/traversal.h")
23 | target_include_directories(bench_traversal PUBLIC ../common)
24 | target_link_libraries(bench_traversal PRIVATE ${AnyDSL_runtime_LIBRARIES})
25 | if (EXISTS "${CMAKE_CURRENT_BINARY_DIR}/bench_traversal.nvvm.bc") # NOTE: evaluated at configure time, so CMake must be re-run once the file has been generated
26 |     add_custom_command(TARGET bench_traversal POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_BINARY_DIR}/bench_traversal.nvvm.bc" "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" VERBATIM)
27 | endif()
28 | 


--------------------------------------------------------------------------------
/tools/bvh_extractor/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(EXTRACTOR_SRCS # sources that are built whether or not Embree is available
 2 |     bvh_extractor.cpp
 3 |     extract_bvh2.cpp
 4 |     ../../src/driver/obj.cpp
 5 |     ../../src/driver/obj.h
 6 |     ../../src/driver/bvh.h)
 7 | 
 8 | if (EMBREE_FOUND)
 9 |     list(APPEND EXTRACTOR_SRCS # the BVH4/BVH8 extraction is only compiled when Embree was found
10 |         extract_bvh4_8.cpp
11 |         ../../src/driver/embree_bvh.h)
12 | endif ()
13 | 
14 | add_executable(bvh_extractor ${EXTRACTOR_SRCS})
15 | target_include_directories(bvh_extractor PUBLIC ../common ../../src)
16 | 
17 | if (EMBREE_FOUND)
18 |     target_include_directories(bvh_extractor PUBLIC ${EMBREE_ROOT_DIR}/include ${EMBREE_ROOT_DIR} ${EMBREE_LIBRARY_DIR})
19 |     target_compile_definitions(bvh_extractor PUBLIC ${EMBREE_DEFINITIONS})
20 |     target_link_libraries(bvh_extractor PRIVATE ${EMBREE_DEPENDENCIES})
21 |     target_compile_definitions(bvh_extractor PUBLIC ENABLE_EMBREE_BVH) # switches on the BVH4/BVH8 code path in bvh_extractor.cpp
22 | endif ()
23 | 
24 | # The interface file included by the sources is generated while building bench_traversal
25 | add_dependencies(bvh_extractor bench_traversal)
26 | 


--------------------------------------------------------------------------------
/tools/bvh_extractor/bvh_extractor.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <fstream>
  3 | #include <vector>
  4 | #include <cstring>
  5 | 
  6 | #include "driver/obj.h"
  7 | #include "driver/file_path.h"
  8 | #include "driver/bvh.h"
  9 | 
 10 | #ifdef ENABLE_EMBREE_BVH
 11 | size_t build_bvh8(std::ofstream&, const obj::TriMesh&);
 12 | size_t build_bvh4(std::ofstream&, const obj::TriMesh&);
 13 | #endif
 14 | size_t build_bvh2(std::ofstream&, const obj::TriMesh&);
 15 | 
 16 | inline void check_argument(int i, int argc, char** argv) {
 17 |     // The option at index i needs a value at index i + 1: nothing to do when it is there
 18 |     if (i + 1 < argc) return;
 19 |     std::cerr << "Missing argument for " << argv[i] << std::endl;
 20 |     exit(1);
 21 | }
 22 | 
 23 | inline void usage() { // Prints the command line help on the standard output
 24 |     std::cout << "Usage: bvh_extractor [options]\n"
 25 |               << "Available options:\n"
 26 |               << "  -obj     --obj-file        Sets the OBJ file to use\n"
 27 |               << "  -o       --output          Sets the output file name\n";
 28 | }
 29 | 
 30 | int main(int argc, char** argv) { // Parses the options, loads the OBJ file and writes one BVH block per supported layout to the output file
 31 |     std::string obj_file, out_file;
 32 |     for (int i = 1; i < argc; i++) {
 33 |         auto arg = argv[i];
 34 |         if (arg[0] == '-') {
 35 |             if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
 36 |                 usage();
 37 |                 return 0;
 38 |             } else if (!strcmp(arg, "-obj") || !strcmp(arg, "--obj-file")) {
 39 |                 check_argument(i, argc, argv);
 40 |                 obj_file = argv[++i];
 41 |             } else if (!strcmp(arg, "-o") || !strcmp(arg, "--output")) {
 42 |                 check_argument(i, argc, argv);
 43 |                 out_file = argv[++i];
 44 |             } else {
 45 |                 std::cerr << "Unknown option '" << arg << "'" << std::endl;
 46 |                 return 1;
 47 |             }
 48 |         } else {
 49 |             std::cerr << "Invalid argument '" << arg << "'" << std::endl; // this tool takes no positional arguments
 50 |             return 1;
 51 |         }
 52 |     }
 53 | 
 54 |     if (obj_file == "") {
 55 |         std::cerr << "No OBJ file specified" << std::endl;
 56 |         return 1;
 57 |     }
 58 |     if (out_file == "") {
 59 |         std::cerr << "No output file specified" << std::endl;
 60 |         return 1;
 61 |     }
 62 | 
 63 |     // Load the OBJ file and convert it to a triangle mesh
 64 |     obj::File obj;
 65 |     if (!load_obj(obj_file, obj)) {
 66 |         std::cerr << "Cannot load OBJ file" << std::endl;
 67 |         return 1;
 68 |     }
 69 |     obj::TriMesh tri_mesh = compute_tri_mesh(obj, 0);
 70 | 
 71 |     std::cout << "Loaded OBJ file with " << tri_mesh.indices.size() / 4 << " triangle(s)" << std::endl; // 4 index entries per triangle
 72 | 
 73 |     std::ofstream out(out_file, std::ofstream::binary);
 74 |     if (!out) {
 75 |         std::cerr << "Cannot create output file" << std::endl;
 76 |         return 1;
 77 |     }
 78 | 
 79 |     uint32_t magic = 0x95CBED1F; // file signature, checked again by the loader
 80 |     out.write((char*)&magic, sizeof(uint32_t));
 81 | 
 82 | #ifdef ENABLE_EMBREE_BVH
 83 |     auto bvh8_nodes = build_bvh8(out, tri_mesh);
 84 |     if (!bvh8_nodes) {
 85 |         std::cerr << "Cannot build a BVH8 using Embree" << std::endl;
 86 |         return 1;
 87 |     }
 88 | 
 89 |     std::cout << "BVH8 successfully built (" << bvh8_nodes << " nodes)" << std::endl;
 90 | 
 91 |     auto bvh4_nodes = build_bvh4(out, tri_mesh);
 92 |     if (!bvh4_nodes) {
 93 |         std::cerr << "Cannot build a BVH4 using Embree" << std::endl;
 94 |         return 1;
 95 |     }
 96 | 
 97 |     std::cout << "BVH4 successfully built (" << bvh4_nodes << " nodes)" << std::endl;
 98 | #else
 99 |     std::cout << "Compiled without Embree. Will only build a GPU BVH." << std::endl;
100 | #endif
101 | 
102 |     auto bvh2_nodes = build_bvh2(out, tri_mesh);
103 |     if (!bvh2_nodes) {
104 |         std::cerr << "Cannot build a BVH2" << std::endl;
105 |         return 1;
106 |     }
107 | 
108 |     std::cout << "BVH2 successfully built (" << bvh2_nodes << " nodes)" << std::endl;
109 |     if (!out.flush()) std::cerr << "Cannot write output file" << std::endl; // surface I/O errors instead of silently leaving a truncated file
110 |     return out ? 0 : 1;
111 | }
112 | 


--------------------------------------------------------------------------------
/tools/bvh_extractor/extract_bvh2.cpp:
--------------------------------------------------------------------------------
  1 | #include <fstream>
  2 | 
  3 | #include "traversal.h"
  4 | #include "driver/bvh.h"
  5 | #include "driver/obj.h"
  6 | 
  7 | class Bvh2Builder { // Runs SplitBvhBuilder and stores its output as flat Node2 / Tri1 arrays
  8 | public:
  9 |     Bvh2Builder(std::vector<Node2>& nodes, std::vector<Tri1>& tris) // both vectors are appended to while building
 10 |         : nodes_(nodes), tris_(tris)
 11 |     {}
 12 | 
 13 |     void build(const std::vector<Tri>& tris) {
 14 |         builder_.build(tris, NodeWriter(this), LeafWriter(this, tris), 2);
 15 |     }
 16 | 
 17 | #ifdef STATISTICS
 18 |     void print_stats() const { builder_.print_stats(); }
 19 | #endif
 20 | 
 21 | private:
 22 |     struct CostFn { // cost model handed to the builder
 23 |         static float leaf_cost(int count, float area) {
 24 |             return count * area;
 25 |         }
 26 |         static float traversal_cost(float area) {
 27 |             return area * 1.0f;
 28 |         }
 29 |     };
 30 | 
 31 |     struct NodeWriter { // callback appending one inner node with the bounds of its two children
 32 |         Bvh2Builder* builder;
 33 | 
 34 |         NodeWriter(Bvh2Builder* builder)
 35 |             : builder(builder)
 36 |         {}
 37 | 
 38 |         template <typename BBoxFn>
 39 |         int operator() (int parent, int child, const BBox& parent_bb, int count, BBoxFn bboxes) {
 40 |             auto& nodes = builder->nodes_;
 41 | 
 42 |             int i = nodes.size();
 43 |             nodes.emplace_back();
 44 | 
 45 |             if (parent >= 0 && child >= 0)
 46 |                 nodes[parent].child[child] = i + 1; // inner nodes are referenced by their index plus one
 47 | 
 48 |             assert(count == 2);
 49 | 
 50 |             const BBox& bbox1 = bboxes(0); // first child: bounds[0..5] = min.x, max.x, min.y, max.y, min.z, max.z
 51 |             nodes[i].bounds[0] = bbox1.min.x;
 52 |             nodes[i].bounds[2] = bbox1.min.y;
 53 |             nodes[i].bounds[4] = bbox1.min.z;
 54 |             nodes[i].bounds[1] = bbox1.max.x;
 55 |             nodes[i].bounds[3] = bbox1.max.y;
 56 |             nodes[i].bounds[5] = bbox1.max.z;
 57 | 
 58 |             const BBox& bbox2 = bboxes(1); // second child: same layout in bounds[6..11]
 59 |             nodes[i].bounds[ 6] = bbox2.min.x;
 60 |             nodes[i].bounds[ 8] = bbox2.min.y;
 61 |             nodes[i].bounds[10] = bbox2.min.z;
 62 |             nodes[i].bounds[ 7] = bbox2.max.x;
 63 |             nodes[i].bounds[ 9] = bbox2.max.y;
 64 |             nodes[i].bounds[11] = bbox2.max.z;
 65 | 
 66 |             return i;
 67 |         }
 68 |     };
 69 | 
 70 |     struct LeafWriter { // callback appending the triangles of one leaf and linking them into the parent node
 71 |         Bvh2Builder* builder;
 72 |         const std::vector<Tri>& ref_tris;
 73 | 
 74 |         LeafWriter(Bvh2Builder* builder, const std::vector<Tri>& ref_tris)
 75 |             : builder(builder), ref_tris(ref_tris)
 76 |         {}
 77 | 
 78 |         template <typename RefFn>
 79 |         void operator() (int parent, int child, const BBox& leaf_bb, int ref_count, RefFn refs) {
 80 |             auto& nodes = builder->nodes_;
 81 |             auto& tris  = builder->tris_;
 82 | 
 83 |             if (parent >= 0 && child >= 0) nodes[parent].child[child] = ~tris.size(); // leaves are the complement of their first triangle index; same guard as NodeWriter so a parentless leaf never indexes nodes[-1]
 84 | 
 85 |             for (int i = 0; i < ref_count; i++) {
 86 |                 const int ref = refs(i);
 87 |                 const Tri& tri = ref_tris[ref];
 88 |                 auto e1 = tri.v0 - tri.v1;
 89 |                 auto e2 = tri.v2 - tri.v0;
 90 |                 // Each Tri1 holds the first vertex, the two edges above and the index of the source triangle
 91 |                 tris.emplace_back(Tri1 {
 92 |                     { tri.v0.x, tri.v0.y, tri.v0.z}, 0,
 93 |                     { e1.x, e1.y, e1.z}, 0,
 94 |                     { e2.x, e2.y, e2.z}, ref
 95 |                 });
 96 |             }
 97 | 
 98 |             // Add sentinel: the top bit of prim_id marks the last triangle of the leaf
 99 |             if (ref_count > 0) tris.back().prim_id |= 0x80000000;
100 |         }
101 |     };
102 | 
103 |     SplitBvhBuilder<2, CostFn> builder_;
104 |     std::vector<Node2>& nodes_;
105 |     std::vector<Tri1>& tris_;
106 | };
107 | 
108 | size_t build_bvh2(std::ofstream& out, const obj::TriMesh& tri_mesh) {
109 |     // Collect the mesh triangles: indices come in groups of 4, of which the first 3 are vertex indices
110 |     const auto& indices = tri_mesh.indices;
111 |     std::vector<Tri> mesh_tris;
112 |     for (size_t first = 0; first < indices.size(); first += 4) {
113 |         mesh_tris.emplace_back(tri_mesh.vertices[indices[first + 0]],
114 |                                tri_mesh.vertices[indices[first + 1]],
115 |                                tri_mesh.vertices[indices[first + 2]]);
116 |     }
117 | 
118 |     std::vector<Node2> bvh_nodes;
119 |     std::vector<Tri1>  bvh_tris;
120 |     Bvh2Builder builder(bvh_nodes, bvh_tris);
121 |     builder.build(mesh_tris);
122 | 
123 |     // Block layout: byte count following the offset field, type tag, node count, triangle count, then the payload
124 |     const size_t node_bytes = sizeof(Node2) * bvh_nodes.size();
125 |     const size_t tri_bytes  = sizeof(Tri1)  * bvh_tris.size();
126 |     const uint64_t offset     = sizeof(uint32_t) * 3 + node_bytes + tri_bytes;
127 |     const uint32_t block_type = 1;
128 |     const uint32_t num_nodes  = bvh_nodes.size();
129 |     const uint32_t num_tris   = bvh_tris.size();
130 | 
131 |     out.write(reinterpret_cast<const char*>(&offset),     sizeof(offset));
132 |     out.write(reinterpret_cast<const char*>(&block_type), sizeof(block_type));
133 |     out.write(reinterpret_cast<const char*>(&num_nodes),  sizeof(num_nodes));
134 |     out.write(reinterpret_cast<const char*>(&num_tris),   sizeof(num_tris));
135 |     out.write(reinterpret_cast<const char*>(bvh_nodes.data()), node_bytes);
136 |     out.write(reinterpret_cast<const char*>(bvh_tris.data()),  tri_bytes);
137 |     return bvh_nodes.size();
138 | }
139 | 


--------------------------------------------------------------------------------
/tools/bvh_extractor/extract_bvh4_8.cpp:
--------------------------------------------------------------------------------
 1 | #include <fstream>
 2 | #include <limits>
 3 | 
 4 | #include "traversal.h"
 5 | #include "load_bvh.h"
 6 | #include "driver/embree_bvh.h"
 7 | #include "driver/obj.h"
 8 | 
 9 | template <size_t N, typename BvhNode, typename BvhTri>
10 | void write_embree_bvh(std::ofstream& out, const std::vector<BvhNode>& nodes, const std::vector<BvhTri>& tris) {
11 |     // Payload sizes in bytes; the offset field counts every byte of the block that follows it
12 |     const size_t node_bytes = sizeof(BvhNode) * nodes.size();
13 |     const size_t tri_bytes  = sizeof(BvhTri)  * tris.size();
14 |     const uint64_t offset     = sizeof(uint32_t) * 3 + node_bytes + tri_bytes;
15 |     const uint32_t block_type = uint32_t(N == 4 ? BvhType::BVH4_TRI4 : BvhType::BVH8_TRI4);
16 |     const uint32_t num_nodes  = nodes.size();
17 |     const uint32_t num_tris   = tris.size();
18 |     out.write(reinterpret_cast<const char*>(&offset),     sizeof(offset));
19 |     out.write(reinterpret_cast<const char*>(&block_type), sizeof(block_type));
20 |     out.write(reinterpret_cast<const char*>(&num_nodes),  sizeof(num_nodes));
21 |     out.write(reinterpret_cast<const char*>(&num_tris),   sizeof(num_tris));
22 |     out.write(reinterpret_cast<const char*>(nodes.data()), node_bytes);
23 |     out.write(reinterpret_cast<const char*>(tris.data()),  tri_bytes);
24 | }
25 | 
26 | size_t build_bvh4(std::ofstream& out, const obj::TriMesh& tri_mesh) {
27 |     // Builds the 4-wide BVH with Embree and appends it to the stream; returns 0 when the build fails
28 |     std::vector<Node4> bvh_nodes;
29 |     std::vector<Tri4>  bvh_tris;
30 |     const bool built = build_embree_bvh<4>(tri_mesh, bvh_nodes, bvh_tris);
31 |     if (built) write_embree_bvh<4>(out, bvh_nodes, bvh_tris);
32 |     return built ? bvh_nodes.size() : 0;
33 | }
34 | 
35 | size_t build_bvh8(std::ofstream& out, const obj::TriMesh& tri_mesh) {
36 |     // Builds the 8-wide BVH with Embree and appends it to the stream; returns 0 when the build fails
37 |     std::vector<Node8> bvh_nodes;
38 |     std::vector<Tri4>  bvh_tris;
39 |     const bool built = build_embree_bvh<8>(tri_mesh, bvh_nodes, bvh_tris);
40 |     if (built) write_embree_bvh<8>(out, bvh_nodes, bvh_tris);
41 |     return built ? bvh_nodes.size() : 0;
42 | }
43 | 


--------------------------------------------------------------------------------
/tools/common/load_bvh.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOAD_BVH_H
 2 | #define LOAD_BVH_H
 3 | 
 4 | #include <fstream>
 5 | #include <anydsl_runtime.hpp>
 6 | #include "traversal.h"
 7 | 
 8 | enum class BvhType : uint32_t { // Tag identifying the kind of a block in a BVH file; compared against the stored block type by locate_block()
 9 |     BVH2_TRI1 = 1,
10 |     BVH4_TRI4 = 2,
11 |     BVH8_TRI4 = 3
12 | };
13 | 
14 | namespace detail {
15 | 
16 | struct BvhHeader { // node and triangle counts stored right after the block type tag
17 |     uint32_t node_count;
18 |     uint32_t tri_count;
19 | };
20 | 
21 | inline bool check_header(std::istream& is) { // Returns true when the next 4 bytes of the stream are the BVH file magic number
22 |     uint32_t magic = 0; // initialized so that a short or failed read never leaves it indeterminate
23 |     is.read((char*)&magic, sizeof(uint32_t));
24 |     return !is.fail() && magic == 0x95CBED1F;
25 | }
26 | 
27 | inline bool locate_block(std::istream& is, BvhType type) { // Skips blocks until one tagged with `type` has been found; returns false when there is none
28 |     uint32_t block_type;
29 |     uint64_t offset = 0;
30 |     do {
31 |         is.seekg(offset, std::istream::cur); // no-op on the first iteration, then jumps over the remainder of the previous block
32 | 
33 |         is.read((char*)&offset, sizeof(uint64_t));
34 |         if (is.gcount() != sizeof(uint64_t)) return false;
35 |         is.read((char*)&block_type, sizeof(uint32_t));
36 |         if (is.gcount() != sizeof(uint32_t)) return false;
37 |         if (offset < sizeof(uint32_t)) return false; // corrupt block: the subtraction below would wrap around
38 |         offset -= sizeof(uint32_t); // the type tag just read is part of the byte count stored in offset
39 |     } while (!is.eof() && block_type != (uint32_t)type);
40 | 
41 |     return static_cast<bool>(is);
42 | }
43 | 
44 | } // namespace detail
45 | 
46 | template <typename Node, typename Tri>
47 | inline bool load_bvh(const std::string& filename, // Loads the block of type bvh_type from a BVH file into nodes/tris on the requested platform; false on any error
48 |                      anydsl::Array<Node>& nodes,
49 |                      anydsl::Array<Tri>& tris,
50 |                      BvhType bvh_type,
51 |                      anydsl::Platform platform,
52 |                      anydsl::Device device) {
53 |     std::ifstream in(filename, std::ifstream::binary);
54 |     if (!in || !detail::check_header(in) || !detail::locate_block(in, bvh_type))
55 |         return false;
56 | 
57 |     detail::BvhHeader header;
58 |     if (!in.read((char*)&header, sizeof(detail::BvhHeader))) return false; // never allocate from counts that were not read
59 |     auto host_nodes = anydsl::Array<Node>(header.node_count);
60 |     auto host_tris  = anydsl::Array<Tri >(header.tri_count );
61 |     in.read((char*)host_nodes.data(), sizeof(Node) * header.node_count);
62 |     in.read((char*)host_tris.data(),  sizeof(Tri)  * header.tri_count);
63 |     if (!in) return false; // truncated payload: leave the output arrays untouched
64 |     if (platform != anydsl::Platform::Host) {
65 |         nodes = anydsl::Array<Node>(platform, device, header.node_count);
66 |         tris  = anydsl::Array<Tri >(platform, device, header.tri_count );
67 |         anydsl::copy(host_nodes, nodes); // the data is read on the host first, then copied to the device arrays
68 |         anydsl::copy(host_tris,  tris);
69 |     } else {
70 |         nodes = std::move(host_nodes);
71 |         tris  = std::move(host_tris);
72 |     }
73 |     return true;
74 | }
75 | 
76 | #endif // LOAD_BVH_H
77 | 


--------------------------------------------------------------------------------
/tools/common/load_rays.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOAD_RAYS_H
 2 | #define LOAD_RAYS_H
 3 | 
 4 | #include <fstream>
 5 | #include <anydsl_runtime.hpp>
 6 | 
 7 | template <typename Ray>
 8 | struct RayTraits {}; // Primary template is empty: each ray type below provides RayPerPacket and write_ray() through a specialization
 9 | 
10 | struct Ray1;
11 | // Single-ray layout: one ray per packet, so the lane index is not used.
12 | template <>
13 | struct RayTraits<Ray1> {
14 |     enum { RayPerPacket = 1 };
15 |     static void write_ray(const float* org_dir, float tmin, float tmax, int /*j*/, Ray1& ray) {
16 |         // org_dir holds the origin in [0..2] followed by the direction in [3..5]
17 |         for (int axis = 0; axis < 3; axis++) {
18 |             ray.org[axis] = org_dir[axis];
19 |             ray.dir[axis] = org_dir[axis + 3];
20 |         }
21 |         ray.tmin = tmin;
22 |         ray.tmax = tmax;
23 |     }
24 | };
25 | 
26 | struct Ray4;
27 | // Packet of 4 rays: component k of ray j is stored in org[k][j] / dir[k][j].
28 | template <>
29 | struct RayTraits<Ray4> {
30 |     enum { RayPerPacket = 4 };
31 |     static void write_ray(const float* org_dir, float tmin, float tmax, int j, Ray4& ray) {
32 |         // org_dir holds the origin in [0..2] followed by the direction in [3..5]
33 |         for (int axis = 0; axis < 3; axis++) {
34 |             ray.org[axis][j] = org_dir[axis];
35 |             ray.dir[axis][j] = org_dir[axis + 3];
36 |         }
37 |         ray.tmin[j] = tmin;
38 |         ray.tmax[j] = tmax;
39 |     }
40 | };
41 | 
42 | struct Ray8;
43 | // Packet of 8 rays: component k of ray j is stored in org[k][j] / dir[k][j].
44 | template <>
45 | struct RayTraits<Ray8> {
46 |     enum { RayPerPacket = 8 };
47 |     static void write_ray(const float* org_dir, float tmin, float tmax, int j, Ray8& ray) {
48 |         // org_dir holds the origin in [0..2] followed by the direction in [3..5]
49 |         for (int axis = 0; axis < 3; axis++) {
50 |             ray.org[axis][j] = org_dir[axis];
51 |             ray.dir[axis][j] = org_dir[axis + 3];
52 |         }
53 |         ray.tmin[j] = tmin;
54 |         ray.tmax[j] = tmax;
55 |     }
56 | };
57 | 
58 | template <typename Ray>
59 | inline bool load_rays(const std::string& filename, // Reads a file of raw 6-float (origin, direction) records into ray packets on the requested platform; false on any error
60 |                       anydsl::Array<Ray>& rays,
61 |                       float tmin, float tmax, // tmin/tmax are assigned to every ray that is read
62 |                       anydsl::Platform platform,
63 |                       anydsl::Device device) {
64 |     std::ifstream in(filename, std::ifstream::binary);
65 |     if (!in) return false;
66 | 
67 |     in.seekg(0, std::ios_base::end);
68 |     const std::streamoff size = in.tellg();
69 |     in.seekg(0, std::ios_base::beg);
70 |     // tellg() yields -1 on failure; a valid file is a whole number of 6-float records
71 |     if (size < 0 || size % std::streamoff(sizeof(float) * 6) != 0) return false;
72 | 
73 |     const int rays_per_packet = RayTraits<Ray>::RayPerPacket;
74 |     const size_t ray_count = size_t(size) / (rays_per_packet * sizeof(float) * 6); // number of packets; rays that do not fill a whole packet are dropped
75 |     anydsl::Array<Ray> host_rays(ray_count);
76 | 
77 |     for (size_t i = 0; i < ray_count; i++) {
78 |         for (int j = 0; j < rays_per_packet; j++) {
79 |             float org_dir[6];
80 |             if (!in.read((char*)org_dir, sizeof(float) * 6)) return false; // never build rays from data that was not read
81 |             RayTraits<Ray>::write_ray(org_dir, tmin, tmax, j, host_rays[i]);
82 |         }
83 |     }
84 | 
85 |     if (platform != anydsl::Platform::Host) {
86 |         rays = anydsl::Array<Ray>(platform, device, ray_count);
87 |         anydsl::copy(host_rays, rays);
88 |     } else {
89 |         rays = std::move(host_rays);
90 |     }
91 |     return true;
92 | }
93 | 
94 | #endif // LOAD_RAYS_H
95 | 


--------------------------------------------------------------------------------
/tools/fbuf2png/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(fbuf2png fbuf2png.cpp) # tool converting a raw float buffer to a PNG image
2 | target_include_directories(fbuf2png PUBLIC ${PNG_INCLUDE_DIRS})
3 | target_link_libraries(fbuf2png PRIVATE ${PNG_LIBRARIES})
4 | 


--------------------------------------------------------------------------------
/tools/fbuf2png/fbuf2png.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <fstream>
  3 | #include <vector>
  4 | #include <algorithm>
  5 | #include <png.h>
  6 | #include <cstring>
  7 | 
  8 | static void write_to_stream(png_structp png_ptr, png_bytep data, png_size_t length) {
  9 |     // The I/O pointer is the std::ostream that was registered with png_set_write_fn
 10 |     static_cast<std::ostream*>(png_get_io_ptr(png_ptr))->write(reinterpret_cast<const char*>(data), length);
 11 | }
 12 | 
 13 | static void flush_stream(png_structp) {
 14 |     // Deliberately a no-op: this callback never flushes the stream explicitly
 15 | }
 16 | 
 17 | inline void check_argument(int i, int argc, char** argv) {
 18 |     // The option at index i needs a value at index i + 1: nothing to do when it is there
 19 |     if (i + 1 < argc) return;
 20 |     std::cerr << "Missing argument for " << argv[i] << std::endl;
 21 |     exit(1);
 22 | }
 23 | 
 24 | inline void usage() { // Prints the command line help on the standard output
 25 |     std::cout << "Usage: fbuf2png [options] input output\n"
 26 |               << "Available options:\n"
 27 |               << "  -sx      --width        Sets the width of the image (default: 1024)\n"
 28 |               << "  -sy      --height       Sets the height of the image (default: 1024)\n"
 29 |               << "  -n       --normalize    Normalizes the values contained in the image (disabled by default)\n";
 30 | }
 31 | 
 32 | int main(int argc, char** argv) { // Converts a raw buffer of width * height floats into a grayscale RGBA PNG
 33 |     bool normalize = false;
 34 |     int width = 1024;
 35 |     int height = 1024;
 36 |     std::vector<std::string> files; // positional arguments: input file, then output file
 37 | 
 38 |     for (int i = 1; i < argc; i++) {
 39 |         auto arg = argv[i];
 40 |         if (arg[0] == '-') {
 41 |             if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
 42 |                 usage();
 43 |                 return 0;
 44 |             } else if (!strcmp(arg, "-n") || !strcmp(arg, "--normalize")) {
 45 |                 normalize = true;
 46 |             } else if (!strcmp(arg, "-sx") || !strcmp(arg, "--width")) {
 47 |                 check_argument(i, argc, argv);
 48 |                 width = strtol(argv[++i], nullptr, 10);
 49 |             } else if (!strcmp(arg, "-sy") || !strcmp(arg, "--height")) {
 50 |                 check_argument(i, argc, argv);
 51 |                 height = strtol(argv[++i], nullptr, 10);
 52 |             } else {
 53 |                 std::cerr << "Unknown option '" << arg << "'" << std::endl;
 54 |                 return 1;
 55 |             }
 56 |         } else {
 57 |             files.push_back(argv[i]);
 58 |         }
 59 |     }
 60 |     if (width <= 0 || height <= 0) { std::cerr << "Invalid image size" << std::endl; return 1; } // also catches unparsable numbers, which strtol turns into 0
 61 |     if (files.size() < 2) {
 62 |         std::cerr << "Missing input or output file" << std::endl;
 63 |         return 1;
 64 |     }
 65 |     if (files.size() > 2) {
 66 |         std::cerr << "Too many arguments" << std::endl;
 67 |         return 1;
 68 |     }
 69 | 
 70 |     std::ifstream fbuf_file(files[0], std::ifstream::binary);
 71 |     std::ofstream png_file(files[1], std::ofstream::binary);
 72 |     if (!fbuf_file || !png_file)
 73 |         return 1;
 74 | 
 75 |     // Read fbuf file and convert it to an image (sizes are computed as size_t so that large images do not overflow int)
 76 |     std::vector<float> float_image(size_t(width) * height);
 77 |     if (!fbuf_file.read((char*)float_image.data(), float_image.size() * sizeof(float))) {
 78 |         std::cerr << "Not enough data in the float buffer" << std::endl;
 79 |         return 1;
 80 |     }
 81 |     const float tmax = normalize ? *std::max_element(float_image.begin(), float_image.end()) : 1.0f;
 82 | 
 83 |     png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
 84 |     if (!png_ptr) {
 85 |         return 1;
 86 |     }
 87 | 
 88 |     png_infop info_ptr = png_create_info_struct(png_ptr);
 89 |     if (!info_ptr) {
 90 |         png_destroy_write_struct(&png_ptr, nullptr); // png_ptr is a write struct, so it must be released with the write variant
 91 |         return 1;
 92 |     }
 93 | 
 94 |     std::vector<uint8_t> row(size_t(width) * 4);
 95 |     if (setjmp(png_jmpbuf(png_ptr))) { // libpng reports errors by jumping back here
 96 |         png_destroy_write_struct(&png_ptr, &info_ptr);
 97 |         return 1;
 98 |     }
 99 | 
100 |     png_set_write_fn(png_ptr, &png_file, write_to_stream, flush_stream);
101 | 
102 |     png_set_IHDR(png_ptr, info_ptr, width, height,
103 |                  8, PNG_COLOR_TYPE_RGBA, PNG_INTERLACE_NONE,
104 |                  PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
105 | 
106 |     png_write_info(png_ptr, info_ptr);
107 | 
108 |     for (int y = 0; y < height; y++) {
109 |         for (int x = 0; x < width; x++) {
110 |             uint8_t c = std::min(255.0f, std::max(0.0f, 255.0f * float_image[size_t(y) * width + x] / tmax)); // clamped: converting an out-of-range or NaN float to uint8_t is undefined (NaN maps to 0 here)
111 |             row[x * 4 + 0] = c;
112 |             row[x * 4 + 1] = c;
113 |             row[x * 4 + 2] = c;
114 |             row[x * 4 + 3] = 255; // fully opaque
115 |         }
116 |         png_write_row(png_ptr, row.data());
117 |     }
118 | 
119 |     png_write_end(png_ptr, info_ptr);
120 |     png_destroy_write_struct(&png_ptr, &info_ptr);
121 | 
122 |     return 0;
123 | }
124 | 


--------------------------------------------------------------------------------
/tools/ray_gen/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(ray_gen "${CMAKE_CURRENT_SOURCE_DIR}/ray_gen.cpp")
2 | target_include_directories(ray_gen PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../common" "${CMAKE_CURRENT_SOURCE_DIR}/../../src")
3 | target_link_libraries(ray_gen PUBLIC ${AnyDSL_runtime_LIBRARIES})
4 | # The interface file included by ray_gen is generated while building bench_traversal, so build that target first
5 | add_dependencies(ray_gen bench_traversal)
6 | 


--------------------------------------------------------------------------------