├── .gitignore ├── CMakeLists.txt ├── LICENSE.txt ├── README.md ├── benchmarks ├── bench.sh ├── benchmark.py ├── benchmark_aila.py ├── fix_results_arm.py ├── gen_formulas.py ├── gen_table.py ├── profile.sh ├── results.txt ├── results_aila.txt ├── results_arm_par.txt ├── results_par.txt └── scenes │ └── generate.sh ├── cmake ├── modules │ ├── FindLZ4.cmake │ └── FindSDL2.cmake └── test │ ├── run_rodent.cmake │ └── run_traversal.cmake ├── refs ├── CMakeLists.txt ├── cmake │ └── modules │ │ ├── FindEmbree.cmake │ │ ├── FindISPC.cmake │ │ ├── FindOptiX.cmake │ │ ├── FindSDL2.cmake │ │ └── FindTBB.cmake └── src │ ├── CMakeLists.txt │ ├── bench_shading.cpp │ ├── bench_shading.h │ ├── bench_shading.ispc │ ├── color.h │ ├── common.h │ ├── common.isph │ ├── embree_path_tracer.cpp │ ├── embree_path_tracer.h │ ├── embree_path_tracer.ispc │ ├── file_path.h │ ├── float2.h │ ├── float3.h │ ├── float4.h │ ├── image.cpp │ ├── image.h │ ├── math.isph │ ├── obj.cpp │ ├── obj.h │ ├── optix_path_tracer.cpp │ ├── optix_path_tracer.cu │ └── optix_path_tracer.h ├── src ├── CMakeLists.txt ├── core │ ├── color.impala │ ├── common.impala │ ├── cpu_common.impala │ ├── matrix.impala │ ├── random.impala │ ├── sort.impala │ └── vector.impala ├── driver │ ├── bbox.h │ ├── buffer.h │ ├── bvh.h │ ├── color.h │ ├── common.h │ ├── converter.cpp │ ├── driver.cpp │ ├── embree_bvh.h │ ├── file_path.h │ ├── float2.h │ ├── float3.h │ ├── float4.h │ ├── image.cpp │ ├── image.h │ ├── interface.cpp │ ├── obj.cpp │ ├── obj.h │ └── tri.h ├── dummy_main.impala ├── render │ ├── camera.impala │ ├── driver.impala │ ├── geometry.impala │ ├── image.impala │ ├── light.impala │ ├── mapping_cpu.impala │ ├── mapping_gpu.impala │ ├── material.impala │ ├── renderer.impala │ └── scene.impala └── traversal │ ├── intersection.impala │ ├── mapping_cpu.impala │ ├── mapping_gpu.impala │ └── stack.impala ├── testing ├── cornell_box.mtl ├── cornell_box.obj ├── ref-cornell.png ├── ref-primary.png ├── ref-random.png ├── 
# sponza-primary.rays ├── sponza-random.rays └── sponza.bvh └── tools ├── CMakeLists.txt ├── bench_aila ├── CMakeLists.txt ├── CudaTracerKernels.hpp ├── bench_aila.cpp └── kepler_dynamic_fetch.cu ├── bench_embree ├── CMakeLists.txt └── bench_embree.cpp ├── bench_interface ├── CMakeLists.txt ├── bench_interface.cpp └── bench_interface.impala ├── bench_shading ├── CMakeLists.txt ├── bench_shading.cpp └── bench_shading.impala ├── bench_traversal ├── CMakeLists.txt ├── bench_traversal.cpp └── bench_traversal.impala ├── bvh_extractor ├── CMakeLists.txt ├── bvh_extractor.cpp ├── extract_bvh2.cpp └── extract_bvh4_8.cpp ├── common ├── load_bvh.h └── load_rays.h ├── fbuf2png ├── CMakeLists.txt └── fbuf2png.cpp └── ray_gen ├── CMakeLists.txt └── ray_gen.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Auto generated interfaces 2 | tools/common/traversal.h 3 | tools/common/shading.h 4 | src/driver/interface.h 5 | 6 | # Compiled Object files 7 | *.slo 8 | *.lo 9 | *.o 10 | 11 | # Configuration file 12 | *.conf 13 | 14 | # Float buffers 15 | *.fbuf 16 | 17 | # Image files 18 | *.png 19 | 20 | # Precompiled Headers 21 | *.gch 22 | *.pch 23 | 24 | # Compiled Dynamic libraries 25 | *.so 26 | *.dylib 27 | *.dll 28 | 29 | # Fortran module files 30 | *.mod 31 | 32 | # Compiled Static libraries 33 | *.lai 34 | *.la 35 | *.a 36 | *.lib 37 | 38 | # Executables 39 | *.exe 40 | *.out 41 | *.app 42 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
# Top-level build script for Rodent. It locates the AnyDSL runtime, probes
# for an optional Embree build tree, sets up CTest, and then descends into
# src/ and tools/.

# Must come before project(): it establishes the policy settings that
# project() itself depends on.
cmake_minimum_required(VERSION 3.1)
project(rodent)

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)

find_package(AnyDSL_runtime REQUIRED)
# NOTE(review): kept directory-scoped because the targets are declared in
# src/ and tools/, which are not visible here and presumably rely on
# inheriting this include path.
include_directories(${AnyDSL_runtime_INCLUDE_DIRS})

# User-overridable cache entry; it is only declared in this file.
set(CLANG_FLAGS -O3 -march=native -ffast-math CACHE STRING "Clang compilation options")

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# PROJECT_SOURCE_DIR keeps the path valid if Rodent is ever added as a
# subdirectory of a larger build.
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules")

set(COLOR_TTY_AVAILABLE TRUE)
if (WIN32)
    # By default, Windows console does not support ANSI escape codes
    set(COLOR_TTY_AVAILABLE FALSE)
endif ()
set(COLORIZE ${COLOR_TTY_AVAILABLE} CACHE BOOL "Set to TRUE to enable colorized output. Requires an ANSI compliant terminal.")

# Try to find Embree
find_path(EMBREE_ROOT_DIR include/embree3/rtcore.h DOC "Embree source code directory")
find_path(EMBREE_LIBRARY_DIR
    NAMES
        embree_sse42.dll
        embree_avx.dll
        embree_avx2.dll
        libembree_sse42.so
        libembree_avx.so
        libembree_avx2.so
        libembree_sse42.a
        libembree_avx.a
        libembree_avx2.a
    HINTS ${EMBREE_ROOT_DIR}/build
    DOC "Embree library directory")
find_path(EMBREE_CMAKE_DIR
    NAMES
        embree-config.cmake
    HINTS ${EMBREE_LIBRARY_DIR}
    DOC "Embree CMake directory")

# if(<variable>) is false for empty values and for values ending in
# -NOTFOUND, so this also copes with `-DEMBREE_ROOT_DIR=` given on the
# command line (the previous unquoted STREQUAL test was a syntax error then).
if (EMBREE_ROOT_DIR AND EMBREE_LIBRARY_DIR AND EMBREE_CMAKE_DIR)
    # Prefer the Embree 3 shared library and fall back to the Embree 2 name.
    set(EMBREE_VERSION 3)
    find_library(EMBREE3_SHARED_LIBRARY NAMES embree3.dll libembree3.so HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree library")
    if (EMBREE3_SHARED_LIBRARY)
        set(EMBREE_SHARED_LIBRARY ${EMBREE3_SHARED_LIBRARY})
    else ()
        set(EMBREE_VERSION 2)
        find_library(EMBREE2_SHARED_LIBRARY NAMES embree.dll libembree.so HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree library")
        set(EMBREE_SHARED_LIBRARY ${EMBREE2_SHARED_LIBRARY})
    endif ()
    add_definitions(-DEMBREE_VERSION=${EMBREE_VERSION})

    find_library(EMBREE_SSE42_LIBRARY NAMES embree_sse42.lib libembree_sse42.a HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree SSE42 library")
    find_library(EMBREE_AVX_LIBRARY NAMES embree_avx.lib libembree_avx.a HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree AVX library")
    find_library(EMBREE_AVX2_LIBRARY NAMES embree_avx2.lib libembree_avx2.a HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree AVX2 library")
    find_library(EMBREE_SIMD_LIBRARY NAMES simd.lib libsimd.a HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree SIMD library")
    find_library(EMBREE_TASKING_LIBRARY NAMES tasking.lib libtasking.a HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree tasking library")
    find_library(EMBREE_SYS_LIBRARY NAMES sys.lib libsys.a HINTS ${EMBREE_LIBRARY_DIR} DOC "Embree system library")

    find_library(PTHREAD_LIBRARY NAMES pthread.dll libpthread.so HINTS /usr/lib /usr/lib64)
    find_library(TBB_LIBRARY NAMES tbb.dll libtbb.so HINTS /usr/lib /usr/lib64)
    find_library(TBB_MALLOC_LIBRARY NAMES tbbmalloc.dll libtbbmalloc.so HINTS /usr/lib /usr/lib64)

    set(EMBREE_DEPENDENCIES
        ${EMBREE_SHARED_LIBRARY}
        ${EMBREE_SSE42_LIBRARY}
        ${EMBREE_AVX_LIBRARY}
        ${EMBREE_AVX2_LIBRARY}
        ${EMBREE_TASKING_LIBRARY}
        ${EMBREE_SYS_LIBRARY}
        ${EMBREE_SIMD_LIBRARY})

    if (PTHREAD_LIBRARY)
        # pthreads is not required on Windows
        list(APPEND EMBREE_DEPENDENCIES ${PTHREAD_LIBRARY})
    endif ()

    # get_embree_tasking_system(<defs-var> <libs-var>)
    #
    # Includes embree-config.cmake from EMBREE_CMAKE_DIR and stores, in the
    # two caller variables named by the arguments:
    #   <defs-var>: -DTASKING_TBB, -DTASKING_PPL or -DTASKING_INTERNAL,
    #               depending on EMBREE_TASKING_SYSTEM;
    #   <libs-var>: TBB_LIBRARY and TBB_MALLOC_LIBRARY for "TBB", and an
    #               empty value otherwise.
    # The include() is done inside the function so that plain variables set
    # by the Embree config file stay local to this call.
    function(get_embree_tasking_system defs libs)
        include("${EMBREE_CMAKE_DIR}/embree-config.cmake")
        set(${defs} "" PARENT_SCOPE)
        set(${libs} "" PARENT_SCOPE)
        if ("${EMBREE_TASKING_SYSTEM}" STREQUAL "TBB")
            set(${defs} -DTASKING_TBB PARENT_SCOPE)
            set(${libs} ${TBB_LIBRARY} ${TBB_MALLOC_LIBRARY} PARENT_SCOPE)
        elseif ("${EMBREE_TASKING_SYSTEM}" STREQUAL "PPL")
            set(${defs} -DTASKING_PPL PARENT_SCOPE)
        else ()
            set(${defs} -DTASKING_INTERNAL PARENT_SCOPE)
        endif ()
    endfunction()

    get_embree_tasking_system(EMBREE_DEFINITIONS EMBREE_TASKING_DEPENDENCIES)
    list(APPEND EMBREE_DEPENDENCIES ${EMBREE_TASKING_DEPENDENCIES})

    message(STATUS "Embree found")
    set(EMBREE_FOUND ON)
endif ()

# For tests
include(CTest)
find_package(ImageMagick COMPONENTS compare QUIET)

add_subdirectory(src)
add_subdirectory(tools)
# -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019 Arsène Pérard-Gayot 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rodent 2 | 3 | Rodent is a BVH traversal library and renderer implemented using the AnyDSL compiler framework (https://anydsl.github.io/).
4 | 5 | # Building 6 | 7 | The dependencies are: CMake, AnyDSL, libpng, SDL2, and optionally the Embree sources for the benchmarking tools. 8 | Once the dependencies are installed, use the following commands to build the project: 9 | 10 | mkdir build 11 | cd build 12 | # Set the OBJ file to use with the SCENE_FILE variable 13 | # By default, SCENE_FILE=../testing/cornell_box.obj 14 | cmake .. -DSCENE_FILE=myfile.obj 15 | # Optional: Create benchmarking tools for Embree and BVH extractor tools 16 | # cmake .. -DEMBREE_ROOT_DIR=<path to the Embree sources> 17 | make 18 | 19 | # Testing 20 | 21 | This section assumes that the current directory is the build directory. To run rodent, just type: 22 | 23 | bin/rodent 24 | 25 | You may want to change the initial camera parameters using the command line options `--eye`, `--dir` and `--up`. Run `bin/rodent --help` to get a full list of options. 26 | 27 | When ImageMagick is found by CMake, use the following command to test the traversal code with the provided test scene: 28 | 29 | make test 30 | 31 | This will only test the primary ray distribution with the packet, single, and hybrid variants. 
32 | To test all possible combinations, or if you do not have ImageMagick installed, use the benchmarking tool directly: 33 | 34 | bin/bench_traversal -bvh ../testing/sponza.bvh -ray ../testing/sponza-primary.rays --bench 50 --warmup 10 --tmax 5000 -o output-hybrid-primary.fbuf 35 | bin/bench_traversal -bvh ../testing/sponza.bvh -ray ../testing/sponza-primary.rays --bench 50 --warmup 10 --tmax 5000 -s -o output-single-primary.fbuf 36 | bin/bench_traversal -bvh ../testing/sponza.bvh -ray ../testing/sponza-random.rays --bench 50 --warmup 10 --tmax 1 -o output-hybrid-random.fbuf 37 | bin/bench_traversal -bvh ../testing/sponza.bvh -ray ../testing/sponza-random.rays --bench 50 --warmup 10 --tmax 1 -s -o output-single-random.fbuf 38 | bin/fbuf2png -n output-hybrid-primary.fbuf output-hybrid-primary.png 39 | bin/fbuf2png -n output-single-primary.fbuf output-single-primary.png 40 | bin/fbuf2png -n output-hybrid-random.fbuf output-hybrid-random.png 41 | bin/fbuf2png -n output-single-random.fbuf output-single-random.png 42 | 43 | This will run the traversal on the test set, and generate images as a result. For the primary ray distribution, the _hybrid_ and _single_ variants should generate the same images. The reference images for primary and random rays are in the `testing` directory. 44 | 45 | Running `bin/bench_traversal --help` will provide a list of additional options. 
# 46 | -------------------------------------------------------------------------------- /benchmarks/bench.sh: --------------------------------------------------------------------------------
# Usage: bench.sh <target-device> <target-platform>
#
# Builds one renderer per benchmark scene under ./renderers, then runs each
# of them with `--bench` and prints the output lines that contain a '#',
# with that '#' replaced by the name of the scene.
#
# Requires the following variables:
# - ANYDSL_DIR: Path to the AnyDSL installation directory
# - EMBREE_ROOT_DIR: Path to the root of the Embree sources
# - SCENES_DIR: Path to the scenes directory

if [ "$#" -lt 2 ]; then
    echo "Usage: $0 <target-device> <target-platform>" >&2
    exit 1
fi

# Saved under explicit names because $1/$2 are shadowed inside functions
TARGET_DEVICE=$1
TARGET_PLATFORM=$2

mkdir -p renderers
cd renderers || { echo "Cannot enter the renderers directory" ; exit 1 ; }

# Number of benchmarking iterations
ITER=20

# Set to OFF to disable fusion for megakernel mappings (will be slower)
FUSION=ON

# Path to the CMake package directory of the AnyDSL runtime
ANYDSL_RUNTIME_DIR=$ANYDSL_DIR/runtime/build/share/anydsl/cmake

# Paths to scene files
LIVING_ROOM_SCENE=$SCENES_DIR/living_room/living_room.obj
BATHROOM_SCENE=$SCENES_DIR/salle_de_bain/salle_de_bain.obj
BEDROOM_SCENE=$SCENES_DIR/bedroom/bedroom.obj
DINING_ROOM_SCENE=$SCENES_DIR/dining_room/dining_room.obj
KITCHEN_SCENE=$SCENES_DIR/kitchen/kitchen.obj
STAIRCASE_SCENE=$SCENES_DIR/wooden_staircase/wooden_staircase.obj

# Enable more NVPTX opts (currently disabled because these options are detrimental to perf.)
#if [ "$2" == "nvvm-megakernel" -o "$2" == "nvvm-streaming" -o "$2" == "nvvm" ]; then
#    export ANYDSL_LLVM_ARGS="-nvptx-f32ftz -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 -nvptx-sched4reg"
#fi

# The compiler may need a large stack space
ulimit -s 65536

echo "Benchmarking device $TARGET_DEVICE on platform $TARGET_PLATFORM"

# Creates the build directory $1, enters it and configures the project for
# the scene file $2. Changes the working directory, so callers run it in a
# subshell.
configure_scene() {
    mkdir -p "$1" && cd "$1" && cmake ../../.. \
        -DEMBREE_ROOT_DIR="${EMBREE_ROOT_DIR}" \
        -DAnyDSL_runtime_DIR="${ANYDSL_RUNTIME_DIR}" \
        -DCMAKE_BUILD_TYPE=Release \
        -DMAX_PATH_LEN=20 \
        -DSPP=4 \
        -DTARGET_DEVICE="${TARGET_DEVICE}" \
        -DTARGET_PLATFORM="${TARGET_PLATFORM}" \
        -DSCENE_FILE="$2" \
        -DDISABLE_GUI=ON \
        -DMEGAKERNEL_FUSION="${FUSION}"
}

# Configures directory $1 for scene $2 and builds the rodent target.
build_scene() {
    ( configure_scene "$1" "$2" && cmake --build . --target rodent )
}

# Same as build_scene, but builds the convert and driver targets first so
# that `time` only measures the build of the rodent target.
build_scene_timed() {
    ( configure_scene "$1" "$2" && make -j convert driver && time make -j rodent )
}

# Starts build_scene in the background and records its process id in PIDS.
start_build() {
    build_scene "$1" "$2" &
    PIDS="$PIDS $!"
}

BENCH_COMPILATION=false
echo "Building..."
if $BENCH_COMPILATION ; then
    # Sequential builds, so that the reported times are not skewed by the
    # other scenes compiling at the same time.
    build_scene_timed living_room "$LIVING_ROOM_SCENE" &&
    build_scene_timed bathroom    "$BATHROOM_SCENE" &&
    build_scene_timed bedroom     "$BEDROOM_SCENE" &&
    build_scene_timed dining_room "$DINING_ROOM_SCENE" &&
    build_scene_timed kitchen     "$KITCHEN_SCENE" &&
    build_scene_timed staircase   "$STAIRCASE_SCENE" || { echo "Compilation failed" ; exit 1 ; }
    exit 0
else
    PIDS=""
    start_build living_room "$LIVING_ROOM_SCENE"
    start_build bathroom    "$BATHROOM_SCENE"
    start_build bedroom     "$BEDROOM_SCENE"
    start_build dining_room "$DINING_ROOM_SCENE"
    start_build kitchen     "$KITCHEN_SCENE"
    start_build staircase   "$STAIRCASE_SCENE"
    # Wait for all tasks to finish before benchmarking. A bare `wait` always
    # returns 0, so each job is waited for individually to see its status.
    FAILED=false
    for pid in $PIDS ; do
        wait "$pid" || FAILED=true
    done
    if $FAILED ; then
        echo "Compilation failed"
        exit 1
    fi
fi

echo "Running..."

# Runs the renderer built in directory $1 and prints the output lines that
# contain a '#', replacing the first '#' with the label $2. The remaining
# arguments are the camera and image options passed to rodent.
run_scene() {
    dir=$1
    label=$2
    shift 2
    ( cd "$dir" && bin/rodent --bench "$ITER" "$@" -o render.png 2> /dev/null ) | sed -n "s/#/$label/p"
}

run_scene living_room "Living Room" --eye -1.8 1 -5 --dir -0.1 0 1 --up 0 1 0 --fov 60 --width 1920 --height 1088
run_scene bathroom    "Bathroom"    --eye -2.26 15.62 35.23 --dir -22.18 -5.32 -97.36 --up 0 1 0 --fov 60 --width 1920 --height 1088
run_scene bedroom     "Bedroom"     --eye 3.5 1 3.5 --dir -1 0 -1 --up 0 1 0 --fov 60 --width 1920 --height 1088
run_scene dining_room "Dining Room" --eye -4 1.3 0.0 --dir 1 -0.1 0 --up 0 1 0 --fov 48 --width 1920 --height 1088
run_scene kitchen     "Kitchen"     --eye 0.5 1.6 3 --dir -0.4 -0.05 -1 --up 0 1 0 --fov 60 --width 1920 --height 1088
run_scene staircase   "Staircase"   --eye 0 1.6 4.5 --dir 0 0 -1 --up 0 1 0 --fov 38 --height 1280 --width 720
# 82 | -------------------------------------------------------------------------------- /benchmarks/benchmark.py: --------------------------------------------------------------------------------
#! /usr/bin/python3
"""Benchmarks the CPU traversal of Rodent against Embree.

For every scene, traversal variant and ray distribution this script runs
bench_embree (except for the packet variants, marked by "-p") and
bench_traversal, and prints one line of the form

    scene : rays : variant : <Embree result> : <Rodent result>

where each result is the Mrays/sec figure reported by the tool, or None.
"""
import subprocess

# Benchmark and warm-up iteration counts, kept as strings because they are
# passed verbatim on the command line.
iters = "20"
warmups = "5"
bench_rodent = "../build_embree2/bin/bench_traversal"
bench_embree = "../build_embree2/bin/bench_embree"
variants = [
    "--ray-width 8 --bvh-width 4",
    "--ray-width 8 --bvh-width 4 -p",
    "--bvh-width 4 -s",
    "--ray-width 8 --bvh-width 8",
    "--ray-width 8 --bvh-width 8 -p",
    "--bvh-width 8 -s"
]
scenes = [
    "sponza",
    "crown",
    "san-miguel",
    "powerplant"
]
# Per scene: (value passed as --tmin, value passed as --tmax for "ao" rays)
offsets = {
    "sponza": (0.01, 10.0),
    "crown": (0.01, 10.0),
    "san-miguel": (0.01, 5.0),
    "powerplant": (0.01, 1000.0)
}

def bench_mrays(args):
    """Runs a benchmark command and returns the throughput it reports.

    args is the command line, as a list of strings. The standard output of
    the command is searched for the first line whose second
    whitespace-separated field is "Mrays/sec"; the first field of that line
    is returned as a float. Returns None when there is no such line.
    """
    # run() waits for the child and closes the pipe, so no process or file
    # descriptor is left behind.
    output = subprocess.run(args, stdout=subprocess.PIPE).stdout
    for line in output.splitlines():
        elems = line.split()
        # Blank lines and one-word lines cannot hold a result
        if len(elems) >= 2 and elems[1] == b'Mrays/sec':
            return float(elems[0])
    return None

def main():
    """Runs every scene/variant/distribution combination and prints results."""
    distribs = ["primary", "ao", "bounces"]
    for scene in scenes:
        tmin, ao_max = offsets[scene]
        scene_prefix = "scenes/" + scene + "/"
        for variant in variants:
            for rays in distribs:
                tmax = 1.0e9
                args = ["-ray", scene_prefix + rays + ".rays", "--bench", iters, "--warmup", warmups]
                if rays == "ao":
                    tmax = ao_max
                    args += ["-any"]
                args += ["--tmin", str(tmin), "--tmax", str(tmax)]
                args += variant.split()
                # bench_embree is not run for the packet ("-p") variants
                mrays_embree = None
                if "-p" not in variant:
                    mrays_embree = bench_mrays([bench_embree, "-obj", scene_prefix + scene + ".obj"] + args)
                mrays_rodent = bench_mrays([bench_rodent, "-bvh", scene_prefix + scene + ".bvh"] + args)
                print("{} : {} : {} : {} : {}".format(scene, rays, variant, mrays_embree, mrays_rodent))

if __name__ == "__main__":
    main()
# -------------------------------------------------------------------------------- /benchmarks/benchmark_aila.py: --------------------------------------------------------------------------------
#! /usr/bin/python3
"""Benchmarks the GPU traversal of Rodent against bench_aila.

For every scene and ray distribution this script runs bench_aila and
bench_traversal (with "-gpu nvvm") and prints one line of the form

    scene : rays : <bench_aila result> : <Rodent result>

where each result is the Mrays/sec figure reported by the tool, or None.
"""
import subprocess
import os

iters = "500"
warmups = "100"
# Locations of the benchmark executables and of the benchmarks directory.
# The defaults are the original author's paths; set RODENT_BIN_DIR and
# RODENT_BENCH_DIR in the environment to use another checkout.
bin_dir = os.environ.get("RODENT_BIN_DIR", "/space/perard/sources/rodent/build_embree2/bin")
bench_dir = os.environ.get("RODENT_BENCH_DIR", "/space/perard/sources/rodent/benchmarks")
# Relative to bin_dir, which is the working directory of the child processes
bench_rodent = "./bench_traversal"
bench_aila = "./bench_aila"
scenes = [
    "sponza",
    "crown",
    "san-miguel",
    "powerplant"
]
# Per scene: (value passed as --tmin, value passed as --tmax for "ao" rays)
offsets = {
    "sponza": (0.01, 10.0),
    "crown": (0.01, 10.0),
    "san-miguel": (0.01, 5.0),
    "powerplant": (0.01, 1000.0)
}

def bench_mrays(args, cwd=None):
    """Runs a benchmark command and returns the throughput it reports.

    args is the command line, as a list of strings. The command is started
    in the directory cwd (bin_dir when cwd is None) with ANYDSL_PROFILE=full
    added to the environment. Its standard output is searched for the first
    line whose second whitespace-separated field is "Mrays/sec"; the first
    field of that line is returned as a float. Returns None when there is
    no such line.
    """
    # run() waits for the child and closes the pipe, so no process or file
    # descriptor is left behind.
    output = subprocess.run(
        args,
        stdout=subprocess.PIPE,
        env=dict(os.environ, ANYDSL_PROFILE='full'),
        cwd=bin_dir if cwd is None else cwd).stdout
    for line in output.splitlines():
        elems = line.split()
        # Blank lines and one-word lines cannot hold a result
        if len(elems) >= 2 and elems[1] == b'Mrays/sec':
            return float(elems[0])
    return None

def main():
    """Runs every scene/distribution combination and prints the results."""
    distribs = ["primary", "ao", "bounces"]
    for scene in scenes:
        tmin, ao_max = offsets[scene]
        scene_prefix = bench_dir + "/scenes/" + scene + "/"
        bvh_file = scene_prefix + scene + ".bvh"
        for rays in distribs:
            tmax = ao_max if rays == "ao" else 1.0e9
            args = ["-ray", scene_prefix + rays + ".rays", "--tmin", str(tmin), "--tmax", str(tmax), "--bench", iters, "--warmup", warmups]
            mrays_aila = bench_mrays([bench_aila, "-bvh", bvh_file] + args)
            mrays_rodent = bench_mrays([bench_rodent, "-gpu", "nvvm", "-bvh", bvh_file] + args)
            print("{} : {} : {} : {}".format(scene, rays, mrays_aila, mrays_rodent))

if __name__ == "__main__":
    main()
# 48 | -------------------------------------------------------------------------------- /benchmarks/fix_results_arm.py: --------------------------------------------------------------------------------
#!/usr/bin/python3
"""Merges pairs of benchmark results read from standard input.

Each input line has the form "scene : ray : variant : ... : ref : res".
For every scene and ray distribution the input must hold exactly two lines;
the script prints

    scene : ray : fixed : <res of the "-w 4 -s" line> : <res of the other line>
"""
import sys

def main():
    """Reads result lines from stdin and prints one merged line per pair.

    Raises ValueError when a scene/ray combination does not have exactly
    two result lines.
    """
    rays = ["primary", "ao", "bounces"]
    scenes = ["sponza", "crown", "san-miguel", "powerplant"]
    # Variant whose result goes into the first of the two result columns
    ref_variant = "-w 4 -s"
    results = []

    for line in sys.stdin:
        if not line.strip():
            # Tolerate blank lines, e.g. at the end of the file
            continue
        elems = [elem.strip() for elem in line.split(":")]
        # (scene, ray, variant, ref, res)
        results.append((elems[0], elems[1], elems[2], elems[-2], elems[-1]))

    for scene in scenes:
        for ray in rays:
            matches = [res for res in results if res[0] == scene and res[1] == ray]
            if len(matches) != 2:
                raise ValueError("expected 2 results for {} / {}, found {}".format(scene, ray, len(matches)))
            res_a, res_b = matches
            if res_b[2] == ref_variant:
                res_a, res_b = res_b, res_a
            print("{} : {} : {} : {} : {}".format(scene, ray, "fixed", res_a[-1], res_b[-1]))


if __name__ == "__main__":
    main()
# 35 | -------------------------------------------------------------------------------- /benchmarks/gen_formulas.py: -------------------------------------------------------------------------------- 1 | #!
#! /usr/bin/python3
# gen_formulas.py: prints, for every ray distribution found in a results
# file, a formula for the geometric mean of the ours/reference ratios.
import sys

def main():
    """Reads the results file named by argv[1] and prints one formula per ray.

    Each input line has the form "scene : ray : ... : ref : ours". Lines
    that are blank, or whose ref or ours field is "None", carry no ratio and
    are skipped. For every ray distribution the output is

        ray = pow((ours/ref) * (ours/ref) * ..., 1.0/<number of ratios>)
    """
    if len(sys.argv) < 2:
        sys.exit("usage: gen_formulas.py <results file>")

    formulas = {}
    with open(sys.argv[1], "r") as f:
        for line in f:
            if not line.strip():
                continue
            elems = [elem.strip() for elem in line.split(':')]
            ray, ref, ours = elems[1], elems[-2], elems[-1]
            if ref == 'None' or ours == 'None':
                # float('None') would raise; there is no ratio to compute
                continue
            formulas.setdefault(ray, []).append((float(ref), float(ours)))

    for ray, factors in formulas.items():
        product = " * ".join("({}/{})".format(ours, ref) for ref, ours in factors)
        print(ray + " = pow(" + product + ", 1.0/{})".format(float(len(factors))))

if __name__ == "__main__":
    main()
# 28 | -------------------------------------------------------------------------------- /benchmarks/gen_table.py: --------------------------------------------------------------------------------
#!/usr/bin/python3
# gen_table.py: turns benchmark results read from standard input into the
# rows of LaTeX tables.
import sys

def print_results(results, rays):
    """Prints the table cells of one row, two cells per ray distribution.

    results is an iterable of (scene, ray, variant, ref, res) tuples of
    strings, where ref and res are numbers or the string 'None'. rays is a
    list of (ray, display name) pairs. For each ray, the first matching
    result is printed as "& res (+x%) & ref ", where x is the relative
    difference of res with respect to ref; missing values appear as "--".
    Nothing is returned and no newline is printed.
    """
    # The input is scanned once per ray, so a one-shot iterator (such as a
    # filter object) has to be materialised first.
    results = list(results)
    for ray, _ray_name in rays:
        elem = next((res for res in results if res[1] == ray), None)
        if elem is None:
            print("& -- & -- ", end="")
            continue
        ref = float(elem[-2]) if elem[-2] != 'None' else None
        res = float(elem[-1]) if elem[-1] != 'None' else None
        if ref is not None and res is not None:
            print("& {:0.2f} ({:+0.0f}\\%) ".format(res, 100.0*(res - ref) / ref), end="")
            print("& {:0.2f} ".format(ref), end="")
        else:
            print("& -- " if res is None else "& {:0.2f} ".format(res), end="")
            print("& -- " if ref is None else "& {:0.2f} ".format(ref), end="")


def main():
    """Reads result lines from stdin and prints the rows of three tables.

    Each input line has the form "scene : ray : variant : ... : ref : res";
    blank lines are ignored. One block of rows is printed per entry of
    tables: tables without variants get one row per scene, the others one
    row per scene and variant.
    """
    rays = [
        ("primary", "Primary"),
        ("ao", "AO"),
        ("bounces", "Diffuse")
    ]
    tables = [
        {
            "title" : "BVH2",
            "variants" : []
        },
        {
            "title" : "BVH4",
            "variants" : [
                ("--bvh-width 4 -s", "Single"),
                ("--ray-width 8 --bvh-width 4 -p", "Packet"),
                ("--ray-width 8 --bvh-width 4", "Hybrid")
            ]
        },
        {
            "title" : "BVH8",
            "variants" : [
                ("--bvh-width 8 -s", "Single"),
                ("--ray-width 8 --bvh-width 8 -p", "Packet"),
                ("--ray-width 8 --bvh-width 8", "Hybrid")
            ]
        }
    ]
    scenes = [
        ("sponza", "Sponza"),
        ("crown", "Crown"),
        ("san-miguel", "San-Miguel"),
        ("powerplant", "Powerplant")
    ]
    results = []

    for line in sys.stdin:
        if not line.strip():
            continue
        elems = [elem.strip() for elem in line.split(":")]
        # (scene, ray, variant, ref, res)
        results.append((elems[0], elems[1], elems[2], elems[-2], elems[-1]))

    for table in tables:
        variants = table["variants"]
        print("% {}".format(table["title"]))
        for scene, scene_name in scenes:
            kept_scene = [res for res in results if res[0] == scene]
            if not variants:
                print("{} ".format(scene_name), end="")
                print_results(kept_scene, rays)
                print("\\\\")
                continue
            print("\\midrule")
            print("\\multirow{{{}}}{{*}}{{{}}} & ".format(len(variants), scene_name), end="")
            for i, (variant, variant_name) in enumerate(variants):
                if i > 0:
                    print(" & ", end="")
                print("{} ".format(variant_name), end="")
                kept_variant = [res for res in kept_scene if res[2] == variant]
                print_results(kept_variant, rays)
                print("\\\\")

if __name__ == "__main__":
    main()
# 96 | -------------------------------------------------------------------------------- /benchmarks/profile.sh: -------------------------------------------------------------------------------- 1 | echo "Profiling..."

# Start from a profile.txt holding a single blank line; every run below
# appends its standard output to it.
echo "" > profile.txt

ITER=5

cd renderers || { echo "Cannot enter the renderers directory" ; exit 1 ; }

# Runs the renderer built in directory $1 under `taskset 0x1` and appends
# its standard output to profile.txt, next to the renderers directory.
# The remaining arguments are the camera and image options passed to rodent.
# The subshell keeps a failed `cd` from affecting the following runs.
profile_scene() {
    dir=$1
    shift
    ( cd "$dir" && taskset 0x1 bin/rodent --bench "$ITER" "$@" -o render.png 2> /dev/null ) >> ../profile.txt
}

profile_scene living_room --eye -1.8 1 -5 --dir -0.1 0 1 --up 0 1 0 --fov 60 --width 1920 --height 1088
profile_scene bathroom    --eye -2.26 15.62 35.23 --dir -22.18 -5.32 -97.36 --up 0 1 0 --fov 60 --width 1920 --height 1088
profile_scene bedroom     --eye 3.5 1 3.5 --dir -1 0 -1 --up 0 1 0 --fov 60 --width 1920 --height 1088
profile_scene dining_room --eye -4 1.3 0.0 --dir 1 -0.1 0 --up 0 1 0 --fov 48 --width 1920 --height 1088
profile_scene kitchen     --eye 0.5 1.6 3 --dir -0.4 -0.05 -1 --up 0 1 0 --fov 60 --width 1920 --height 1088
profile_scene staircase   --eye 0 1.6 4.5 --dir 0 0 -1 --up 0 1 0 --fov 38 --height 1280 --width 720
32 | -------------------------------------------------------------------------------- /benchmarks/results.txt: -------------------------------------------------------------------------------- 1 | sponza : primary : -w 4 : 7.54638 : 7.3053 2 | sponza : ao : -w 4 : 14.7275 : 16.3316 3 | sponza : bounces : -w 4 : 1.88431 : 1.73046 4 | sponza : primary : -w 4 -p : None : 7.86724 5 | sponza : ao : -w 4 -p : None : 16.3132 6 | sponza : bounces : -w 4 -p : None : 1.42685 7 | sponza : primary : -w 4 -s : 3.10173 : 2.86832 8 | sponza : ao : -w 4 -s : 6.06024 : 5.68684 9 | sponza : bounces : -w 4 -s : 1.86277 : 1.65231 10 | sponza : primary : -w 8 : 7.60413 : 7.38906 11 | sponza : ao : -w 8 : 14.5298 : 16.4992 12 | sponza : bounces : -w 8 : 2.2073 : 2.05149 13 | sponza : primary : -w 8 -p : None : 8.15224 14 | sponza : ao : -w 8 -p : None : 16.2574 15 | sponza : bounces : -w 8 -p : None : 1.48211 16 | sponza : primary : -w 8 -s : 4.18268 : 3.83035 17 | sponza : ao : -w 8 -s : 8.12357 : 7.13171 18 | sponza : bounces : -w 8 -s : 2.28985 : 2.13325 19 | crown : primary : -w 4 : 19.8355 : 21.7261 20 | crown : ao : -w 4 : 8.53402 : 8.00505 21 | crown : bounces : -w 4 : 3.98229 : 3.5772 22 | crown : primary : -w 4 -p : None : 22.9433 23 | crown : ao : -w 4 -p : None : 7.23937 24 | crown : bounces : -w 4 -p : None : 2.85439 25 | crown : primary : -w 4 -s : 11.5832 : 10.198 26 | crown : ao : -w 4 -s : 6.10901 : 5.25563 27 | crown : bounces : -w 4 -s : 3.72163 : 3.25043 28 | crown : primary : -w 8 : 18.4549 : 20.0882 29 | crown : ao : -w 8 : 8.86579 : 8.55806 30 | crown : bounces : -w 8 : 4.45182 : 4.04803 31 | crown : primary : -w 8 -p : None : 22.3033 32 | crown : ao : -w 8 -p : None : 7.32274 33 | crown : bounces : -w 8 -p : None : 2.98485 34 | crown : primary : -w 8 -s : 13.1341 : 11.7769 35 | crown : ao : -w 8 -s : 7.14388 : 5.9681 36 | crown : bounces : -w 8 -s : 4.40347 : 3.95503 37 | san-miguel : primary : -w 4 : 4.63054 : 4.51849 38 | san-miguel : ao : -w 4 : 2.9492 : 2.52732 
39 | san-miguel : bounces : -w 4 : 1.29591 : 1.11484 40 | san-miguel : primary : -w 4 -p : None : 4.93072 41 | san-miguel : ao : -w 4 -p : None : 2.26067 42 | san-miguel : bounces : -w 4 -p : None : 0.891493 43 | san-miguel : primary : -w 4 -s : 2.67431 : 2.09576 44 | san-miguel : ao : -w 4 -s : 2.51641 : 2.11315 45 | san-miguel : bounces : -w 4 -s : 1.35706 : 1.178 46 | san-miguel : primary : -w 8 : 3.94365 : 3.84696 47 | san-miguel : ao : -w 8 : 3.17926 : 2.80414 48 | san-miguel : bounces : -w 8 : 1.45565 : 1.25251 49 | san-miguel : primary : -w 8 -p : None : 4.2969 50 | san-miguel : ao : -w 8 -p : None : 2.32471 51 | san-miguel : bounces : -w 8 -p : None : 0.901289 52 | san-miguel : primary : -w 8 -s : 3.22086 : 2.29832 53 | san-miguel : ao : -w 8 -s : 2.95476 : 2.5676 54 | san-miguel : bounces : -w 8 -s : 1.60873 : 1.41938 55 | powerplant : primary : -w 4 : 10.2047 : 10.3249 56 | powerplant : ao : -w 4 : 19.8713 : 21.8186 57 | powerplant : bounces : -w 4 : 2.4224 : 2.1449 58 | powerplant : primary : -w 4 -p : None : 10.8075 59 | powerplant : ao : -w 4 -p : None : 23.0244 60 | powerplant : bounces : -w 4 -p : None : 1.70842 61 | powerplant : primary : -w 4 -s : 5.93011 : 4.96106 62 | powerplant : ao : -w 4 -s : 9.52665 : 8.2603 63 | powerplant : bounces : -w 4 -s : 2.46661 : 2.20007 64 | powerplant : primary : -w 8 : 9.33797 : 8.99668 65 | powerplant : ao : -w 8 : 18.8956 : 20.9723 66 | powerplant : bounces : -w 8 : 2.75902 : 2.46662 67 | powerplant : primary : -w 8 -p : None : 9.96959 68 | powerplant : ao : -w 8 -p : None : 22.0081 69 | powerplant : bounces : -w 8 -p : None : 1.80079 70 | powerplant : primary : -w 8 -s : 6.27344 : 5.87107 71 | powerplant : ao : -w 8 -s : 12.1059 : 10.6569 72 | powerplant : bounces : -w 8 -s : 2.90945 : 2.60646 73 | -------------------------------------------------------------------------------- /benchmarks/results_aila.txt: -------------------------------------------------------------------------------- 1 | sponza : primary : 
363.22 : 373.097 2 | sponza : ao : 975.008 : 1031.68 3 | sponza : bounces : 143.587 : 146.283 4 | crown : primary : 816.357 : 788.193 5 | crown : ao : 401.609 : 372.405 6 | crown : bounces : 164.526 : 157.337 7 | san-miguel : primary : 204.338 : 194.698 8 | san-miguel : ao : 153.253 : 149.804 9 | san-miguel : bounces : 59.0768 : 67.614 10 | powerplant : primary : 525.016 : 473.339 11 | powerplant : ao : 1112.64 : 1086.86 12 | powerplant : bounces : 142.308 : 130.621 13 | -------------------------------------------------------------------------------- /benchmarks/results_arm_par.txt: -------------------------------------------------------------------------------- 1 | sponza : primary : -w 4 : None : 2.7481 2 | sponza : ao : -w 4 : None : 5.35983 3 | sponza : bounces : -w 4 : None : 0.953911 4 | sponza : primary : -w 4 -s : None : 1.3769 5 | sponza : ao : -w 4 -s : None : 2.66208 6 | sponza : bounces : -w 4 -s : None : 0.99925 7 | crown : primary : -w 4 : None : 9.81564 8 | crown : ao : -w 4 : None : 3.64903 9 | crown : bounces : -w 4 : None : 1.86756 10 | crown : primary : -w 4 -s : None : 5.8034 11 | crown : ao : -w 4 -s : None : 3.00981 12 | crown : bounces : -w 4 -s : None : 1.90784 13 | san-miguel : primary : -w 4 : None : 2.07353 14 | san-miguel : ao : -w 4 : None : 1.49059 15 | san-miguel : bounces : -w 4 : None : 0.720948 16 | san-miguel : primary : -w 4 -s : None : 1.06802 17 | san-miguel : ao : -w 4 -s : None : 1.31238 18 | san-miguel : bounces : -w 4 -s : None : 0.781979 19 | powerplant : primary : -w 4 : None : 4.44325 20 | powerplant : ao : -w 4 : None : 8.19047 21 | powerplant : bounces : -w 4 : None : 1.09488 22 | powerplant : primary : -w 4 -s : None : 2.59246 23 | powerplant : ao : -w 4 -s : None : 4.05906 24 | powerplant : bounces : -w 4 -s : None : 1.22522 25 | -------------------------------------------------------------------------------- /benchmarks/results_par.txt: -------------------------------------------------------------------------------- 
1 | sponza : primary : -w 4 : 36.3476 : 34.725 2 | sponza : ao : -w 4 : 70.6553 : 76.3449 3 | sponza : bounces : -w 4 : 11.0716 : 9.77876 4 | sponza : primary : -w 4 -p : None : 36.0903 5 | sponza : ao : -w 4 -p : None : 75.9812 6 | sponza : bounces : -w 4 -p : None : 7.74625 7 | sponza : primary : -w 4 -s : 18.1877 : 14.8939 8 | sponza : ao : -w 4 -s : 36.4553 : 28.598 9 | sponza : bounces : -w 4 -s : 11.2891 : 9.36764 10 | sponza : primary : -w 8 : 36.3964 : 34.8446 11 | sponza : ao : -w 8 : 67.8053 : 76.7301 12 | sponza : bounces : -w 8 : 12.7442 : 11.461 13 | sponza : primary : -w 8 -p : None : 38.2885 14 | sponza : ao : -w 8 -p : None : 77.2587 15 | sponza : bounces : -w 8 -p : None : 7.88341 16 | sponza : primary : -w 8 -s : 22.7849 : 18.7401 17 | sponza : ao : -w 8 -s : 44.717 : 35.7588 18 | sponza : bounces : -w 8 -s : 13.3862 : 11.8594 19 | crown : primary : -w 4 : 97.8603 : 102.512 20 | crown : ao : -w 4 : 44.2639 : 40.2826 21 | crown : bounces : -w 4 : 22.2004 : 19.4772 22 | crown : primary : -w 4 -p : None : 104.206 23 | crown : ao : -w 4 -p : None : 34.9943 24 | crown : bounces : -w 4 -p : None : 14.952 25 | crown : primary : -w 4 -s : 63.9428 : 53.0047 26 | crown : ao : -w 4 -s : 33.7507 : 28.0173 27 | crown : bounces : -w 4 -s : 21.4867 : 17.9545 28 | crown : primary : -w 8 : 89.923 : 95.4834 29 | crown : ao : -w 8 : 44.2455 : 42.1222 30 | crown : bounces : -w 8 : 23.1584 : 21.0389 31 | crown : primary : -w 8 -p : None : 103.194 32 | crown : ao : -w 8 -p : None : 35.5137 33 | crown : bounces : -w 8 -p : None : 15.2423 34 | crown : primary : -w 8 -s : 70.0337 : 59.2134 35 | crown : ao : -w 8 -s : 38.9787 : 32.6553 36 | crown : bounces : -w 8 -s : 23.8139 : 20.9049 37 | san-miguel : primary : -w 4 : 23.0406 : 22.0645 38 | san-miguel : ao : -w 4 : 15.9148 : 13.8224 39 | san-miguel : bounces : -w 4 : 7.32533 : 6.45785 40 | san-miguel : primary : -w 4 -p : None : 23.2798 41 | san-miguel : ao : -w 4 -p : None : 11.859 42 | san-miguel : bounces : -w 4 -p : 
None : 4.93395 43 | san-miguel : primary : -w 4 -s : 15.4667 : 10.8329 44 | san-miguel : ao : -w 4 -s : 14.0318 : 11.4903 45 | san-miguel : bounces : -w 4 -s : 7.6656 : 6.8292 46 | san-miguel : primary : -w 8 : 19.1285 : 18.7394 47 | san-miguel : ao : -w 8 : 16.3181 : 14.7668 48 | san-miguel : bounces : -w 8 : 7.62373 : 6.98282 49 | san-miguel : primary : -w 8 -p : None : 20.2968 50 | san-miguel : ao : -w 8 -p : None : 11.8617 51 | san-miguel : bounces : -w 8 -p : None : 4.85699 52 | san-miguel : primary : -w 8 -s : 17.9463 : 11.3314 53 | san-miguel : ao : -w 8 -s : 16.133 : 13.5902 54 | san-miguel : bounces : -w 8 -s : 8.51997 : 7.75063 55 | powerplant : primary : -w 4 : 50.633 : 49.3437 56 | powerplant : ao : -w 4 : 95.4166 : 102.892 57 | powerplant : bounces : -w 4 : 13.8832 : 11.8606 58 | powerplant : primary : -w 4 -p : None : 50.9431 59 | powerplant : ao : -w 4 -p : None : 106.134 60 | powerplant : bounces : -w 4 -p : None : 8.97721 61 | powerplant : primary : -w 4 -s : 33.1721 : 25.3444 62 | powerplant : ao : -w 4 -s : 52.6649 : 42.33 63 | powerplant : bounces : -w 4 -s : 14.254 : 12.0286 64 | powerplant : primary : -w 8 : 44.8194 : 43.0222 65 | powerplant : ao : -w 8 : 88.0362 : 98.0998 66 | powerplant : bounces : -w 8 : 14.658 : 13.2946 67 | powerplant : primary : -w 8 -p : None : 46.8461 68 | powerplant : ao : -w 8 -p : None : 101.687 69 | powerplant : bounces : -w 8 -p : None : 9.25009 70 | powerplant : primary : -w 8 -s : 34.5967 : 29.0171 71 | powerplant : ao : -w 8 -s : 64.9127 : 52.1655 72 | powerplant : bounces : -w 8 -s : 15.9856 : 13.9619 73 | -------------------------------------------------------------------------------- /benchmarks/scenes/generate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ARTY=/space/perard/sources/arty/build/src/arty 3 | cd sponza 4 | export ARTY_SCENE_SIZE=4000 5 | export ARTY_AO_FACTOR=0.8 6 | export ARTY_AO_OFFSET=0.1 7 | ${ARTY} sponza.yml -a 4 -s 75 -o 
sponza-ao.png 8 | ${ARTY} sponza.yml -a 1 -s 10 -o sponza-pt.png 9 | cd .. 10 | cd crown 11 | export ARTY_SCENE_SIZE=200 12 | export ARTY_AO_FACTOR=0.9 13 | export ARTY_AO_OFFSET=0.0 14 | ${ARTY} crown.yml -a 4 -s 75 -o crown-ao.png 15 | ${ARTY} crown.yml -a 1 -s 10 -o crown-pt.png 16 | cd .. 17 | cd san-miguel 18 | export ARTY_SCENE_SIZE=50 19 | export ARTY_AO_FACTOR=0.9 20 | export ARTY_AO_OFFSET=0.0 21 | ${ARTY} san-miguel.yml -a 4 -s 75 -o san-miguel-ao.png 22 | ${ARTY} san-miguel.yml -a 1 -s 10 -o san-miguel-pt.png 23 | cd .. 24 | cd powerplant 25 | export ARTY_SCENE_SIZE=200000 26 | export ARTY_AO_FACTOR=0.9 27 | export ARTY_AO_OFFSET=0.0 28 | ${ARTY} powerplant.yml -a 4 -s 75 -o powerplant-ao.png 29 | ${ARTY} powerplant.yml -a 1 -s 10 -o powerplant-pt.png 30 | cd .. 31 | -------------------------------------------------------------------------------- /cmake/modules/FindLZ4.cmake: -------------------------------------------------------------------------------- 1 | find_path(LZ4_INCLUDE_DIR NAMES lz4.h) 2 | find_library(LZ4_LIBRARY NAMES lz4) 3 | 4 | include(FindPackageHandleStandardArgs) 5 | find_package_handle_standard_args(LZ4 DEFAULT_MSG LZ4_LIBRARY LZ4_INCLUDE_DIR) 6 | -------------------------------------------------------------------------------- /cmake/test/run_rodent.cmake: -------------------------------------------------------------------------------- 1 | execute_process(COMMAND ${RODENT} --bench 50 -o ${CMAKE_CURRENT_BINARY_DIR}/${RODENT_OUTPUT}.png ${RODENT_ARGS} RESULT_VARIABLE CMD_RESULT WORKING_DIRECTORY ${RODENT_DIR}) 2 | if (CMD_RESULT) 3 | message(FATAL_ERROR "Error running rodent") 4 | endif() 5 | execute_process(COMMAND ${IM_COMPARE} -metric MSE ${TESTING_DIR}/ref-cornell.png ${RODENT_OUTPUT}.png ${RODENT_OUTPUT}-diff.png RESULT_VARIABLE CMD_RESULT) 6 | if (CMD_RESULT) 7 | message(FATAL_ERROR "The output of rodent '${RODENT_OUTPUT}.png' does not match the reference '${TESTING_DIR}/ref-cornell.png'") 8 | endif() 9 | 
-------------------------------------------------------------------------------- /cmake/test/run_traversal.cmake: -------------------------------------------------------------------------------- 1 | execute_process(COMMAND ${BENCH_TRAVERSAL} -bvh ${TESTING_DIR}/sponza.bvh -ray ${TESTING_DIR}/sponza-primary.rays --bench 1 --warmup 0 --tmin 0.01 --tmax 5000 -o ${TRAVERSAL_OUTPUT}.fbuf ${BENCH_TRAVERSAL_ARGS} RESULT_VARIABLE CMD_RESULT) 2 | if (CMD_RESULT) 3 | message(FATAL_ERROR "Error running the traversal benchmark tool") 4 | endif() 5 | execute_process(COMMAND ${FBUF2PNG} -n ${TRAVERSAL_OUTPUT}.fbuf ${TRAVERSAL_OUTPUT}.png RESULT_VARIABLE CMD_RESULT) 6 | if (CMD_RESULT) 7 | message(FATAL_ERROR "Error running fbuf2png") 8 | endif() 9 | execute_process(COMMAND ${IM_COMPARE} -metric MSE ${TESTING_DIR}/ref-primary.png ${TRAVERSAL_OUTPUT}.png ${TRAVERSAL_OUTPUT}-diff.png RESULT_VARIABLE CMD_RESULT) 10 | if (CMD_RESULT) 11 | message(FATAL_ERROR "The output of the traversal '${TRAVERSAL_OUTPUT}.png' does not match the reference '${TESTING_DIR}/ref-primary.png'") 12 | endif() 13 | -------------------------------------------------------------------------------- /refs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | project(cats) 3 | # Locate the bundled Find*.cmake modules relative to this project, not the top-level build. 4 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules) 5 | 6 | find_package(SDL2 REQUIRED) 7 | find_package(CUDA REQUIRED) 8 | find_package(JPEG REQUIRED) 9 | find_package(PNG REQUIRED) 10 | find_package(OptiX REQUIRED) 11 | find_package(Embree REQUIRED) 12 | find_package(ISPC REQUIRED) 13 | find_package(TBB REQUIRED) 14 | 15 | add_subdirectory(src) 16 | 
find_path(Embree_INCLUDE_DIR embree3/rtcore.h HINTS ${Embree_DIR}/include) 3 | find_library(Embree_LIBRARY NAMES embree3 PATHS ${Embree_DIR}/lib64 ${Embree_DIR}/lib) 4 | 5 | include(FindPackageHandleStandardArgs) 6 | find_package_handle_standard_args(Embree REQUIRED_VARS Embree_LIBRARY Embree_INCLUDE_DIR) 7 | -------------------------------------------------------------------------------- /refs/cmake/modules/FindISPC.cmake: -------------------------------------------------------------------------------- 1 | find_path(ISPC_DIR ispc HINTS /usr/bin) 2 | 3 | include(FindPackageHandleStandardArgs) 4 | find_package_handle_standard_args(ISPC REQUIRED_VARS ISPC_DIR) 5 | -------------------------------------------------------------------------------- /refs/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(${OptiX_INCLUDE} ${CUDA_INCLUDE_DIRS} ${SDL2_INCLUDE_DIRS} ${Embree_INCLUDE_DIR} ${TBB_INCLUDE_DIRS}) 2 | 3 | cuda_wrap_srcs(optix_path_tracer PTX OPTIX_GENERATED_FILES optix_path_tracer.cu) 4 | add_executable(optix_path_tracer optix_path_tracer.cpp obj.cpp obj.h image.cpp ${OPTIX_GENERATED_FILES}) 5 | target_link_libraries(optix_path_tracer PUBLIC ${SDL2_LIBRARY} ${optix_LIBRARY} ${optixu_LIBRARY} ${PNG_LIBRARY} ${JPEG_LIBRARY}) 6 | target_compile_definitions(optix_path_tracer PUBLIC -DGENERATED_PTX_FILE="${OPTIX_GENERATED_FILES}") 7 | 8 | set(ENABLE_TIMING FALSE CACHE BOOL "Enables/disables timing information for Embree path tracer") 9 | add_executable(embree_path_tracer embree_path_tracer.cpp obj.cpp image.cpp ${CMAKE_CURRENT_BINARY_DIR}/embree_path_tracer.ispc.generated.o) 10 | if (${ENABLE_TIMING}) 11 | set(ISPC_FLAGS -DENABLE_TIMING) 12 | target_compile_definitions(embree_path_tracer PUBLIC -DENABLE_TIMING -DFORCE_SERIAL_TIMING) 13 | endif() 14 | add_custom_command( 15 | OUTPUT embree_path_tracer.ispc.generated.o 16 | COMMAND ${ISPC_DIR}/ispc ${CMAKE_CURRENT_SOURCE_DIR}/embree_path_tracer.ispc 
-o embree_path_tracer.ispc.generated.o -O3 --target=avx2-i32x8 -I${Embree_INCLUDE_DIR} ${ISPC_FLAGS} 17 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 18 | DEPENDS ${ISPC_DIR}/ispc ${CMAKE_CURRENT_SOURCE_DIR}/embree_path_tracer.ispc ${CMAKE_CURRENT_SOURCE_DIR}/embree_path_tracer.h ${CMAKE_CURRENT_SOURCE_DIR}/common.isph ${CMAKE_CURRENT_SOURCE_DIR}/math.isph VERBATIM) 19 | target_link_libraries(embree_path_tracer PUBLIC ${SDL2_LIBRARY} ${Embree_LIBRARY} ${PNG_LIBRARY} ${JPEG_LIBRARY} ${TBB_LIBRARIES}) 20 | # The shared C/ISPC headers are listed in DEPENDS so that editing them rebuilds the ISPC objects. 21 | add_executable(bench_shading bench_shading.cpp ${CMAKE_CURRENT_BINARY_DIR}/bench_shading.ispc.generated.o) 22 | add_custom_command( 23 | OUTPUT bench_shading.ispc.generated.o 24 | COMMAND ${ISPC_DIR}/ispc ${CMAKE_CURRENT_SOURCE_DIR}/bench_shading.ispc -o bench_shading.ispc.generated.o -O3 --target=avx2-i32x8 -I${Embree_INCLUDE_DIR} ${ISPC_FLAGS} 25 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 26 | DEPENDS ${ISPC_DIR}/ispc ${CMAKE_CURRENT_SOURCE_DIR}/bench_shading.ispc ${CMAKE_CURRENT_SOURCE_DIR}/bench_shading.h ${CMAKE_CURRENT_SOURCE_DIR}/common.isph ${CMAKE_CURRENT_SOURCE_DIR}/math.isph VERBATIM) 27 | -------------------------------------------------------------------------------- /refs/src/bench_shading.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCH_SHADING_H 2 | #define BENCH_SHADING_H 3 | 4 | #if defined(ISPC) && !defined(ISPC_STD_C99_DATATYPES) 5 | #define ISPC_STD_C99_DATATYPES 6 | typedef unsigned int32 uint32_t; 7 | typedef unsigned int64 uint64_t; 8 | typedef int32 int32_t; 9 | typedef int64 int64_t; 10 | #endif 11 | 12 | struct stream_s { 13 | uint32_t* rnd; 14 | int32_t* depth; 15 | int32_t* geom_id; 16 | int32_t* prim_id; 17 | float* mis; 18 | float* contrib_r; 19 | float* contrib_g; 20 | float* contrib_b; 21 | float* org_x; 22 | float* org_y; 23 | float* org_z; 24 | float* dir_x; 25 | float* dir_y; 26 | float* dir_z; 27 | float* tmin; 28 | float* tmax; 29 | float* t; 30 | float* u; 31 | float* v; 32 | }; 33 | 34 | #endif // BENCH_SHADING_H 35 | 
-------------------------------------------------------------------------------- /refs/src/color.h: -------------------------------------------------------------------------------- 1 | #ifndef COLOR_H 2 | #define COLOR_H 3 | 4 | #include "float3.h" 5 | #include "float4.h" 6 | 7 | struct rgba; 8 | 9 | struct rgb : public float3 { 10 | rgb() {} 11 | rgb(const float3& rgb) : float3(rgb) {} 12 | rgb(float r, float g, float b) : float3(r, g, b) {} 13 | explicit rgb(float x) : float3(x) {} 14 | explicit rgb(const rgba& rgba); 15 | 16 | rgb& operator += (const rgb& p) { 17 | *this = *this + p; 18 | return *this; 19 | } 20 | }; 21 | 22 | struct rgba : public float4 { 23 | rgba() {} 24 | rgba(const float4& rgba) : float4(rgba) {} 25 | rgba(float r, float g, float b, float a) : float4(r, g, b, a) {} 26 | explicit rgba(float x) : float4(x) {} 27 | explicit rgba(const rgb& rgb, float a) : float4(rgb, a) {} 28 | 29 | rgba& operator += (const rgba& p) { 30 | *this = *this + p; 31 | return *this; 32 | } 33 | }; 34 | 35 | inline rgb::rgb(const rgba& rgba) : float3(rgba) {} 36 | 37 | inline rgb gamma(const rgb& c, float g = 0.5f) { 38 | return rgb(std::pow(c.x, g), std::pow(c.y, g), std::pow(c.z, g)); 39 | } 40 | 41 | inline rgba gamma(const rgba& c, float g = 0.5f) { 42 | return rgba(std::pow(c.x, g), std::pow(c.y, g), std::pow(c.z, g), c.w); 43 | } 44 | 45 | inline rgb clamp(const rgb& val, const rgb& min, const rgb& max) { 46 | return rgb(clamp(val.x, min.x, max.x), 47 | clamp(val.y, min.y, max.y), 48 | clamp(val.z, min.z, max.z)); 49 | } 50 | 51 | inline rgba clamp(const rgba& val, const rgba& min, const rgba& max) { 52 | return rgba(clamp(val.x, min.x, max.x), 53 | clamp(val.y, min.y, max.y), 54 | clamp(val.z, min.z, max.z), 55 | clamp(val.w, min.w, max.w)); 56 | } 57 | 58 | #endif // COLOR_H 59 | -------------------------------------------------------------------------------- /refs/src/common.h: -------------------------------------------------------------------------------- 
1 | #ifndef COMMON_H 2 | #define COMMON_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | // Round to the integer above the division. 10 | inline uint32_t round_up(uint32_t val, uint32_t div) { 11 | auto mod = val % div; 12 | return val + (mod ? div - mod : 0); 13 | } 14 | 15 | /// Clamps a between b and c. 16 | template 17 | inline T clamp(T a, T b, T c) { 18 | return (a < b) ? b : ((a > c) ? c : a); 19 | } 20 | 21 | /// Returns the integer that is greater or equal to the logarithm base 2 of the argument. 22 | template 23 | inline T closest_log2(T i) { 24 | T p = 1, q = 0; 25 | while (i > p) p <<= 1, q++; 26 | return q; 27 | } 28 | 29 | /// Reinterprets a floating point number as an integer. 30 | inline int32_t float_as_int(float f) { 31 | union { float vf; int32_t vi; } v; 32 | v.vf = f; 33 | return v.vi; 34 | } 35 | 36 | /// Reinterprets an integer as a floating point number. 37 | inline float int_as_float(int32_t i) { 38 | union { float vf; int32_t vi; } v; 39 | v.vi = i; 40 | return v.vf; 41 | } 42 | 43 | inline void error [[noreturn]] () { 44 | std::cerr << std::endl; 45 | abort(); 46 | } 47 | 48 | /// Outputs an error message in the console. 49 | template 50 | inline void error [[noreturn]] (T t, Args... args) { 51 | #if COLORIZE 52 | std::cerr << "\033[1;31m"; 53 | #endif 54 | std::cerr << t; 55 | #if COLORIZE 56 | std::cerr << "\033[0m"; 57 | #endif 58 | error(args...); 59 | } 60 | 61 | inline void info() { 62 | std::cout << std::endl; 63 | } 64 | 65 | /// Outputs an information message in the console. 66 | template 67 | inline void info(T t, Args... args) { 68 | std::cout << t; 69 | info(args...); 70 | } 71 | 72 | inline void warn() { 73 | std::clog << std::endl; 74 | } 75 | 76 | /// Outputs an warning message in the console. 77 | template 78 | inline void warn(T t, Args... 
args) { 79 | #if COLORIZE 80 | std::clog << "\033[1;33m"; 81 | #endif 82 | std::clog << t; 83 | #if COLORIZE 84 | std::clog << "\033[0m"; 85 | #endif 86 | warn(args...); 87 | } 88 | 89 | #endif // COMMON_H 90 | -------------------------------------------------------------------------------- /refs/src/embree_path_tracer.h: -------------------------------------------------------------------------------- 1 | #ifndef EMBREE_PATH_TRACER_H 2 | #define EMBREE_PATH_TRACER_H 3 | 4 | #if defined(ISPC) && !defined(ISPC_STD_C99_DATATYPES) 5 | #define ISPC_STD_C99_DATATYPES 6 | typedef unsigned int32 uint32_t; 7 | typedef unsigned int64 uint64_t; 8 | typedef int32 int32_t; 9 | typedef int64 int64_t; 10 | #endif 11 | 12 | struct rgb_s { 13 | float r, g, b; 14 | }; 15 | 16 | struct float3_s { 17 | float x, y, z; 18 | }; 19 | 20 | struct float2_s { 21 | float x, y; 22 | }; 23 | 24 | struct image_s { 25 | uint32_t* pixels; 26 | uint32_t width; 27 | uint32_t height; 28 | }; 29 | 30 | struct camera_s { 31 | struct float3_s eye; 32 | struct float3_s dir; 33 | struct float3_s right; 34 | struct float3_s up; 35 | float w, h; 36 | }; 37 | 38 | struct light_s { 39 | struct float3_s v0; 40 | struct float3_s v1; 41 | struct float3_s v2; 42 | struct float3_s n; 43 | float inv_area; 44 | struct rgb_s color; 45 | }; 46 | 47 | struct material_s { 48 | struct rgb_s ke; 49 | struct rgb_s kd; 50 | struct rgb_s ks; 51 | struct rgb_s tf; 52 | float ns; 53 | float ni; 54 | int32_t map_kd; 55 | int32_t map_ks; 56 | int32_t light; 57 | uint32_t illum; 58 | }; 59 | 60 | struct counters_s { 61 | int64_t total_rays; 62 | int64_t primary; 63 | int64_t shadow; 64 | int64_t shade; 65 | int64_t bounces; 66 | int64_t total; 67 | }; 68 | 69 | struct scene_s { 70 | RTCScene scene; 71 | 72 | float* film; 73 | uint32_t film_width; 74 | uint32_t film_height; 75 | 76 | uint32_t* indices; 77 | struct float3_s* vertices; 78 | struct float3_s* normals; 79 | struct float2_s* texcoords; 80 | struct camera_s* camera; 81 | 
struct image_s* images; 82 | struct material_s* materials; 83 | struct light_s* lights; 84 | 85 | float pdf_lightpick; 86 | uint32_t num_lights; 87 | }; 88 | 89 | #endif // EMBREE_PATH_TRACER_H 90 | -------------------------------------------------------------------------------- /refs/src/file_path.h: -------------------------------------------------------------------------------- 1 | #ifndef FILE_PATH_H 2 | #define FILE_PATH_H 3 | 4 | #include 5 | #include 6 | 7 | /// Represents a path in the file system. 8 | class FilePath { 9 | public: 10 | FilePath(const std::string& path) 11 | : path_(path) 12 | { 13 | std::replace(path_.begin(), path_.end(), '\\', '/'); 14 | auto pos = path_.rfind('/'); 15 | base_ = (pos != std::string::npos) ? path_.substr(0, pos) : "."; 16 | file_ = (pos != std::string::npos) ? path_.substr(pos + 1) : path_; 17 | } 18 | 19 | const std::string& path() const { return path_; } 20 | const std::string& base_name() const { return base_; } 21 | const std::string& file_name() const { return file_; } 22 | 23 | std::string extension() const { 24 | auto pos = file_.rfind('.'); 25 | return (pos != std::string::npos) ? file_.substr(pos + 1) : std::string(); 26 | } 27 | 28 | std::string remove_extension() const { 29 | auto pos = file_.rfind('.'); 30 | return (pos != std::string::npos) ? 
file_.substr(0, pos) : file_; 31 | } 32 | 33 | operator const std::string& () const { 34 | return path(); 35 | } 36 | 37 | private: 38 | std::string path_; 39 | std::string base_; 40 | std::string file_; 41 | }; 42 | 43 | #endif // FILE_PATH_H 44 | -------------------------------------------------------------------------------- /refs/src/float2.h: -------------------------------------------------------------------------------- 1 | #ifndef FLOAT2_H 2 | #define FLOAT2_H 3 | 4 | #include 5 | #include "common.h" 6 | 7 | struct float3; 8 | struct float4; 9 | 10 | struct float2 { 11 | union { 12 | struct { float x, y; }; 13 | float values[2]; 14 | }; 15 | 16 | float2() {} 17 | explicit float2(float x) : x(x), y(x) {} 18 | explicit float2(const float3& xy); 19 | explicit float2(const float4& xy); 20 | float2(float x, float y) : x(x), y(y) {} 21 | 22 | bool operator == (const float2& other) const { 23 | return x == other.x && y == other.y; 24 | } 25 | 26 | bool operator != (const float2& other) const { 27 | return x != other.x || y != other.y; 28 | } 29 | 30 | float operator [] (size_t i) const { return values[i]; } 31 | float& operator [] (size_t i) { return values[i]; } 32 | 33 | float2& operator += (const float2& a) { 34 | x += a.x; y += a.y; 35 | return *this; 36 | } 37 | 38 | float2& operator -= (const float2& a) { 39 | x -= a.x; y -= a.y; 40 | return *this; 41 | } 42 | 43 | float2& operator *= (float a) { 44 | x *= a; y *= a; 45 | return *this; 46 | } 47 | 48 | float2& operator *= (const float2& a) { 49 | x *= a.x; y *= a.y; 50 | return *this; 51 | } 52 | }; 53 | 54 | inline float2 operator * (float a, const float2& b) { 55 | return float2(a * b.x, a * b.y); 56 | } 57 | 58 | inline float2 operator * (const float2& a, float b) { 59 | return float2(a.x * b, a.y * b); 60 | } 61 | 62 | inline float2 operator / (const float2& a, float b) { 63 | return a * (1.0f / b); 64 | } 65 | 66 | inline float2 operator - (const float2& a, const float2& b) { 67 | return float2(a.x - 
b.x, a.y - b.y); 68 | } 69 | 70 | inline float2 operator + (const float2& a, const float2& b) { 71 | return float2(a.x + b.x, a.y + b.y); 72 | } 73 | 74 | inline float2 operator * (const float2& a, const float2& b) { 75 | return float2(a.x * b.x, a.y * b.y); 76 | } 77 | 78 | inline float2 min(const float2& a, const float2& b) { 79 | return float2(a.x < b.x ? a.x : b.x, 80 | a.y < b.y ? a.y : b.y); 81 | } 82 | 83 | inline float2 max(const float2& a, const float2& b) { 84 | return float2(a.x > b.x ? a.x : b.x, 85 | a.y > b.y ? a.y : b.y); 86 | } 87 | 88 | inline float dot(const float2& a, const float2& b) { 89 | return a.x * b.x + a.y * b.y; 90 | } 91 | 92 | inline float lensqr(const float2& a) { 93 | return dot(a, a); 94 | } 95 | 96 | inline float length(const float2& a) { 97 | return std::sqrt(dot(a, a)); 98 | } 99 | 100 | inline float2 normalize(const float2& a) { 101 | return a * (1.0f / length(a)); 102 | } 103 | 104 | #endif // FLOAT2_H 105 | -------------------------------------------------------------------------------- /refs/src/float3.h: -------------------------------------------------------------------------------- 1 | #ifndef FLOAT3_H 2 | #define FLOAT3_H 3 | 4 | #include 5 | #include "common.h" 6 | #include "float2.h" 7 | 8 | struct float4; 9 | 10 | struct float3 { 11 | union { 12 | struct { float x, y, z; }; 13 | float values[3]; 14 | }; 15 | 16 | float3() {} 17 | explicit float3(float x) : x(x), y(x), z(x) {} 18 | explicit float3(const float4& xyz); 19 | float3(float x, float y, float z) : x(x), y(y), z(z) {} 20 | float3(const float2& xy, float z) : x(xy.x), y(xy.y), z(z) {} 21 | float3(float x, const float2& yz) : x(x), y(yz.x), z(yz.y) {} 22 | 23 | bool operator == (const float3& other) const { 24 | return x == other.x && y == other.y && z == other.z; 25 | } 26 | 27 | bool operator != (const float3& other) const { 28 | return x != other.x || y != other.y || z != other.z; 29 | } 30 | 31 | float operator [] (size_t i) const { return values[i]; } 32 | 
float& operator [] (size_t i) { return values[i]; } 33 | 34 | float3& operator += (const float3& a) { 35 | x += a.x; y += a.y; z += a.z; 36 | return *this; 37 | } 38 | 39 | float3& operator -= (const float3& a) { 40 | x -= a.x; y -= a.y; z -= a.z; 41 | return *this; 42 | } 43 | 44 | float3& operator *= (float a) { 45 | x *= a; y *= a; z *= a; 46 | return *this; 47 | } 48 | 49 | float3& operator *= (const float3& a) { 50 | x *= a.x; y *= a.y; z *= a.z; 51 | return *this; 52 | } 53 | }; 54 | 55 | inline float2::float2(const float3& xy) 56 | : x(xy.x), y(xy.y) 57 | {} 58 | 59 | inline float3 operator * (float a, const float3& b) { 60 | return float3(a * b.x, a * b.y, a * b.z); 61 | } 62 | 63 | inline float3 operator * (const float3& a, float b) { 64 | return float3(a.x * b, a.y * b, a.z * b); 65 | } 66 | 67 | inline float3 operator / (const float3& a, float b) { 68 | return a * (1.0f / b); 69 | } 70 | 71 | inline float3 operator - (const float3& a, const float3& b) { 72 | return float3(a.x - b.x, a.y - b.y, a.z - b.z); 73 | } 74 | 75 | inline float3 operator - (const float3& a) { 76 | return float3(-a.x, -a.y, -a.z); 77 | } 78 | 79 | inline float3 operator + (const float3& a, const float3& b) { 80 | return float3(a.x + b.x, a.y + b.y, a.z + b.z); 81 | } 82 | 83 | inline float3 operator * (const float3& a, const float3& b) { 84 | return float3(a.x * b.x, a.y * b.y, a.z * b.z); 85 | } 86 | 87 | inline float3 operator / (const float3& a, const float3& b) { 88 | return float3(a.x / b.x, a.y / b.y, a.z / b.z); 89 | } 90 | 91 | inline float3 cross(const float3& a, const float3& b) { 92 | return float3(a.y * b.z - a.z * b.y, 93 | a.z * b.x - a.x * b.z, 94 | a.x * b.y - a.y * b.x); 95 | } 96 | 97 | inline float3 rotate(const float3& v, const float3& axis, float angle) { 98 | float q[4]; 99 | q[0] = axis.x * sinf(angle / 2); 100 | q[1] = axis.y * sinf(angle / 2); 101 | q[2] = axis.z * sinf(angle / 2); 102 | q[3] = std::cos(angle / 2); 103 | 104 | float p[4]; 105 | p[0] = q[3] 
* v.x + q[1] * v.z - q[2] * v.y; 106 | p[1] = q[3] * v.y - q[0] * v.z + q[2] * v.x; 107 | p[2] = q[3] * v.z + q[0] * v.y - q[1] * v.x; 108 | p[3] = -(q[0] * v.x + q[1] * v.y + q[2] * v.z); 109 | 110 | return float3(p[3] * -q[0] + p[0] * q[3] + p[1] * -q[2] - p[2] * -q[1], 111 | p[3] * -q[1] - p[0] * -q[2] + p[1] * q[3] + p[2] * -q[0], 112 | p[3] * -q[2] + p[0] * -q[1] - p[1] * -q[0] + p[2] * q[3]); 113 | } 114 | 115 | inline float3 min(const float3& a, const float3& b) { 116 | return float3(a.x < b.x ? a.x : b.x, 117 | a.y < b.y ? a.y : b.y, 118 | a.z < b.z ? a.z : b.z); 119 | } 120 | 121 | inline float3 max(const float3& a, const float3& b) { 122 | return float3(a.x > b.x ? a.x : b.x, 123 | a.y > b.y ? a.y : b.y, 124 | a.z > b.z ? a.z : b.z); 125 | } 126 | 127 | inline float dot(const float3& a, const float3& b) { 128 | return a.x * b.x + a.y * b.y + a.z * b.z; 129 | } 130 | 131 | inline float lensqr(const float3& a) { 132 | return dot(a, a); 133 | } 134 | 135 | inline float length(const float3& a) { 136 | return std::sqrt(dot(a, a)); 137 | } 138 | 139 | inline float3 normalize(const float3& a) { 140 | return a * (1.0f / length(a)); 141 | } 142 | 143 | #endif // FLOAT3_H 144 | -------------------------------------------------------------------------------- /refs/src/float4.h: -------------------------------------------------------------------------------- 1 | #ifndef FLOAT4_H 2 | #define FLOAT4_H 3 | 4 | #include 5 | #include "common.h" 6 | #include "float2.h" 7 | #include "float3.h" 8 | 9 | struct float4 { 10 | union { 11 | struct { float x, y, z, w; }; 12 | float values[4]; 13 | }; 14 | 15 | float4() {} 16 | explicit float4(float x) : x(x), y(x), z(x), w(x) {} 17 | float4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {} 18 | float4(const float3& xyz, float w) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {} 19 | float4(float x, const float3& yzw) : x(x), y(yzw.x), z(yzw.y), w(yzw.z) {} 20 | float4(const float2& xy, float z, float w) : x(xy.x), y(xy.y), 
z(z), w(w) {} 21 | float4(float x, const float2& yz, float w) : x(x), y(yz.x), z(yz.y), w(w) {} 22 | float4(float x, float y, const float2& zw) : x(x), y(y), z(zw.x), w(zw.y) {} 23 | float4(const float2& xy, const float2& zw) : x(xy.x), y(xy.y), z(zw.x), w(zw.y) {} 24 | // Component-wise equality: all four components must match (exact negation of operator !=). 25 | bool operator == (const float4& other) const { 26 | return x == other.x && y == other.y && z == other.z && w == other.w; 27 | } 28 | 29 | bool operator != (const float4& other) const { 30 | return x != other.x || y != other.y || z != other.z || w != other.w; 31 | } 32 | 33 | float operator [] (size_t i) const { return values[i]; } 34 | float& operator [] (size_t i) { return values[i]; } 35 | 36 | float4& operator += (const float4& a) { 37 | x += a.x; y += a.y; z += a.z; w += a.w; 38 | return *this; 39 | } 40 | 41 | float4& operator -= (const float4& a) { 42 | x -= a.x; y -= a.y; z -= a.z; w -= a.w; 43 | return *this; 44 | } 45 | 46 | float4& operator *= (float a) { 47 | x *= a; y *= a; z *= a; w *= a; 48 | return *this; 49 | } 50 | 51 | float4& operator *= (const float4& a) { 52 | x *= a.x; y *= a.y; z *= a.z; w *= a.w; 53 | return *this; 54 | } 55 | }; 56 | 57 | inline float2::float2(const float4& xy) 58 | : x(xy.x), y(xy.y) 59 | {} 60 | 61 | inline float3::float3(const float4& xyz) 62 | : x(xyz.x), y(xyz.y), z(xyz.z) 63 | {} 64 | 65 | inline float4 operator * (float a, const float4& b) { 66 | return float4(a * b.x, a * b.y, a * b.z, a * b.w); 67 | } 68 | 69 | inline float4 operator * (const float4& a, float b) { 70 | return float4(a.x * b, a.y * b, a.z * b, a.w * b); 71 | } 72 | 73 | inline float4 operator / (const float4& a, float b) { 74 | return a * (1.0f / b); 75 | } 76 | 77 | inline float4 operator - (const float4& a, const float4& b) { 78 | return float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); 79 | } 80 | 81 | inline float4 operator - (const float4& a) { 82 | return float4(-a.x, -a.y, -a.z, -a.w); 83 | } 84 | 85 | inline float4 operator + (const float4& a, const float4& b) { 86 | 
return float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); 87 | } 88 | 89 | inline float4 operator * (const float4& a, const float4& b) { 90 | return float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); 91 | } 92 | 93 | inline float4 abs(const float4& a) { 94 | return float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); 95 | } 96 | 97 | inline float4 min(const float4& a, const float4& b) { 98 | return float4(a.x < b.x ? a.x : b.x, 99 | a.y < b.y ? a.y : b.y, 100 | a.z < b.z ? a.z : b.z, 101 | a.w < b.w ? a.w : b.w); 102 | } 103 | 104 | inline float4 max(const float4& a, const float4& b) { 105 | return float4(a.x > b.x ? a.x : b.x, 106 | a.y > b.y ? a.y : b.y, 107 | a.z > b.z ? a.z : b.z, 108 | a.w > b.w ? a.w : b.w); 109 | } 110 | 111 | inline float dot(const float4& a, const float4& b) { 112 | return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; 113 | } 114 | 115 | inline float lensqr(const float4& a) { 116 | return dot(a, a); 117 | } 118 | 119 | inline float length(const float4& a) { 120 | return std::sqrt(dot(a, a)); 121 | } 122 | 123 | inline float4 normalize(const float4& a) { 124 | return a * (1.0f / length(a)); 125 | } 126 | 127 | inline float4 clamp(const float4& val, const float4& min, const float4& max) { 128 | return float4(clamp(val.x, min.x, max.x), 129 | clamp(val.y, min.y, max.y), 130 | clamp(val.z, min.z, max.z), 131 | clamp(val.w, min.w, max.w)); 132 | } 133 | 134 | #endif // FLOAT4_H 135 | -------------------------------------------------------------------------------- /refs/src/image.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include "image.h" 9 | 10 | static void gamma_correct(ImageRgba32& img) { 11 | for (size_t y = 0; y < img.height; ++y) { 12 | for (size_t x = 0; x < img.width; ++x) { 13 | auto* pix = &img.pixels[4 * (y * img.width + x)]; 14 | for (int i = 0; i < 3; ++i) 15 | pix[i] = std::pow(pix[i] * (1.0f / 255.0f), 
2.2f) * 255.0f; 16 | } 17 | } 18 | } 19 | 20 | static void read_from_stream(png_structp png_ptr, png_bytep data, png_size_t length) { 21 | png_voidp a = png_get_io_ptr(png_ptr); 22 | ((std::istream*)a)->read((char*)data, length); 23 | } 24 | 25 | bool load_png(const FilePath& path, ImageRgba32& img) { 26 | std::ifstream file(path, std::ifstream::binary); 27 | if (!file) 28 | return false; 29 | 30 | // Read signature 31 | char sig[8]; 32 | file.read(sig, 8); 33 | if (!png_check_sig((unsigned char*)sig, 8)) 34 | return false; 35 | 36 | png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); 37 | if (!png_ptr) 38 | return false; 39 | 40 | png_infop info_ptr = png_create_info_struct(png_ptr); 41 | if (!info_ptr) { 42 | png_destroy_read_struct(&png_ptr, nullptr, nullptr); 43 | return false; 44 | } 45 | 46 | if (setjmp(png_jmpbuf(png_ptr))) { 47 | png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); 48 | return false; 49 | } 50 | 51 | png_set_sig_bytes(png_ptr, 8); 52 | png_set_read_fn(png_ptr, (png_voidp)&file, read_from_stream); 53 | png_read_info(png_ptr, info_ptr); 54 | 55 | img.width = png_get_image_width(png_ptr, info_ptr); 56 | img.height = png_get_image_height(png_ptr, info_ptr); 57 | img.channels = 4; 58 | 59 | png_uint_32 color_type = png_get_color_type(png_ptr, info_ptr); 60 | png_uint_32 bit_depth = png_get_bit_depth(png_ptr, info_ptr); 61 | 62 | // Expand paletted and grayscale images to RGB 63 | if (color_type == PNG_COLOR_TYPE_PALETTE) { 64 | png_set_palette_to_rgb(png_ptr); 65 | } else if (color_type == PNG_COLOR_TYPE_GRAY || 66 | color_type == PNG_COLOR_TYPE_GRAY_ALPHA) { 67 | png_set_gray_to_rgb(png_ptr); 68 | } 69 | 70 | // Transform to 8 bit per channel 71 | if (bit_depth == 16) 72 | png_set_strip_16(png_ptr); 73 | 74 | // Get alpha channel when there is one 75 | if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) 76 | png_set_tRNS_to_alpha(png_ptr); 77 | 78 | // Otherwise add an opaque alpha channel 79 | 
else 80 | png_set_filler(png_ptr, 0xFF, PNG_FILLER_AFTER); 81 | 82 | img.pixels.reset(new uint8_t[img.channels * img.width * img.height]); 83 | std::unique_ptr row_bytes(new png_byte[img.width * 4]); 84 | for (size_t y = 0; y < img.height; y++) { 85 | png_read_row(png_ptr, row_bytes.get(), nullptr); 86 | uint8_t* img_row = img.pixels.get() + 4 * img.width * (img.height - 1 - y); 87 | for (size_t x = 0; x < img.width; x++) { 88 | for (size_t c = 0; c < img.channels; ++c) 89 | img_row[x * img.channels + c] = row_bytes[x * 4 + c]; 90 | } 91 | } 92 | 93 | png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); 94 | gamma_correct(img); 95 | return true; 96 | } 97 | 98 | struct enhanced_jpeg_decompress_struct : jpeg_decompress_struct { 99 | jmp_buf jmp; 100 | std::istream* is; 101 | JOCTET src_buf[1024]; 102 | }; 103 | 104 | static void jpeg_error_exit(j_common_ptr cinfo) { 105 | cinfo->err->output_message(cinfo); 106 | longjmp(reinterpret_cast(cinfo)->jmp, 1); 107 | } 108 | 109 | static void jpeg_output_message(j_common_ptr) {} 110 | 111 | static void jpeg_no_op(j_decompress_ptr) {} 112 | 113 | static int jpeg_fill_input_buffer(j_decompress_ptr cinfo) { 114 | auto enhanced = static_cast(cinfo); 115 | enhanced->is->read((char*)enhanced->src_buf, 1024); 116 | cinfo->src->bytes_in_buffer = enhanced->is->gcount(); 117 | cinfo->src->next_input_byte = enhanced->src_buf; 118 | return TRUE; 119 | } 120 | 121 | static void jpeg_skip_input_data(j_decompress_ptr cinfo, long num_bytes) { 122 | auto enhanced = static_cast(cinfo); 123 | if (num_bytes != 0) { 124 | if (num_bytes < long(cinfo->src->bytes_in_buffer)) { 125 | cinfo->src->next_input_byte += num_bytes; 126 | cinfo->src->bytes_in_buffer -= num_bytes; 127 | } else { 128 | enhanced->is->seekg(num_bytes - cinfo->src->bytes_in_buffer, std::ios_base::cur); 129 | cinfo->src->bytes_in_buffer = 0; 130 | } 131 | } 132 | } 133 | 134 | bool load_jpg(const FilePath& path, ImageRgba32& image) { 135 | std::ifstream file(path, 
std::ifstream::binary); 136 | if (!file) 137 | return false; 138 | 139 | enhanced_jpeg_decompress_struct cinfo; 140 | cinfo.is = &file; 141 | jpeg_error_mgr jerr; 142 | 143 | cinfo.err = jpeg_std_error(&jerr); 144 | jerr.error_exit = jpeg_error_exit; 145 | jerr.output_message = jpeg_output_message; 146 | jpeg_create_decompress(&cinfo); 147 | 148 | if (setjmp(cinfo.jmp)) { 149 | jpeg_abort_decompress(&cinfo); 150 | jpeg_destroy_decompress(&cinfo); 151 | return false; 152 | } 153 | 154 | jpeg_source_mgr src; 155 | src.init_source = jpeg_no_op; 156 | src.fill_input_buffer = jpeg_fill_input_buffer; 157 | src.skip_input_data = jpeg_skip_input_data; 158 | src.resync_to_restart = jpeg_resync_to_restart; 159 | src.term_source = jpeg_no_op; 160 | src.bytes_in_buffer = 0; 161 | cinfo.src = &src; 162 | 163 | jpeg_read_header(&cinfo, true); 164 | jpeg_start_decompress(&cinfo); 165 | image.width = cinfo.output_width; 166 | image.height = cinfo.output_height; 167 | auto image_size = image.width * image.height * 4; 168 | image.pixels.reset(new uint8_t[image_size]); 169 | std::fill(image.pixels.get(), image.pixels.get() + image_size, 0); 170 | image.channels = cinfo.output_components; 171 | 172 | std::unique_ptr row(new JSAMPLE[image.width * image.channels]); 173 | for (size_t y = 0; y < image.height; y++) { 174 | auto src_ptr = row.get(); 175 | auto dst_ptr = &image.pixels[(image.height - 1 - y) * image.width * 4]; 176 | jpeg_read_scanlines(&cinfo, &src_ptr, 1); 177 | for (size_t x = 0; x < image.width; ++x, src_ptr += image.channels, dst_ptr += 4) { 178 | for (size_t c = 0; c < image.channels; c++) 179 | dst_ptr[c] = src_ptr[c]; 180 | } 181 | } 182 | 183 | jpeg_finish_decompress(&cinfo); 184 | jpeg_destroy_decompress(&cinfo); 185 | gamma_correct(image); 186 | return true; 187 | } 188 | -------------------------------------------------------------------------------- /refs/src/image.h: -------------------------------------------------------------------------------- 1 | #ifndef 
IMAGE_H 2 | #define IMAGE_H 3 | 4 | #include "file_path.h" 5 | 6 | struct ImageRgba32 { 7 | std::unique_ptr pixels; 8 | size_t width, height; 9 | size_t channels; 10 | }; 11 | 12 | bool load_png(const FilePath&, ImageRgba32&); 13 | bool load_jpg(const FilePath&, ImageRgba32&); 14 | 15 | #endif // IMAGE_H 16 | -------------------------------------------------------------------------------- /refs/src/math.isph: -------------------------------------------------------------------------------- 1 | #ifndef QUALIFIER 2 | #error "Please define the preprocessor symbol QUALIFIER before including this file" 3 | #endif 4 | 5 | inline struct float3_s QUALIFIER make_float3(float QUALIFIER x, float QUALIFIER y, float QUALIFIER z) { 6 | struct float3_s QUALIFIER v = { x, y, z }; 7 | return v; 8 | } 9 | 10 | inline struct float2_s QUALIFIER make_float2(float QUALIFIER x, float QUALIFIER y) { 11 | struct float2_s QUALIFIER v = { x, y }; 12 | return v; 13 | } 14 | 15 | inline struct rgb_s QUALIFIER make_rgb(float QUALIFIER r, float QUALIFIER g, float QUALIFIER b) { 16 | struct rgb_s QUALIFIER c = { r, g, b }; 17 | return c; 18 | } 19 | 20 | inline struct rgb_s QUALIFIER operator+ (const struct rgb_s QUALIFIER a, const struct rgb_s QUALIFIER b) { 21 | return make_rgb(a.r + b.r, a.g + b.g, a.b + b.b); 22 | } 23 | 24 | inline struct float3_s QUALIFIER operator+ (const struct float3_s QUALIFIER a, const struct float3_s QUALIFIER b) { 25 | return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); 26 | } 27 | 28 | inline struct float3_s QUALIFIER operator- (const struct float3_s QUALIFIER a, const struct float3_s QUALIFIER b) { 29 | return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); 30 | } 31 | 32 | inline struct float2_s QUALIFIER operator+ (const struct float2_s QUALIFIER a, const struct float2_s QUALIFIER b) { 33 | return make_float2(a.x + b.x, a.y + b.y); 34 | } 35 | 36 | inline struct rgb_s QUALIFIER operator* (const struct rgb_s QUALIFIER a, float QUALIFIER b) { 37 | return make_rgb(a.r 
* b, a.g * b, a.b * b); 38 | } 39 | 40 | inline struct float3_s QUALIFIER operator* (const struct float3_s QUALIFIER a, float QUALIFIER b) { 41 | return make_float3(a.x * b, a.y * b, a.z * b); 42 | } 43 | 44 | inline struct rgb_s QUALIFIER operator* (const struct rgb_s QUALIFIER a, const struct rgb_s QUALIFIER b) { 45 | return make_rgb(a.r * b.r, a.g * b.g, a.b * b.b); 46 | } 47 | 48 | inline struct float2_s QUALIFIER operator* (const struct float2_s QUALIFIER a, float QUALIFIER b) { 49 | return make_float2(a.x * b, a.y * b); 50 | } 51 | 52 | inline struct float3_s QUALIFIER negate(const struct float3_s QUALIFIER v) { 53 | return make_float3(-v.x, -v.y, -v.z); 54 | } 55 | 56 | inline float QUALIFIER dot(const struct float3_s QUALIFIER a, const struct float3_s QUALIFIER b) { 57 | return a.x * b.x + a.y * b.y + a.z * b.z; 58 | } 59 | 60 | inline float QUALIFIER length(const struct float3_s QUALIFIER a) { 61 | return sqrt(dot(a, a)); 62 | } 63 | 64 | inline struct float3_s QUALIFIER normalize(const struct float3_s QUALIFIER v) { 65 | return v * (1.0f / length(v)); 66 | } 67 | 68 | inline float QUALIFIER lerp(float QUALIFIER a, float QUALIFIER b, float QUALIFIER t) { 69 | return a * (1.0f - t) + b * t; 70 | } 71 | 72 | inline struct rgb_s QUALIFIER lerp(struct rgb_s QUALIFIER a, struct rgb_s QUALIFIER b, float QUALIFIER t) { 73 | return a * (1.0f - t) + b * t; 74 | } 75 | 76 | inline float QUALIFIER luminance(struct rgb_s QUALIFIER c) { 77 | return c.r * 0.2126f + c.g * 0.7152f + c.b * 0.0722f; 78 | } 79 | 80 | inline float QUALIFIER fastlog2(float QUALIFIER x) { 81 | unsigned int QUALIFIER vx = intbits(x); 82 | unsigned int QUALIFIER mx = (vx & 0x007FFFFFu) | 0x3f000000u; 83 | float QUALIFIER y = vx * 1.1920928955078125e-7f; 84 | float QUALIFIER z = floatbits(mx); 85 | return y - 124.22551499f - 1.498030302f * z - 1.72587999f / (0.3520887068f + z); 86 | } 87 | 88 | inline float QUALIFIER fastpow2(float QUALIFIER p) { 89 | float QUALIFIER off = p < 0.0f ? 
1.0f : 0.0f; 90 | float QUALIFIER clipp = p < -126.0f ? -126.0f : p; 91 | int QUALIFIER w = clipp; 92 | float QUALIFIER z = clipp - w + off; 93 | int QUALIFIER v = (1u << 23u) * (clipp + 121.2740575f + 27.7280233f / (4.84252568f - z) - 1.49012907f * z); 94 | return floatbits(v); 95 | } 96 | 97 | inline float QUALIFIER fastpow(float QUALIFIER x, float QUALIFIER y) { 98 | return fastpow2(y * fastlog2(x)); 99 | } 100 | -------------------------------------------------------------------------------- /refs/src/obj.h: -------------------------------------------------------------------------------- 1 | #ifndef LOAD_OBJ_H 2 | #define LOAD_OBJ_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "float3.h" 9 | #include "color.h" 10 | #include "file_path.h" 11 | 12 | namespace obj { 13 | 14 | struct Index { 15 | int v, n, t; 16 | }; 17 | 18 | struct Face { 19 | std::vector indices; 20 | int material; 21 | }; 22 | 23 | struct Group { 24 | std::vector faces; 25 | }; 26 | 27 | struct Object { 28 | std::vector groups; 29 | }; 30 | 31 | struct Material { 32 | rgb ka; 33 | rgb kd; 34 | rgb ks; 35 | rgb ke; 36 | float ns; 37 | float ni; 38 | rgb tf; 39 | float tr; 40 | float d; 41 | int illum; 42 | std::string map_ka; 43 | std::string map_kd; 44 | std::string map_ks; 45 | std::string map_ke; 46 | std::string map_bump; 47 | std::string map_d; 48 | }; 49 | 50 | struct File { 51 | std::vector objects; 52 | std::vector vertices; 53 | std::vector normals; 54 | std::vector texcoords; 55 | std::vector materials; 56 | std::vector mtl_libs; 57 | }; 58 | 59 | typedef std::unordered_map MaterialLib; 60 | 61 | struct TriMesh { 62 | std::vector vertices; 63 | std::vector indices; 64 | std::vector normals; 65 | std::vector face_normals; 66 | std::vector texcoords; 67 | }; 68 | 69 | bool load_obj(const FilePath&, File&); 70 | bool load_mtl(const FilePath&, MaterialLib&); 71 | TriMesh compute_tri_mesh(const File&, const MaterialLib&, size_t); 72 | 73 | } // namespace obj 74 | 75 | #endif 
// LOAD_OBJ_H 76 | -------------------------------------------------------------------------------- /refs/src/optix_path_tracer.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTIX_PATH_TRACER_H 2 | #define OPTIX_PATH_TRACER_H 3 | 4 | struct Material { 5 | float3 kd; 6 | float3 ks; 7 | float3 ke; 8 | float3 tf; 9 | int map_kd; 10 | int map_ks; 11 | float ns; 12 | float ni; 13 | uint illum; 14 | }; 15 | 16 | struct Light { 17 | float3 v0; 18 | float3 v1; 19 | float3 v2; 20 | float3 normal; 21 | float inv_area; 22 | float3 intensity; 23 | }; 24 | 25 | #endif // OPTIX_PATH_TRACER_H 26 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CPUINFO_PATH "/proc/cpuinfo" CACHE STRING "Path to the CPU info file on the system") 2 | set(TARGET_PLATFORM "" CACHE STRING "Target platform for the converter tool. Leave empty to autodetect CPU. Use converter --help to list possible platforms.") 3 | set(TARGET_DEVICE "" CACHE STRING "Target device on the selected platform. 
Leave empty to use the default.") 4 | set(SCENE_FILE "${PROJECT_SOURCE_DIR}/testing/cornell_box.obj" CACHE FILEPATH "Absolute path to an OBJ scene") 5 | set(MEGAKERNEL_FUSION ON CACHE BOOL "Set to true to enable simple shader fusion for megakernel mappings") 6 | set(MAX_PATH_LEN "64" CACHE STRING "Maximum path length") 7 | set(DISABLE_GUI OFF CACHE BOOL "Set to true to disable GUI") 8 | set(SPP "4" CACHE STRING "Samples per pixel") 9 | if (SCENE_FILE STREQUAL "") 10 | message(FATAL_ERROR "Please specify a valid OBJ scene in the SCENE_FILE variable") 11 | endif() 12 | set(CONVERTER_OPTIONS "") 13 | if (NOT TARGET_PLATFORM STREQUAL "") 14 | set(CONVERTER_OPTIONS "--target" "${TARGET_PLATFORM}") 15 | endif() 16 | if (NOT TARGET_DEVICE STREQUAL "") 17 | set(CONVERTER_OPTIONS ${CONVERTER_OPTIONS} "--device" "${TARGET_DEVICE}") 18 | if (MEGAKERNEL_FUSION AND (TARGET_PLATFORM STREQUAL "nvvm-megakernel" OR TARGET_PLATFORM STREQUAL "amdgpu-megakernel")) 19 | set(CONVERTER_OPTIONS ${CONVERTER_OPTIONS} "--fusion") 20 | endif() 21 | endif() 22 | 23 | set(RODENT_SRCS 24 | core/color.impala 25 | core/common.impala 26 | core/cpu_common.impala 27 | core/matrix.impala 28 | core/random.impala 29 | core/sort.impala 30 | core/vector.impala 31 | render/image.impala 32 | render/camera.impala 33 | render/geometry.impala 34 | render/light.impala 35 | render/material.impala 36 | render/renderer.impala 37 | render/scene.impala 38 | render/driver.impala 39 | render/mapping_cpu.impala 40 | render/mapping_gpu.impala 41 | traversal/intersection.impala 42 | traversal/stack.impala 43 | traversal/mapping_cpu.impala 44 | traversal/mapping_gpu.impala) 45 | 46 | set(DRIVER_SRCS 47 | driver/driver.cpp 48 | driver/interface.cpp 49 | driver/interface.h 50 | driver/obj.cpp 51 | driver/obj.h 52 | driver/image.cpp 53 | driver/image.h 54 | driver/bvh.h 55 | driver/float2.h 56 | driver/float3.h 57 | driver/float4.h 58 | driver/file_path.h 59 | driver/common.h 60 | driver/color.h) 61 | 62 | set(CONVERTER_SRCS 
63 | driver/converter.cpp 64 | driver/obj.cpp 65 | driver/obj.h 66 | driver/file_path.h 67 | driver/interface.h 68 | driver/bvh.h) 69 | 70 | anydsl_runtime_wrap(RODENT_OBJS 71 | NAME "rodent" 72 | CLANG_FLAGS ${CLANG_FLAGS} 73 | IMPALA_FLAGS --log-level info 74 | FILES ${RODENT_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/main.impala) 75 | 76 | anydsl_runtime_wrap(DISCARD_TMP_OBJS 77 | NAME "interface" 78 | FILES ${RODENT_SRCS} dummy_main.impala 79 | INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/driver/interface) 80 | 81 | if (NOT DISABLE_GUI) 82 | find_package(SDL2 REQUIRED) 83 | endif() 84 | find_package(PNG REQUIRED) 85 | find_package(JPEG REQUIRED) 86 | find_package(LZ4 REQUIRED) 87 | 88 | add_executable(converter ${CONVERTER_SRCS}) 89 | target_include_directories(converter PUBLIC ${LZ4_INCLUDE_DIR}) 90 | target_link_libraries(converter ${LZ4_LIBRARY}) 91 | target_compile_definitions(converter PUBLIC -DCPUINFO_PATH="${CPUINFO_PATH}") 92 | if (COLORIZE) 93 | target_compile_definitions(converter PUBLIC -DCOLORIZE) 94 | endif() 95 | if (EMBREE_FOUND) 96 | target_include_directories(converter PUBLIC ${EMBREE_ROOT_DIR}/include ${EMBREE_ROOT_DIR} ${EMBREE_LIBRARY_DIR}) 97 | target_link_libraries(converter ${EMBREE_DEPENDENCIES}) 98 | target_compile_definitions(converter PUBLIC ${EMBREE_DEFINITIONS} -DENABLE_EMBREE_BVH) 99 | endif() 100 | 101 | add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main.impala 102 | COMMAND converter ${SCENE_FILE} ${CONVERTER_OPTIONS} --max-path-len ${MAX_PATH_LEN} --samples-per-pixel ${SPP} 103 | COMMAND ${CMAKE_COMMAND} -E rename ${CMAKE_BINARY_DIR}/main.impala ${CMAKE_CURRENT_BINARY_DIR}/main.impala 104 | WORKING_DIRECTORY ${CMAKE_BINARY_DIR} 105 | DEPENDS ${SCENE_FILE} converter) 106 | 107 | add_custom_target(convert DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/main.impala) 108 | 109 | set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/main.impala PROPERTIES GENERATED TRUE) 110 | 111 | add_library(driver ${DRIVER_SRCS}) 112 | 
#[[ PNG and JPEG are located with REQUIRED independently of Embree, so their include directories are attached to the driver target unconditionally. ]] target_include_directories(driver PUBLIC ${LZ4_INCLUDE_DIR} ${PNG_INCLUDE_DIRS} ${JPEG_INCLUDE_DIRS}) 113 | if (EMBREE_FOUND) 114 | target_include_directories(driver PUBLIC ${EMBREE_ROOT_DIR}/include ${EMBREE_ROOT_DIR} ${EMBREE_LIBRARY_DIR}) 115 | target_link_libraries(driver ${EMBREE_DEPENDENCIES}) 116 | target_compile_definitions(driver PUBLIC -DENABLE_EMBREE_DEVICE) 117 | endif() 118 | if (NOT DISABLE_GUI) 119 | target_include_directories(driver PUBLIC ${SDL2_INCLUDE_DIR}) 120 | target_link_libraries(driver ${SDL2_LIBRARY}) 121 | else() 122 | target_compile_definitions(driver PUBLIC -DDISABLE_GUI) 123 | endif() 124 | if (COLORIZE) 125 | target_compile_definitions(driver PUBLIC -DCOLORIZE) 126 | endif() 127 | 128 | add_executable(rodent ${RODENT_OBJS}) 129 | target_link_libraries(rodent driver ${AnyDSL_runtime_LIBRARIES} ${PNG_LIBRARIES} ${JPEG_LIBRARIES} ${LZ4_LIBRARY}) 130 | 131 | if (SCENE_FILE STREQUAL "${PROJECT_SOURCE_DIR}/testing/cornell_box.obj") 132 | #[[ Test rodent when the cornell box is used; RODENT receives the path of the built rodent executable. ]] 133 | add_test(NAME rodent_cornell COMMAND ${CMAKE_COMMAND} -DRODENT=$<TARGET_FILE:rodent> -DIM_COMPARE=${ImageMagick_compare_EXECUTABLE} "-DRODENT_ARGS=--eye;0;1;2.7;--dir;0;0;-1;--up;0;1;0" -DTESTING_DIR=${PROJECT_SOURCE_DIR}/testing -DRODENT_DIR=${CMAKE_BINARY_DIR} -DRODENT_OUTPUT=rodent-cornell-output -P ${PROJECT_SOURCE_DIR}/cmake/test/run_rodent.cmake) 134 | endif() 135 | -------------------------------------------------------------------------------- /src/core/color.impala: -------------------------------------------------------------------------------- 1 | struct Color { 2 | r: f32, 3 | g: f32, 4 | b: f32 5 | } 6 | 7 | fn @make_color(r: f32, g: f32, b: f32) -> Color { 8 | Color { 9 | r: r, 10 | g: g, 11 | b: b 12 | } 13 | } 14 | 15 | fn @color_add(a: Color, b: Color) -> Color { 16 | make_color(a.r + b.r, a.g + b.g, a.b + b.b) 17 | } 18 | 19 | fn @color_mul(a: Color, b: Color) -> Color { 20 | make_color(a.r * b.r, a.g * b.g, a.b * b.b) 21 | } 22 | 23 | fn 
@color_mulf(c: Color, f: f32) -> Color { 24 | make_color(c.r * f, c.g * f, c.b * f) 25 | } 26 | 27 | fn @color_lerp(a: Color, b: Color, t: f32) -> Color { 28 | make_color((1.0f - t) * a.r + t * b.r, 29 | (1.0f - t) * a.g + t * b.g, 30 | (1.0f - t) * a.b + t * b.b) 31 | } 32 | 33 | fn @color_luminance(c: Color) -> f32 { 34 | c.r * 0.2126f + c.g * 0.7152f + c.b * 0.0722f 35 | } 36 | 37 | fn @is_black(c: Color) -> bool { 38 | c.r == 0.0f && c.g == 0.0f && c.b == 0.0f 39 | } 40 | 41 | fn @vec3_to_color(v: Vec3) -> Color { 42 | make_color(v.x, v.y, v.z) 43 | } 44 | 45 | fn @color_to_vec3(c: Color) -> Vec3 { 46 | make_vec3(c.r, c.g, c.b) 47 | } 48 | 49 | static black = Color { r: 0.0f, g: 0.0f, b: 0.0f }; 50 | static white = Color { r: 1.0f, g: 1.0f, b: 1.0f }; 51 | static pink = Color { r: 1.0f, g: 0.0f, b: 1.0f }; 52 | -------------------------------------------------------------------------------- /src/core/common.impala: -------------------------------------------------------------------------------- 1 | // Constants ----------------------------------------------------------------------- 2 | 3 | static flt_eps = 1.1920928955e-07f; 4 | static flt_max = 3.4028234664e+38f; 5 | static flt_min = 1.1754943509e-38f; 6 | static flt_inf = 1.0f / 0.0f; 7 | static flt_pi = 3.14159265359f; 8 | static flt_sqrt2 = 1.41421356237f; 9 | static flt_sqrt3 = 1.73205080757f; 10 | 11 | // Fast division ------------------------------------------------------------------- 12 | 13 | struct FastDiv { 14 | m: u32, 15 | s1: u32, 16 | s2: u32 17 | } 18 | 19 | fn @make_fast_div(div: u32) -> FastDiv { 20 | let log = ilog2(div as i32) as u32; 21 | let max = 1u64 << 32u64; 22 | let m = (max << log as u64) / (div as u64) - max + 1u64; 23 | let s1 = select(log < 1u32, log, 1u32); 24 | let s2 = select(log > 1u32, log - 1u32, 0u32); 25 | FastDiv { 26 | m: m as u32, 27 | s1: s1, 28 | s2: s2 29 | } 30 | } 31 | 32 | fn @fast_div(fd: FastDiv, i: u32) -> u32 { 33 | let t = (((fd.m as u64) * (i as u64)) >> 
32u64) as u32; 34 | (t + ((i - t) >> fd.s1)) >> fd.s2 35 | } 36 | 37 | // Fast power ---------------------------------------------------------------------- 38 | 39 | // Inspired from: 40 | // http://www.machinedlearnings.com/2011/06/fast-approximate-logarithm-exponential.html 41 | 42 | fn @fastlog2(x: f32) -> f32 { 43 | let vx = bitcast[u32](x); 44 | let mx = (vx & 0x007FFFFFu) | 0x3f000000u; 45 | let y = (vx as f32) * 1.1920928955078125e-7f; 46 | let z = bitcast[f32](mx); 47 | y - 124.22551499f - 1.498030302f * z - 1.72587999f / (0.3520887068f + z) 48 | } 49 | 50 | fn @fastpow2(p: f32) -> f32 { 51 | let offset = select(p < 0.0f, 1.0f, 0.0f); 52 | let clipp = select(p < -126.0f, -126.0f, p); 53 | let w = clipp as i32; 54 | let z = clipp - (w as f32) + offset; 55 | let v = ((1u << 23u) as f32 * (clipp + 121.2740575f + 27.7280233f / (4.84252568f - z) - 1.49012907f * z)) as i32; 56 | bitcast[f32](v) 57 | } 58 | 59 | fn @fastpow(x: f32, p: f32) -> f32 { 60 | fastpow2(p * fastlog2(x)) 61 | } 62 | 63 | // Likely/unlikely ----------------------------------------------------------------- 64 | 65 | extern "device" { 66 | fn "llvm.expect.i1" expect(bool, bool) -> bool; 67 | } 68 | 69 | fn @ likely(cond: bool) -> bool { expect(cond, true) } 70 | fn @unlikely(cond: bool) -> bool { expect(cond, false) } 71 | 72 | // Misc. 
--------------------------------------------------------------------------- 73 | 74 | fn @once(body: fn () -> ()) -> () { 75 | @@body() 76 | } 77 | 78 | fn @prodsign(x: f32, y: f32) -> f32 { 79 | bitcast[f32](bitcast[i32](x) ^ (bitcast[i32](y) & bitcast[i32](0x80000000u))) 80 | } 81 | 82 | fn @safe_rcp(x: f32) -> f32 { 83 | let min_rcp = 1e-8f; 84 | if select(x > 0.0f, x, -x) < min_rcp { prodsign(flt_max, x) } else { 1.0f / x } 85 | } 86 | 87 | fn @round_up(n: i32, d: i32) -> i32 { 88 | let m = n % d; 89 | n + (if m != 0 { d - m } else { 0 }) 90 | } 91 | 92 | fn @round_down(n: i32, d: i32) -> i32 { 93 | (n / d) * d 94 | } 95 | 96 | fn @sqrt_newton(y: f32, p: f32) -> f32 { 97 | fn @(?x) newton(x: f32) -> f32 { 98 | if (x * x - y) > p { 99 | newton(x - (x * x - y) / (2.0f * x)) 100 | } else { 101 | x 102 | } 103 | } 104 | newton(1.0f) 105 | } 106 | 107 | fn @ilog2(i: i32) -> i32 { 108 | fn @(?i) ilog2_helper(i: i32, p: i32) -> i32 { 109 | if i <= (1 << p) { 110 | p 111 | } else { 112 | ilog2_helper(i, p + 1) 113 | } 114 | } 115 | ilog2_helper(i, 0) 116 | } 117 | 118 | fn @lerp(a: f32, b: f32, k: f32) -> f32 { 119 | (1.0f - k) * a + k * b 120 | } 121 | 122 | fn @lerp2(a: f32, b: f32, c: f32, k1: f32, k2: f32) -> f32 { 123 | (1.0f - k1 - k2) * a + k1 * b + k2 * c 124 | } 125 | 126 | fn @triangle_area(math: Intrinsics, v0: Vec3, v1: Vec3, v2: Vec3) -> f32 { 127 | let e1 = vec3_sub(v1, v0); 128 | let e2 = vec3_sub(v2, v0); 129 | let n = vec3_cross(e1, e2); 130 | 0.5f * vec3_len(math, n) 131 | } 132 | 133 | fn @positive_cos(a: Vec3, b: Vec3) -> f32 { 134 | let cos = vec3_dot(a, b); 135 | if cos >= 0.0f { cos } else { 0.0f } 136 | } 137 | 138 | fn @swap_f32(a: &mut f32, b: &mut f32) -> () { 139 | let tmp = *a; 140 | *a = *b; 141 | *b = tmp; 142 | } 143 | 144 | fn @swap_i32(a: &mut i32, b: &mut i32) -> () { 145 | let tmp = *a; 146 | *a = *b; 147 | *b = tmp; 148 | } 149 | 150 | fn @swap_u32(a: &mut u32, b: &mut u32) -> () { 151 | let tmp = *a; 152 | *a = *b; 153 | *b = tmp; 
154 | } 155 | -------------------------------------------------------------------------------- /src/core/cpu_common.impala: -------------------------------------------------------------------------------- 1 | // Misc. --------------------------------------------------------------------------- 2 | 3 | extern "C" { 4 | fn clock_us() -> i64; 5 | } 6 | 7 | static cpu_profiling_enabled = false; 8 | static cpu_profiling_serial = false; 9 | 10 | // Profiles the function given as argument 11 | fn @cpu_profile(counter: &mut i64, body: fn () -> ()) -> () { 12 | if cpu_profiling_enabled { 13 | let start = clock_us(); 14 | body(); 15 | let end = clock_us(); 16 | if cpu_profiling_serial { 17 | *counter += end - start; 18 | } else { 19 | atomic(1u32, counter, end - start, 7u32, ""); 20 | } 21 | } else { 22 | body() 23 | } 24 | } 25 | 26 | // Iterate over the bit that are set in a mask (assumes that mask != 0) 27 | fn cpu_one_bits(mut mask: i32, @body: fn (i32) -> ()) -> () { 28 | let lane = cpu_ctz32(mask, true); 29 | @@body(lane); 30 | mask &= mask - 1; 31 | if mask != 0 { 32 | cpu_one_bits(mask, body, return) 33 | } 34 | } 35 | 36 | // Performs a horizontal reduction over vector lanes 37 | fn @(?n) cpu_reduce(value: f32, n: i32, op: fn (f32, f32) -> f32) -> f32 { 38 | if n >= 2 { 39 | let m = n / 2; 40 | cpu_reduce(op(value, rv_shuffle(value, m)), m, op) 41 | } else { 42 | value 43 | } 44 | } 45 | 46 | // Prefetches a chunk of memory 47 | fn @cpu_prefetch_bytes(ptr: &[u8], bytes: i32) -> () { 48 | for i in unroll_step(0, bytes, 64) { 49 | cpu_prefetch(&ptr(i), 0 /* read */, 3 /* closest locality */, 1 /* data */); 50 | } 51 | } 52 | 53 | // Returns the first vector lane index i for which value[i] == lane 54 | fn @cpu_index_of(value: f32, lane: f32) -> i32 { 55 | cpu_ctz32(rv_ballot(value == lane), true) 56 | } 57 | 58 | // Vectorizes an arbitrary range 59 | fn @vectorized_range(vector_width: i32, a: i32, b: i32, body: fn (i32, i32) -> ()) -> () { 60 | if vector_width == 1 { 61 
| for i in range(a, b) { 62 | body(i, 1); 63 | } 64 | } else { 65 | let n_vec = round_down(b - a, vector_width); 66 | for i in range_step(a, a + n_vec, vector_width) { 67 | for j in vectorize(vector_width) { 68 | @@body(i + j, vector_width) 69 | } 70 | } 71 | for i in range(a + n_vec, b) { 72 | @@body(i, 1) 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/core/matrix.impala: -------------------------------------------------------------------------------- 1 | struct Mat2x2 { 2 | col: [Vec2 * 2] 3 | } 4 | 5 | struct Mat3x3 { 6 | col: [Vec3 * 3] 7 | } 8 | 9 | struct Mat3x4 { 10 | col: [Vec3 * 4] 11 | } 12 | 13 | struct Mat4x4 { 14 | col: [Vec4 * 4] 15 | } 16 | 17 | fn @make_mat2x2(c0: Vec2, c1: Vec2) -> Mat2x2 { 18 | Mat2x2 { 19 | col: [c0, c1] 20 | } 21 | } 22 | 23 | fn @make_mat3x3(c0: Vec3, c1: Vec3, c2: Vec3) -> Mat3x3 { 24 | Mat3x3 { 25 | col: [c0, c1, c2] 26 | } 27 | } 28 | 29 | fn @make_orthonormal_mat3x3(n: Vec3) -> Mat3x3 { 30 | let sign = select(n.z >= 0.0f, 1.0f, -1.0f); 31 | let a = -1.0f / (sign + n.z); 32 | let b = n.x * n.y * a; 33 | 34 | let t = make_vec3(1.0f + sign * n.x * n.x * a, sign * b, -sign * n.x); 35 | let bt = make_vec3(b, sign + n.y * n.y * a, -n.y); 36 | Mat3x3 { 37 | col: [t, bt, n] 38 | } 39 | } 40 | 41 | fn @make_mat3x4(c0: Vec3, c1: Vec3, c2: Vec3, c3: Vec3) -> Mat3x4 { 42 | Mat3x4 { 43 | col: [c0, c1, c2, c3] 44 | } 45 | } 46 | 47 | fn @make_mat4x4(c0: Vec4, c1: Vec4, c2: Vec4, c3: Vec4) -> Mat4x4 { 48 | Mat4x4 { 49 | col: [c0, c1, c2, c3] 50 | } 51 | } 52 | 53 | fn @mat2x2_row(m: Mat2x2, i: i32) -> Vec2 { 54 | [make_vec2(m.col(0).x, m.col(1).x), 55 | make_vec2(m.col(0).y, m.col(1).y)](i) 56 | } 57 | 58 | fn @mat3x3_row(m: Mat3x3, i: i32) -> Vec3 { 59 | [make_vec3(m.col(0).x, m.col(1).x, m.col(2).x), 60 | make_vec3(m.col(0).y, m.col(1).y, m.col(2).y), 61 | make_vec3(m.col(0).z, m.col(1).z, m.col(2).z)](i) 62 | } 63 | 64 | fn @mat3x4_row(m: Mat3x4, i: i32) -> Vec4 { 65 | 
[make_vec4(m.col(0).x, m.col(1).x, m.col(2).x, m.col(3).x), 66 | make_vec4(m.col(0).y, m.col(1).y, m.col(2).y, m.col(3).y), 67 | make_vec4(m.col(0).z, m.col(1).z, m.col(2).z, m.col(3).z)](i) 68 | } 69 | 70 | fn @mat4x4_row(m: Mat4x4, i: i32) -> Vec4 { 71 | [make_vec4(m.col(0).x, m.col(1).x, m.col(2).x, m.col(3).x), 72 | make_vec4(m.col(0).y, m.col(1).y, m.col(2).y, m.col(3).y), 73 | make_vec4(m.col(0).z, m.col(1).z, m.col(2).z, m.col(3).z), 74 | make_vec4(m.col(0).w, m.col(1).w, m.col(2).w, m.col(3).w)](i) 75 | } 76 | 77 | fn @mat2x2_identity() -> Mat2x2 { 78 | make_mat2x2(make_vec2(1.0f, 0.0f), 79 | make_vec2(0.0f, 1.0f)) 80 | } 81 | 82 | fn @mat3x3_identity() -> Mat3x3 { 83 | make_mat3x3(make_vec3(1.0f, 0.0f, 0.0f), 84 | make_vec3(0.0f, 1.0f, 0.0f), 85 | make_vec3(0.0f, 0.0f, 1.0f)) 86 | } 87 | 88 | fn @mat3x4_identity() -> Mat3x4 { 89 | make_mat3x4(make_vec3(1.0f, 0.0f, 0.0f), 90 | make_vec3(0.0f, 1.0f, 0.0f), 91 | make_vec3(0.0f, 0.0f, 1.0f), 92 | make_vec3(0.0f, 0.0f, 0.0f)) 93 | } 94 | 95 | fn @mat4x4_identity() -> Mat4x4 { 96 | make_mat4x4(make_vec4(1.0f, 0.0f, 0.0f, 0.0f), 97 | make_vec4(0.0f, 1.0f, 0.0f, 0.0f), 98 | make_vec4(0.0f, 0.0f, 1.0f, 0.0f), 99 | make_vec4(0.0f, 0.0f, 0.0f, 1.0f)) 100 | } 101 | 102 | fn @mat2x2_mul(m: Mat2x2, v: Vec2) -> Vec2 { 103 | make_vec2(vec2_dot(mat2x2_row(m, 0), v), 104 | vec2_dot(mat2x2_row(m, 1), v)) 105 | } 106 | 107 | fn @mat3x3_mul(m: Mat3x3, v: Vec3) -> Vec3 { 108 | make_vec3(vec3_dot(mat3x3_row(m, 0), v), 109 | vec3_dot(mat3x3_row(m, 1), v), 110 | vec3_dot(mat3x3_row(m, 2), v)) 111 | } 112 | 113 | fn @mat3x4_mul(m: Mat3x4, v: Vec4) -> Vec3 { 114 | make_vec3(vec4_dot(mat3x4_row(m, 0), v), 115 | vec4_dot(mat3x4_row(m, 1), v), 116 | vec4_dot(mat3x4_row(m, 2), v)) 117 | } 118 | 119 | fn @mat4x4_mul(m: Mat4x4, v: Vec4) -> Vec4 { 120 | make_vec4(vec4_dot(mat4x4_row(m, 0), v), 121 | vec4_dot(mat4x4_row(m, 1), v), 122 | vec4_dot(mat4x4_row(m, 2), v), 123 | vec4_dot(mat4x4_row(m, 3), v)) 124 | } 125 | 126 | fn 
// Draws a float in [0, 1) from the generator state `rnd`.
fn @randf(rnd: &mut RndState) -> f32 {
    // Assumes IEEE 754 floating point format: a biased exponent of 127 plus
    // 23 random mantissa bits encodes a value in [1, 2); subtracting 1 shifts
    // the result to [0, 1).
    let mantissa = (randi(rnd) as u32) & 0x7FFFFFu32;
    let exponent = 127u32 << 23u32;
    bitcast[f32](exponent | mantissa) - 1.0f
}

// MWC64X: http://cas.ee.ic.ac.uk/people/dt10/research/rngs-gpu-mwc64x.html
// The 64-bit state holds the carry in its upper half and the value in its
// lower half; the returned number is the XOR of both halves before the update.
fn @mwc64x(seed: &mut u64) -> i32 {
    let carry = *seed >> 32u64;
    let value = *seed & 0xFFFFFFFFu64;
    *seed = value * 4294883355u64 + carry;
    (value as i32)^(carry as i32)
}

// 32-bit version of the xorshift random number generator (shifts 13/17/5).
fn @xorshift(seed: &mut u32) -> i32 {
    // A zero state would stay zero forever, so it is replaced by 1.
    let mut state = select(*seed == 0u32, 1u32, *seed);
    state = state ^ (state << 13u32);
    state = state ^ (state >> 17u32);
    state = state ^ (state << 5u32);
    *seed = state;
    state as i32
}
// Samples a direction on a hemisphere with a density proportional to the
// k-th power of the cosine between the direction and the +z axis.
// `u` selects the azimuth and `v` selects the elevation.
fn @sample_cosine_power_hemisphere(math: Intrinsics, k: f32, u: f32, v: f32) -> DirSample {
    // The cosine is clamped to 1 to guard against overshoot of the
    // approximate power function.
    let cos_theta = math.fminf(fastpow/*math.powf*/(v, 1.0f / (k + 1.0f)), 1.0f);
    let sin_theta = math.sqrtf(1.0f - cos_theta * cos_theta);
    let azimuth   = 2.0f * flt_pi * u;
    // The pdf needs pow(cos_theta, k). Instead of a second pow call, use:
    //   pow(c, k) = pow(pow(v, 1 / (k + 1)), k)
    //             = pow(v, k / (k + 1))
    //             = v * pow(v, -1 / (k + 1))
    //             = v / c
    // with a guard for c == 0 to avoid dividing by zero.
    let cos_pow_k = select(cos_theta != 0.0f, v / cos_theta, 0.0f);
    let density   = cos_pow_k * (k + 1.0f) * (1.0f / (2.0f * flt_pi));
    make_dir_sample(math, cos_theta, sin_theta, azimuth, density)
}
// Emits a bitonic sorting network for `n` elements.
// No data is touched here: the network is described purely through calls to
// `cmp_swap(a, b)`, one per comparator, with `a` and `b` being element indices.
// Both helpers carry the filter `?i & ?len`, i.e. they are specialized when the
// start index and the length are known.
fn @bitonic_sort(n: i32, cmp_swap: fn (i32, i32) -> ()) -> () {
    // Merges the range [i, i + len) in direction `dir`.
    // `dir` only decides the argument order passed to `cmp_swap`.
    fn @(?i & ?len) merge(i: i32, len: i32, dir: bool) -> () {
        if len > 1 {
            // Compute greatest power of two lower than len
            // NOTE(review): relies on `ilog2` (defined elsewhere) rounding up -- confirm.
            let m = 1 << (ilog2(len) - 1);

            // One comparator between each element and its partner `m` slots away.
            for j in unroll(i, i + len - m) @{
                cmp_swap(select(dir, j, j + m), select(dir, j + m, j));
            }

            // Recurse on the two parts, which may have different lengths.
            merge(i, m, dir);
            merge(i + m, len - m, dir);
        }
    }

    // Sorts [i, i + len): the first half in the opposite direction, the second
    // half in direction `dir`, followed by a merge of the whole range.
    fn @(?i & ?len) sort(i: i32, len: i32, dir: bool) -> () {
        if len > 1 {
            let m = len / 2;
            sort(i, m, !dir);
            sort(i + m, len - m, dir);
            merge(i, len, dir);
        }
    }

    sort(0, n, true)
}
// Small fixed-size float vectors and their component-wise operations.
// Every function is marked `@` and the higher-order helpers call their
// argument with `@@`, so the closures passed to map/zip are meant to be
// inlined at the call site.

struct Vec2 {
    x: f32,
    y: f32
}

struct Vec3 {
    x: f32,
    y: f32,
    z: f32
}

struct Vec4 {
    x: f32,
    y: f32,
    z: f32,
    w: f32
}

// Constructors
fn @make_vec2(x: f32, y: f32) -> Vec2 { Vec2 { x: x, y: y } }
fn @make_vec3(x: f32, y: f32, z: f32) -> Vec3 { Vec3 { x: x, y: y, z: z } }
fn @make_vec4(x: f32, y: f32, z: f32, w: f32) -> Vec4 { Vec4 { x: x, y: y, z: z, w: w } }

// Conversions: widening takes the missing components as arguments,
// narrowing drops the trailing components.
fn @vec2_to_3(v: Vec2, z: f32) -> Vec3 { make_vec3(v.x, v.y, z) }
fn @vec2_to_4(v: Vec2, z: f32, w: f32) -> Vec4 { make_vec4(v.x, v.y, z, w) }
fn @vec3_to_2(v: Vec3) -> Vec2 { make_vec2(v.x, v.y) }
fn @vec3_to_4(v: Vec3, w: f32) -> Vec4 { make_vec4(v.x, v.y, v.z, w) }
fn @vec4_to_3(v: Vec4) -> Vec3 { make_vec3(v.x, v.y, v.z) }
fn @vec4_to_2(v: Vec4) -> Vec2 { make_vec2(v.x, v.y) }

// Applies `f` to every component of `v`.
fn @vec2_map(v: Vec2, f: fn (f32) -> f32) -> Vec2 { make_vec2(@@f(v.x), @@f(v.y)) }
fn @vec3_map(v: Vec3, f: fn (f32) -> f32) -> Vec3 { make_vec3(@@f(v.x), @@f(v.y), @@f(v.z)) }
fn @vec4_map(v: Vec4, f: fn (f32) -> f32) -> Vec4 { make_vec4(@@f(v.x), @@f(v.y), @@f(v.z), @@f(v.w)) }

// Applies `f` to every pair of matching components of `a` and `b`.
fn @vec2_zip(a: Vec2, b: Vec2, f: fn (f32, f32) -> f32) -> Vec2 { make_vec2(@@f(a.x, b.x), @@f(a.y, b.y)) }
fn @vec3_zip(a: Vec3, b: Vec3, f: fn (f32, f32) -> f32) -> Vec3 { make_vec3(@@f(a.x, b.x), @@f(a.y, b.y), @@f(a.z, b.z)) }
fn @vec4_zip(a: Vec4, b: Vec4, f: fn (f32, f32) -> f32) -> Vec4 { make_vec4(@@f(a.x, b.x), @@f(a.y, b.y), @@f(a.z, b.z), @@f(a.w, b.w)) }

// Component-wise arithmetic
fn @vec2_add(a: Vec2, b: Vec2) -> Vec2 { vec2_zip(a, b, |x, y| x + y) }
fn @vec3_add(a: Vec3, b: Vec3) -> Vec3 { vec3_zip(a, b, |x, y| x + y) }
fn @vec4_add(a: Vec4, b: Vec4) -> Vec4 { vec4_zip(a, b, |x, y| x + y) }
fn @vec2_sub(a: Vec2, b: Vec2) -> Vec2 { vec2_zip(a, b, |x, y| x - y) }
fn @vec3_sub(a: Vec3, b: Vec3) -> Vec3 { vec3_zip(a, b, |x, y| x - y) }
fn @vec4_sub(a: Vec4, b: Vec4) -> Vec4 { vec4_zip(a, b, |x, y| x - y) }
fn @vec2_mul(a: Vec2, b: Vec2) -> Vec2 { vec2_zip(a, b, |x, y| x * y) }
fn @vec3_mul(a: Vec3, b: Vec3) -> Vec3 { vec3_zip(a, b, |x, y| x * y) }
fn @vec4_mul(a: Vec4, b: Vec4) -> Vec4 { vec4_zip(a, b, |x, y| x * y) }
fn @vec2_div(a: Vec2, b: Vec2) -> Vec2 { vec2_zip(a, b, |x, y| x / y) }
fn @vec3_div(a: Vec3, b: Vec3) -> Vec3 { vec3_zip(a, b, |x, y| x / y) }
fn @vec4_div(a: Vec4, b: Vec4) -> Vec4 { vec4_zip(a, b, |x, y| x / y) }

// Negation
fn @vec2_neg(v: Vec2) -> Vec2 { vec2_map(v, |x| -x) }
fn @vec3_neg(v: Vec3) -> Vec3 { vec3_map(v, |x| -x) }
fn @vec4_neg(v: Vec4) -> Vec4 { vec4_map(v, |x| -x) }

// Multiplication by a scalar (implemented by splatting `t` into a vector)
fn @vec2_mulf(v: Vec2, t: f32) -> Vec2 { vec2_mul(v, make_vec2(t, t)) }
fn @vec3_mulf(v: Vec3, t: f32) -> Vec3 { vec3_mul(v, make_vec3(t, t, t)) }
fn @vec4_mulf(v: Vec4, t: f32) -> Vec4 { vec4_mul(v, make_vec4(t, t, t, t)) }

// Dot products
fn @vec2_dot(a: Vec2, b: Vec2) -> f32 { a.x * b.x + a.y * b.y }
fn @vec3_dot(a: Vec3, b: Vec3) -> f32 { a.x * b.x + a.y * b.y + a.z * b.z }
fn @vec4_dot(a: Vec4, b: Vec4) -> f32 { a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w }

// Cross product of two 3-vectors
fn @vec3_cross(a: Vec3, b: Vec3) -> Vec3 {
    make_vec3(a.y * b.z - a.z * b.y,
              a.z * b.x - a.x * b.z,
              a.x * b.y - a.y * b.x)
}

// Squared length
fn @vec2_len2(v: Vec2) -> f32 { vec2_dot(v, v) }
fn @vec3_len2(v: Vec3) -> f32 { vec3_dot(v, v) }
fn @vec4_len2(v: Vec4) -> f32 { vec4_dot(v, v) }

// Computes 2 * dot(n, v) * n - v, i.e. `v` mirrored about the axis `n`.
fn @vec2_reflect(v: Vec2, n: Vec2) -> Vec2 { vec2_sub(vec2_mulf(n, 2.0f * vec2_dot(n, v)), v) }
fn @vec3_reflect(v: Vec3, n: Vec3) -> Vec3 { vec3_sub(vec3_mulf(n, 2.0f * vec3_dot(n, v)), v) }
fn @vec4_reflect(v: Vec4, n: Vec4) -> Vec4 { vec4_sub(vec4_mulf(n, 2.0f * vec4_dot(n, v)), v) }

// Length; the square root comes from the `math` intrinsics table.
fn @vec2_len(math: Intrinsics, v: Vec2) -> f32 { math.sqrtf(vec2_len2(v)) }
fn @vec3_len(math: Intrinsics, v: Vec3) -> f32 { math.sqrtf(vec3_len2(v)) }
fn @vec4_len(math: Intrinsics, v: Vec4) -> f32 { math.sqrtf(vec4_len2(v)) }

// Scales `v` by the reciprocal of its length. There is no guard for a
// zero-length input.
fn @vec2_normalize(math: Intrinsics, v: Vec2) -> Vec2 { vec2_mulf(v, 1.0f / vec2_len(math, v)) }
fn @vec3_normalize(math: Intrinsics, v: Vec3) -> Vec3 { vec3_mulf(v, 1.0f / vec3_len(math, v)) }
fn @vec4_normalize(math: Intrinsics, v: Vec4) -> Vec4 { vec4_mulf(v, 1.0f / vec4_len(math, v)) }

// Component-wise `lerp(a, b, k)`; `lerp` is defined elsewhere.
fn @vec2_lerp(a: Vec2, b: Vec2, k: f32) -> Vec2 { vec2_zip(a, b, |x, y| lerp(x, y, k)) }
fn @vec3_lerp(a: Vec3, b: Vec3, k: f32) -> Vec3 { vec3_zip(a, b, |x, y| lerp(x, y, k)) }
fn @vec4_lerp(a: Vec4, b: Vec4, k: f32) -> Vec4 { vec4_zip(a, b, |x, y| lerp(x, y, k)) }

// Component-wise `lerp2(a, b, c, u, v)`; `lerp2` is defined elsewhere.
// NOTE(review): presumably a barycentric blend of three values -- confirm.
fn @vec2_lerp2(a: Vec2, b: Vec2, c: Vec2, u: f32, v: f32) -> Vec2 {
    Vec2 {
        x: lerp2(a.x, b.x, c.x, u, v),
        y: lerp2(a.y, b.y, c.y, u, v)
    }
}
fn @vec3_lerp2(a: Vec3, b: Vec3, c: Vec3, u: f32, v: f32) -> Vec3 {
    Vec3 {
        x: lerp2(a.x, b.x, c.x, u, v),
        y: lerp2(a.y, b.y, c.y, u, v),
        z: lerp2(a.z, b.z, c.z, u, v)
    }
}
fn @vec4_lerp2(a: Vec4, b: Vec4, c: Vec4, u: f32, v: f32) -> Vec4 {
    Vec4 {
        x: lerp2(a.x, b.x, c.x, u, v),
        y: lerp2(a.y, b.y, c.y, u, v),
        z: lerp2(a.z, b.z, c.z, u, v),
        w: lerp2(a.w, b.w, c.w, u, v)
    }
}

// Component-wise minimum; on ties (or unordered comparisons) the second
// operand is returned.
fn @vec2_min(a: Vec2, b: Vec2) -> Vec2 { vec2_zip(a, b, |x, y| select(x < y, x, y)) }
fn @vec3_min(a: Vec3, b: Vec3) -> Vec3 { vec3_zip(a, b, |x, y| select(x < y, x, y)) }
fn @vec4_min(a: Vec4, b: Vec4) -> Vec4 { vec4_zip(a, b, |x, y| select(x < y, x, y)) }

// Component-wise maximum; same tie rule as the minimum.
fn @vec2_max(a: Vec2, b: Vec2) -> Vec2 { vec2_zip(a, b, |x, y| select(x > y, x, y)) }
fn @vec3_max(a: Vec3, b: Vec3) -> Vec3 { vec3_zip(a, b, |x, y| select(x > y, x, y)) }
fn @vec4_max(a: Vec4, b: Vec4) -> Vec4 { vec4_zip(a, b, |x, y| select(x > y, x, y)) }
-------------------------------------------------------------------------------- /src/driver/bbox.h: -------------------------------------------------------------------------------- 1 | #ifndef BBOX_H 2 | #define BBOX_H 3 | 4 | #include 5 | #include 6 | #include "float3.h" 7 | 8 | /// Bounding box represented by its two extreme points. 9 | struct BBox { 10 | float3 min, max; 11 | 12 | BBox() {} 13 | BBox(const float3& f) : min(f), max(f) {} 14 | BBox(const float3& min, const float3& max) : min(min), max(max) {} 15 | 16 | BBox& extend(const BBox& bb) { 17 | min = ::min(min, bb.min); 18 | max = ::max(max, bb.max); 19 | return *this; 20 | } 21 | 22 | BBox& extend(const float3& v) { 23 | min = ::min(min, v); 24 | max = ::max(max, v); 25 | return *this; 26 | } 27 | 28 | float half_area() const { 29 | const float3 len = max - min; 30 | const float kx = std::max(len.x, 0.0f); 31 | const float ky = std::max(len.y, 0.0f); 32 | const float kz = std::max(len.z, 0.0f); 33 | return kx * (ky + kz) + ky * kz; 34 | } 35 | 36 | BBox& overlap(const BBox& bb) { 37 | min = ::max(min, bb.min); 38 | max = ::min(max, bb.max); 39 | return *this; 40 | } 41 | 42 | bool is_empty() const { 43 | return min.x > max.x || 44 | min.y > max.y || 45 | min.z > max.z; 46 | } 47 | 48 | 49 | bool is_inside(const float3& v) const { 50 | return v.x >= min.x && v.y >= min.y && v.z >= min.z && 51 | v.x <= max.x && v.y <= max.y && v.z <= max.z; 52 | } 53 | 54 | bool is_overlapping(const BBox& bb) const { 55 | return min.x <= bb.max.x && max.x >= bb.min.x && 56 | min.y <= bb.max.y && max.y >= bb.min.y && 57 | min.z <= bb.max.z && max.z >= bb.min.z; 58 | } 59 | 60 | static BBox empty() { return BBox(float3(FLT_MAX), float3(-FLT_MAX)); } 61 | static BBox full() { return BBox(float3(-FLT_MAX), float3(FLT_MAX)); } 62 | }; 63 | 64 | #endif // BBOX_H 65 | -------------------------------------------------------------------------------- /src/driver/buffer.h: 
-------------------------------------------------------------------------------- 1 | #ifndef BUFFER_H 2 | #define BUFFER_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | static void skip_buffer(std::istream& is) { 11 | size_t in_size = 0, out_size = 0; 12 | is.read((char*)&in_size, sizeof(uint32_t)); 13 | is.read((char*)&out_size, sizeof(uint32_t)); 14 | is.seekg(out_size, std::ios::cur); 15 | } 16 | 17 | template 18 | static void decompress(const std::vector& in, Array& out) { 19 | LZ4_decompress_safe(in.data(), (char*)out.data(), in.size(), out.size() * sizeof(out[0])); 20 | } 21 | 22 | template 23 | static void read_buffer(std::istream& is, Array& array) { 24 | size_t in_size = 0, out_size = 0; 25 | is.read((char*)&in_size, sizeof(uint32_t)); 26 | is.read((char*)&out_size, sizeof(uint32_t)); 27 | std::vector in(out_size); 28 | is.read(in.data(), in.size()); 29 | array = std::move(Array(in_size / sizeof(array[0]))); 30 | decompress(in, array); 31 | } 32 | 33 | template 34 | static void read_buffer(const std::string& file_name, Array& array) { 35 | std::ifstream is(file_name, std::ios::binary); 36 | read_buffer(is, array); 37 | } 38 | 39 | template 40 | static void compress(const Array& in, std::vector& out) { 41 | size_t in_size = sizeof(in[0]) * in.size(); 42 | out.resize(LZ4_compressBound(in_size)); 43 | out.resize(LZ4_compress_default((const char*)in.data(), out.data(), in_size, out.size())); 44 | } 45 | 46 | template 47 | static void write_buffer(std::ostream& os, const Array& array) { 48 | std::vector out; 49 | compress(array, out); 50 | size_t in_size = sizeof(array[0]) * array.size(); 51 | size_t out_size = out.size(); 52 | os.write((char*)&in_size, sizeof(uint32_t)); 53 | os.write((char*)&out_size, sizeof(uint32_t)); 54 | os.write(out.data(), out.size()); 55 | } 56 | 57 | template 58 | static void write_buffer(const std::string& file_name, const Array& array) { 59 | std::ofstream of(file_name, std::ios::binary); 60 | write_buffer(of, 
array); 61 | } 62 | 63 | #endif // BUFFER_H 64 | -------------------------------------------------------------------------------- /src/driver/color.h: -------------------------------------------------------------------------------- 1 | #ifndef COLOR_H 2 | #define COLOR_H 3 | 4 | #include "float3.h" 5 | #include "float4.h" 6 | 7 | struct rgba; 8 | 9 | struct rgb : public float3 { 10 | rgb() {} 11 | rgb(const float3& rgb) : float3(rgb) {} 12 | rgb(float r, float g, float b) : float3(r, g, b) {} 13 | explicit rgb(float x) : float3(x) {} 14 | explicit rgb(const rgba& rgba); 15 | 16 | rgb& operator += (const rgb& p) { 17 | *this = *this + p; 18 | return *this; 19 | } 20 | }; 21 | 22 | struct rgba : public float4 { 23 | rgba() {} 24 | rgba(const float4& rgba) : float4(rgba) {} 25 | rgba(float r, float g, float b, float a) : float4(r, g, b, a) {} 26 | explicit rgba(float x) : float4(x) {} 27 | explicit rgba(const rgb& rgb, float a) : float4(rgb, a) {} 28 | 29 | rgba& operator += (const rgba& p) { 30 | *this = *this + p; 31 | return *this; 32 | } 33 | }; 34 | 35 | inline rgb::rgb(const rgba& rgba) : float3(rgba) {} 36 | 37 | inline rgb gamma(const rgb& c, float g = 0.5f) { 38 | return rgb(std::pow(c.x, g), std::pow(c.y, g), std::pow(c.z, g)); 39 | } 40 | 41 | inline rgba gamma(const rgba& c, float g = 0.5f) { 42 | return rgba(std::pow(c.x, g), std::pow(c.y, g), std::pow(c.z, g), c.w); 43 | } 44 | 45 | inline rgb clamp(const rgb& val, const rgb& min, const rgb& max) { 46 | return rgb(clamp(val.x, min.x, max.x), 47 | clamp(val.y, min.y, max.y), 48 | clamp(val.z, min.z, max.z)); 49 | } 50 | 51 | inline rgba clamp(const rgba& val, const rgba& min, const rgba& max) { 52 | return rgba(clamp(val.x, min.x, max.x), 53 | clamp(val.y, min.y, max.y), 54 | clamp(val.z, min.z, max.z), 55 | clamp(val.w, min.w, max.w)); 56 | } 57 | 58 | #endif // COLOR_H 59 | -------------------------------------------------------------------------------- /src/driver/common.h: 
/// Rounds `val` up to the next multiple of `div`.
/// `div` must be non-zero (the remainder operation is undefined otherwise).
inline uint32_t round_up(uint32_t val, uint32_t div) {
    auto mod = val % div;
    return val + (mod ? div - mod : 0);
}

/// Clamps a between b and c.
/// When `a` lies outside the interval the nearer bound is returned.
template <typename T>
inline T clamp(T a, T b, T c) {
    return (a < b) ? b : ((a > c) ? c : a);
}

/// Returns the integer that is greater or equal to the logarithm base 2 of the argument.
/// In other words: the smallest q such that (1 << q) >= i; 0 for i <= 1.
template <typename T>
inline T closest_log2(T i) {
    T p = 1, q = 0;
    while (i > p) p <<= 1, q++;
    return q;
}

/// Reinterprets a floating point number as an integer.
/// NOTE(review): reading the inactive union member is not sanctioned by
/// ISO C++ (it is by C and by the major compilers); std::memcpy would be the
/// strictly portable form.
inline int32_t float_as_int(float f) {
    union { float vf; int32_t vi; } v;
    v.vf = f;
    return v.vi;
}

/// Reinterprets an integer as a floating point number.
/// Same caveat as float_as_int().
inline float int_as_float(int32_t i) {
    union { float vf; int32_t vi; } v;
    v.vi = i;
    return v.vf;
}
/// Represents a path in the file system.
///
/// The path is normalized to forward slashes on construction and split at
/// the last slash into a directory part and a file part.
class FilePath {
public:
    FilePath(const std::string& path)
        : path_(path)
    {
        // Normalize Windows separators.
        for (char& ch : path_) {
            if (ch == '\\') ch = '/';
        }

        const std::string::size_type slash = path_.rfind('/');
        if (slash == std::string::npos) {
            // No directory component: the file lives in the current directory.
            base_ = ".";
            file_ = path_;
        } else {
            base_ = path_.substr(0, slash);
            file_ = path_.substr(slash + 1);
        }
    }

    /// Full, normalized path.
    const std::string& path() const { return path_; }
    /// Directory part (everything before the last slash, or "." if none).
    const std::string& base_name() const { return base_; }
    /// File part (everything after the last slash).
    const std::string& file_name() const { return file_; }

    /// Text after the last dot of the file name, or an empty string when the
    /// file name contains no dot.
    std::string extension() const {
        const std::string::size_type dot = file_.rfind('.');
        if (dot == std::string::npos) return std::string();
        return file_.substr(dot + 1);
    }

    /// File name up to (excluding) its last dot; the whole file name when it
    /// contains no dot.
    std::string remove_extension() const {
        const std::string::size_type dot = file_.rfind('.');
        if (dot == std::string::npos) return file_;
        return file_.substr(0, dot);
    }

    /// Implicit view of the full, normalized path.
    operator const std::string& () const {
        return path();
    }

private:
    std::string path_;
    std::string base_;
    std::string file_;
};
b.x, a.y - b.y); 68 | } 69 | 70 | inline float2 operator + (const float2& a, const float2& b) { 71 | return float2(a.x + b.x, a.y + b.y); 72 | } 73 | 74 | inline float2 operator * (const float2& a, const float2& b) { 75 | return float2(a.x * b.x, a.y * b.y); 76 | } 77 | 78 | inline float2 min(const float2& a, const float2& b) { 79 | return float2(a.x < b.x ? a.x : b.x, 80 | a.y < b.y ? a.y : b.y); 81 | } 82 | 83 | inline float2 max(const float2& a, const float2& b) { 84 | return float2(a.x > b.x ? a.x : b.x, 85 | a.y > b.y ? a.y : b.y); 86 | } 87 | 88 | inline float dot(const float2& a, const float2& b) { 89 | return a.x * b.x + a.y * b.y; 90 | } 91 | 92 | inline float lensqr(const float2& a) { 93 | return dot(a, a); 94 | } 95 | 96 | inline float length(const float2& a) { 97 | return std::sqrt(dot(a, a)); 98 | } 99 | 100 | inline float2 normalize(const float2& a) { 101 | return a * (1.0f / length(a)); 102 | } 103 | 104 | #endif // FLOAT2_H 105 | -------------------------------------------------------------------------------- /src/driver/float3.h: -------------------------------------------------------------------------------- 1 | #ifndef FLOAT3_H 2 | #define FLOAT3_H 3 | 4 | #include 5 | #include "common.h" 6 | #include "float2.h" 7 | 8 | struct float4; 9 | 10 | struct float3 { 11 | union { 12 | struct { float x, y, z; }; 13 | float values[3]; 14 | }; 15 | 16 | float3() {} 17 | explicit float3(float x) : x(x), y(x), z(x) {} 18 | explicit float3(const float4& xyz); 19 | float3(float x, float y, float z) : x(x), y(y), z(z) {} 20 | float3(const float2& xy, float z) : x(xy.x), y(xy.y), z(z) {} 21 | float3(float x, const float2& yz) : x(x), y(yz.x), z(yz.y) {} 22 | 23 | bool operator == (const float3& other) const { 24 | return x == other.x && y == other.y && z == other.z; 25 | } 26 | 27 | bool operator != (const float3& other) const { 28 | return x != other.x || y != other.y || z != other.z; 29 | } 30 | 31 | float operator [] (size_t i) const { return values[i]; } 32 | 
float& operator [] (size_t i) { return values[i]; } 33 | 34 | float3& operator += (const float3& a) { 35 | x += a.x; y += a.y; z += a.z; 36 | return *this; 37 | } 38 | 39 | float3& operator -= (const float3& a) { 40 | x -= a.x; y -= a.y; z -= a.z; 41 | return *this; 42 | } 43 | 44 | float3& operator *= (float a) { 45 | x *= a; y *= a; z *= a; 46 | return *this; 47 | } 48 | 49 | float3& operator *= (const float3& a) { 50 | x *= a.x; y *= a.y; z *= a.z; 51 | return *this; 52 | } 53 | }; 54 | 55 | inline float2::float2(const float3& xy) 56 | : x(xy.x), y(xy.y) 57 | {} 58 | 59 | inline float3 operator * (float a, const float3& b) { 60 | return float3(a * b.x, a * b.y, a * b.z); 61 | } 62 | 63 | inline float3 operator * (const float3& a, float b) { 64 | return float3(a.x * b, a.y * b, a.z * b); 65 | } 66 | 67 | inline float3 operator / (const float3& a, float b) { 68 | return a * (1.0f / b); 69 | } 70 | 71 | inline float3 operator - (const float3& a, const float3& b) { 72 | return float3(a.x - b.x, a.y - b.y, a.z - b.z); 73 | } 74 | 75 | inline float3 operator - (const float3& a) { 76 | return float3(-a.x, -a.y, -a.z); 77 | } 78 | 79 | inline float3 operator + (const float3& a, const float3& b) { 80 | return float3(a.x + b.x, a.y + b.y, a.z + b.z); 81 | } 82 | 83 | inline float3 operator * (const float3& a, const float3& b) { 84 | return float3(a.x * b.x, a.y * b.y, a.z * b.z); 85 | } 86 | 87 | inline float3 operator / (const float3& a, const float3& b) { 88 | return float3(a.x / b.x, a.y / b.y, a.z / b.z); 89 | } 90 | 91 | inline float3 cross(const float3& a, const float3& b) { 92 | return float3(a.y * b.z - a.z * b.y, 93 | a.z * b.x - a.x * b.z, 94 | a.x * b.y - a.y * b.x); 95 | } 96 | 97 | inline float3 rotate(const float3& v, const float3& axis, float angle) { 98 | float q[4]; 99 | q[0] = axis.x * sinf(angle / 2); 100 | q[1] = axis.y * sinf(angle / 2); 101 | q[2] = axis.z * sinf(angle / 2); 102 | q[3] = std::cos(angle / 2); 103 | 104 | float p[4]; 105 | p[0] = q[3] 
* v.x + q[1] * v.z - q[2] * v.y; 106 | p[1] = q[3] * v.y - q[0] * v.z + q[2] * v.x; 107 | p[2] = q[3] * v.z + q[0] * v.y - q[1] * v.x; 108 | p[3] = -(q[0] * v.x + q[1] * v.y + q[2] * v.z); 109 | 110 | return float3(p[3] * -q[0] + p[0] * q[3] + p[1] * -q[2] - p[2] * -q[1], 111 | p[3] * -q[1] - p[0] * -q[2] + p[1] * q[3] + p[2] * -q[0], 112 | p[3] * -q[2] + p[0] * -q[1] - p[1] * -q[0] + p[2] * q[3]); 113 | } 114 | 115 | inline float3 min(const float3& a, const float3& b) { 116 | return float3(a.x < b.x ? a.x : b.x, 117 | a.y < b.y ? a.y : b.y, 118 | a.z < b.z ? a.z : b.z); 119 | } 120 | 121 | inline float3 max(const float3& a, const float3& b) { 122 | return float3(a.x > b.x ? a.x : b.x, 123 | a.y > b.y ? a.y : b.y, 124 | a.z > b.z ? a.z : b.z); 125 | } 126 | 127 | inline float dot(const float3& a, const float3& b) { 128 | return a.x * b.x + a.y * b.y + a.z * b.z; 129 | } 130 | 131 | inline float lensqr(const float3& a) { 132 | return dot(a, a); 133 | } 134 | 135 | inline float length(const float3& a) { 136 | return std::sqrt(dot(a, a)); 137 | } 138 | 139 | inline float3 normalize(const float3& a) { 140 | return a * (1.0f / length(a)); 141 | } 142 | 143 | #endif // FLOAT3_H 144 | -------------------------------------------------------------------------------- /src/driver/float4.h: -------------------------------------------------------------------------------- 1 | #ifndef FLOAT4_H 2 | #define FLOAT4_H 3 | 4 | #include 5 | #include "common.h" 6 | #include "float2.h" 7 | #include "float3.h" 8 | 9 | struct float4 { 10 | union { 11 | struct { float x, y, z, w; }; 12 | float values[4]; 13 | }; 14 | 15 | float4() {} 16 | explicit float4(float x) : x(x), y(x), z(x), w(x) {} 17 | float4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {} 18 | float4(const float3& xyz, float w) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {} 19 | float4(float x, const float3& yzw) : x(x), y(yzw.x), z(yzw.y), w(yzw.z) {} 20 | float4(const float2& xy, float z, float w) : x(xy.x), y(xy.y), 
z(z), w(w) {} 21 | float4(float x, const float2& yz, float w) : x(x), y(yz.x), z(yz.y), w(w) {} 22 | float4(float x, float y, const float2& zw) : x(x), y(y), z(zw.x), w(zw.y) {} 23 | float4(const float2& xy, const float2& zw) : x(xy.x), y(xy.y), z(zw.x), w(zw.y) {} 24 | 25 | bool operator == (const float4& other) const { 26 | return x == other.x && y == other.y && z == other.z && w != other.w; 27 | } 28 | 29 | bool operator != (const float4& other) const { 30 | return x != other.x || y != other.y || z != other.z || w != other.w; 31 | } 32 | 33 | float operator [] (size_t i) const { return values[i]; } 34 | float& operator [] (size_t i) { return values[i]; } 35 | 36 | float4& operator += (const float4& a) { 37 | x += a.x; y += a.y; z += a.z; w += a.w; 38 | return *this; 39 | } 40 | 41 | float4& operator -= (const float4& a) { 42 | x -= a.x; y -= a.y; z -= a.z; w -= a.w; 43 | return *this; 44 | } 45 | 46 | float4& operator *= (float a) { 47 | x *= a; y *= a; z *= a; w *= a; 48 | return *this; 49 | } 50 | 51 | float4& operator *= (const float4& a) { 52 | x *= a.x; y *= a.y; z *= a.z; w *= a.w; 53 | return *this; 54 | } 55 | }; 56 | 57 | inline float2::float2(const float4& xy) 58 | : x(xy.x), y(xy.y) 59 | {} 60 | 61 | inline float3::float3(const float4& xyz) 62 | : x(xyz.x), y(xyz.y), z(xyz.z) 63 | {} 64 | 65 | inline float4 operator * (float a, const float4& b) { 66 | return float4(a * b.x, a * b.y, a * b.z, a * b.w); 67 | } 68 | 69 | inline float4 operator * (const float4& a, float b) { 70 | return float4(a.x * b, a.y * b, a.z * b, a.w * b); 71 | } 72 | 73 | inline float4 operator / (const float4& a, float b) { 74 | return a * (1.0f / b); 75 | } 76 | 77 | inline float4 operator - (const float4& a, const float4& b) { 78 | return float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); 79 | } 80 | 81 | inline float4 operator - (const float4& a) { 82 | return float4(-a.x, -a.y, -a.z, -a.w); 83 | } 84 | 85 | inline float4 operator + (const float4& a, const float4& b) { 86 | 
return float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); 87 | } 88 | 89 | inline float4 operator * (const float4& a, const float4& b) { 90 | return float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); 91 | } 92 | 93 | inline float4 abs(const float4& a) { 94 | return float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); 95 | } 96 | 97 | inline float4 min(const float4& a, const float4& b) { 98 | return float4(a.x < b.x ? a.x : b.x, 99 | a.y < b.y ? a.y : b.y, 100 | a.z < b.z ? a.z : b.z, 101 | a.w < b.w ? a.w : b.w); 102 | } 103 | 104 | inline float4 max(const float4& a, const float4& b) { 105 | return float4(a.x > b.x ? a.x : b.x, 106 | a.y > b.y ? a.y : b.y, 107 | a.z > b.z ? a.z : b.z, 108 | a.w > b.w ? a.w : b.w); 109 | } 110 | 111 | inline float dot(const float4& a, const float4& b) { 112 | return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; 113 | } 114 | 115 | inline float lensqr(const float4& a) { 116 | return dot(a, a); 117 | } 118 | 119 | inline float length(const float4& a) { 120 | return std::sqrt(dot(a, a)); 121 | } 122 | 123 | inline float4 normalize(const float4& a) { 124 | return a * (1.0f / length(a)); 125 | } 126 | 127 | inline float4 clamp(const float4& val, const float4& min, const float4& max) { 128 | return float4(clamp(val.x, min.x, max.x), 129 | clamp(val.y, min.y, max.y), 130 | clamp(val.z, min.z, max.z), 131 | clamp(val.w, min.w, max.w)); 132 | } 133 | 134 | #endif // FLOAT4_H 135 | -------------------------------------------------------------------------------- /src/driver/image.h: -------------------------------------------------------------------------------- 1 | #ifndef IMAGE_H 2 | #define IMAGE_H 3 | 4 | #include "file_path.h" 5 | 6 | struct ImageRgba32 { 7 | std::unique_ptr pixels; 8 | size_t width, height; 9 | }; 10 | 11 | bool load_png(const FilePath&, ImageRgba32&); 12 | bool load_jpg(const FilePath&, ImageRgba32&); 13 | bool save_png(const FilePath&, const ImageRgba32&); 14 | 15 | #endif // IMAGE_H 16 | 
--------------------------------------------------------------------------------
/src/driver/obj.h:
--------------------------------------------------------------------------------
#ifndef LOAD_OBJ_H
#define LOAD_OBJ_H

// NOTE(review): the three header names below, and every std::vector /
// std::unordered_map template argument in this file, are missing in the
// reviewed text (anything between angle brackets appears to have been
// stripped). Restore them from the original source before compiling.
#include 
#include 
#include 

#include "float3.h"
#include "color.h"
#include "file_path.h"

namespace obj {

// One face corner. Presumably indices into the vertex, normal and texcoord
// arrays of File -- confirm against the loader.
struct Index {
    int v, n, t;
};

// A polygon: its corners plus a material index.
struct Face {
    std::vector indices;
    int material;
};

struct Group {
    std::vector faces;
};

struct Object {
    std::vector groups;
};

// Material description. The field names mirror MTL statement names
// (Ka, Kd, Ks, Ke, Ns, Ni, illum, map_*); presumably each holds the value of
// the corresponding statement -- confirm against load_mtl.
struct Material {
    rgb ka;
    rgb kd;
    rgb ks;
    rgb ke;
    float ns;
    float ni;
    rgb tf;
    float tr;
    float d;
    int illum;
    std::string map_ka;
    std::string map_kd;
    std::string map_ks;
    std::string map_ke;
    std::string map_bump;
    std::string map_d;
};

// Everything read from one OBJ file.
struct File {
    std::vector objects;
    std::vector vertices;
    std::vector normals;
    std::vector texcoords;
    std::vector materials;
    std::vector mtl_libs;
};

typedef std::unordered_map MaterialLib;

// Flattened triangle mesh produced by compute_tri_mesh.
struct TriMesh {
    std::vector vertices;
    std::vector indices;
    std::vector normals;
    std::vector face_normals;
    std::vector texcoords;
};

// Both loaders report success through the returned bool.
bool load_obj(const FilePath&, File&);
bool load_mtl(const FilePath&, MaterialLib&);
// NOTE(review): the meaning of the size_t argument is not visible here.
TriMesh compute_tri_mesh(const File&, size_t);

} // namespace obj

#endif // LOAD_OBJ_H

--------------------------------------------------------------------------------
/src/driver/tri.h:
--------------------------------------------------------------------------------
#ifndef TRI_H
#define TRI_H

#include "float3.h"
#include "bbox.h"

/// Triangle given by its three vertices.
struct Tri {
    float3 v0, v1, v2;

    /// Leaves the vertices default-constructed.
    Tri() {}
    Tri(const float3& v0, const float3& v1, const float3& v2)
        : v0(v0), v1(v1), v2(v2)
    {}

    /// Vertex access: 0 -> v0, 1 -> v1, any other index -> v2.
    float3& operator[] (int i) { return i == 0 ? v0 : (i == 1 ? v1 : v2); }
    const float3& operator[] (int i) const { return i == 0 ? v0 : (i == 1 ? v1 : v2); }

    /// Half the length of the cross product of two edge vectors.
    float area() const { return length(cross(v1 - v0, v2 - v0)) / 2; }

    /// Computes the triangle bounding box.
    void compute_bbox(BBox& bb) const {
        bb.min = min(v0, min(v1, v2));
        bb.max = max(v0, max(v1, v2));
    }

    /// Splits the triangle along one axis and returns the resulting two bounding boxes.
    void compute_split(BBox& left_bb, BBox& right_bb, int axis, float split) const {
        left_bb = BBox::empty();
        right_bb = BBox::empty();

        // Edge vectors, each paired below with its start vertex (v0, v1, v2).
        const float3& e0 = v1 - v0;
        const float3& e1 = v2 - v1;
        const float3& e2 = v0 - v2;

        // A vertex lying exactly on the plane counts as "left".
        const bool left0 = v0[axis] <= split;
        const bool left1 = v1[axis] <= split;
        const bool left2 = v2[axis] <= split;

        if (left0) left_bb.extend(v0);
        if (left1) left_bb.extend(v1);
        if (left2) left_bb.extend(v2);

        if (!left0) right_bb.extend(v0);
        if (!left1) right_bb.extend(v1);
        if (!left2) right_bb.extend(v2);

        // An edge whose endpoints are on different sides crosses the plane;
        // the crossing point belongs to both boxes.
        if (left0 ^ left1) {
            const float3& p = clip_edge(axis, split, v0, e0);
            left_bb.extend(p);
            right_bb.extend(p);
        }
        if (left1 ^ left2) {
            const float3& p = clip_edge(axis, split, v1, e1);
            left_bb.extend(p);
            right_bb.extend(p);
        }
        if (left2 ^ left0) {
            const float3& p = clip_edge(axis, split, v2, e2);
            left_bb.extend(p);
            right_bb.extend(p);
        }
    }

private:
    /// Point where the edge starting at `p` with direction `edge` meets the
    /// plane `axis == plane`. Only called for edges whose endpoints lie on
    /// different sides of the plane, so edge[axis] is non-zero there.
    static float3 clip_edge(int axis, float plane, const float3& p, const float3& edge) {
        const float t = (plane - p[axis]) / (edge[axis]);
        return p + t * edge;
    }
};

#endif // TRI_H

--------------------------------------------------------------------------------
/src/dummy_main.impala:
--------------------------------------------------------------------------------
// 
Dummy file used to generate a C interface for the renderer

// Camera parameters handed to render(): eye position, the dir/up/right
// vectors, and a width/height pair.
struct Settings {
    eye: Vec3,
    dir: Vec3,
    up: Vec3,
    right: Vec3,
    width: f32,
    height: f32
}

// Stub implementations: get_spp returns 1 and render does nothing.
extern fn get_spp() -> i32 { 1 }
extern fn render(settings: &Settings, iter: i32) -> () {}

--------------------------------------------------------------------------------
/src/render/camera.impala:
--------------------------------------------------------------------------------
// Opaque camera object
struct Camera {
    // Generates a ray for a point on the image plane (in [-1, 1]^2)
    generate_ray: fn (f32, f32) -> Ray,
    // Projects a 3D point on the image plane
    project: fn (Vec3) -> Vec3,
    // Unprojects a point on the image plane
    unproject: fn (Vec3) -> Vec3,
    // Computes the local camera geometry for a point on the image plane
    geometry: fn (f32, f32) -> CameraGeometry
}

// Local geometry of the camera lens
struct CameraGeometry {
    cos_dir: f32, // Cosine between the ray direction and the camera normal
    dist: f32, // Distance between the camera origin and the point on the image plane
    area: f32 // Local pixel area (relative to total lens area)
}

// Plain constructor for CameraGeometry.
fn @make_camera_geometry(cos_dir: f32, dist: f32, area: f32) -> CameraGeometry {
    CameraGeometry {
        cos_dir: cos_dir,
        dist: dist,
        area: area
    }
}

// Creates a perspective camera
// The columns of `view` are used as right (0), up (1) and viewing direction (2);
// `w` and `h` scale the image-plane coordinates along right and up.
fn @make_perspective_camera(math: Intrinsics, eye: Vec3, view: Mat3x3, w: f32, h: f32) -> Camera {
    let dir = view.col(2);
    let right = view.col(0);
    let up = view.col(1);

    Camera {
        generate_ray: @ |x, y| {
            // Direction = normalize(right * w * x + up * h * y + dir)
            let d = vec3_normalize(math,
                vec3_add(
                    vec3_add(vec3_mulf(right, w * x),
                             vec3_mulf(up, h * y)),
                    dir
                )
            );
            make_ray(eye, d, 0.0f, flt_max)
        },
        project: @ |p| {
            let d = vec3_normalize(math, vec3_sub(p, eye));
            make_vec3(vec3_dot(d, right) / w,
                      vec3_dot(d, up) / h,
                      -vec3_dot(d, dir))
        },
        // Ignores its argument and always returns the eye position.
        unproject: @ |p| eye,
        geometry: @ |x, y| {
            let d = math.sqrtf(1.0f + x * x * w * w + y * y * h * h);
            make_camera_geometry(1.0f / d, d, 1.0f / (4.0f * w * h))
        }
    }
}

--------------------------------------------------------------------------------
/src/render/driver.impala:
--------------------------------------------------------------------------------
// Driver functions ----------------------------------------------------------------

// Functions declared here and implemented outside of this file (C linkage).
extern "C" {
    fn rodent_get_film_data(i32, &mut &mut [f32], &mut i32, &mut i32) -> ();
    fn rodent_cpu_get_primary_stream(&mut PrimaryStream, i32) -> ();
    fn rodent_cpu_get_secondary_stream(&mut SecondaryStream, i32) -> ();
    fn rodent_gpu_get_first_primary_stream(i32, &mut PrimaryStream, i32) -> ();
    fn rodent_gpu_get_second_primary_stream(i32, &mut PrimaryStream, i32) -> ();
    fn rodent_gpu_get_secondary_stream(i32, &mut SecondaryStream, i32) -> ();
    fn rodent_gpu_get_tmp_buffer(i32, &mut &mut [i32], i32) -> ();
    fn rodent_load_buffer(i32, &[u8]) -> &[i8];
    fn rodent_load_bvh2_tri1(i32, &[u8], &mut &[Node2], &mut &[Tri1]) -> ();
    fn rodent_load_bvh4_tri4(i32, &[u8], &mut &[Node4], &mut &[Tri4]) -> ();
    fn rodent_load_bvh8_tri4(i32, &[u8], &mut &[Node8], &mut &[Tri4]) -> ();
    fn rodent_load_png(i32, &[u8], &mut &[u8], &mut i32, &mut i32) -> ();
    fn rodent_load_jpg(i32, &[u8], &mut &[u8], &mut i32, &mut i32) -> ();
    fn rodent_cpu_intersect_primary_embree(&PrimaryStream, i32, i32) -> ();
    fn rodent_cpu_intersect_secondary_embree(&SecondaryStream) -> ();
    fn rodent_present(i32) -> ();
}

// Ray streams ---------------------------------------------------------------------

// Rays stored as one array per component (structure of arrays).
struct RayStream {
    id: &mut [i32], // this field is also used to indicate if the ray is alive
    org_x: &mut [f32],
    org_y: &mut [f32],
    org_z: &mut [f32],
    dir_x: &mut [f32],
    dir_y: &mut [f32],
    dir_z: &mut [f32],
    tmin: &mut [f32],
    tmax: &mut [f32],
}

// Rays plus, per ray, the hit record (geom_id, prim_id, t, u, v) and the
// path state (rnd, mis, contrib_*, depth).
struct PrimaryStream {
    rays: RayStream,
    geom_id: &mut [i32],
    prim_id: &mut [i32],
    t: &mut [f32],
    u: &mut [f32],
    v: &mut [f32],
    rnd: &mut [RndState],
    mis: &mut [f32],
    contrib_r: &mut [f32],
    contrib_g: &mut [f32],
    contrib_b: &mut [f32],
    depth: &mut [i32],
    size: i32,
    pad: i32 // TODO: Needed for AMDGPU backend
}

// Rays plus, per ray, a primitive id and an RGB color.
struct SecondaryStream {
    rays: RayStream,
    prim_id: &mut [i32],
    color_r: &mut [f32],
    color_g: &mut [f32],
    color_b: &mut [f32],
    size: i32,
    pad: i32 // TODO: Needed for AMDGPU backend
}

// All accessors below address element k = i * vector_width + j of the stream arrays.

// Returns a function that reads the ray stored at (i, j).
fn @make_ray_stream_reader(rays: RayStream, vector_width: i32) -> fn (i32, i32) -> Ray {
    @ |i, j| {
        let k = i * vector_width + j;
        make_ray(
            make_vec3(rays.org_x(k),
                      rays.org_y(k),
                      rays.org_z(k)),
            make_vec3(rays.dir_x(k),
                      rays.dir_y(k),
                      rays.dir_z(k)),
            rays.tmin(k),
            rays.tmax(k)
        )
    }
}

// Returns a function that stores a ray at (i, j). The `id` array is not written.
fn @make_ray_stream_writer(rays: RayStream, vector_width: i32) -> fn (i32, i32, Ray) -> () {
    @ |i, j, ray| {
        let k = i * vector_width + j;
        rays.org_x(k) = ray.org.x;
        rays.org_y(k) = ray.org.y;
        rays.org_z(k) = ray.org.z;
        rays.dir_x(k) = ray.dir.x;
        rays.dir_y(k) = ray.dir.y;
        rays.dir_z(k) = ray.dir.z;
        rays.tmin(k) = ray.tmin;
        rays.tmax(k) = ray.tmax;
    }
}

// Returns a function that reads the hit record stored at (i, j).
fn @make_primary_stream_hit_reader(primary: PrimaryStream, vector_width: i32) -> fn (i32, i32) -> Hit {
    @ |i, j| {
        let k = i * vector_width + j;
        make_hit(
            primary.geom_id(k),
            primary.prim_id(k),
            primary.t(k),
            make_vec2(primary.u(k),
                      primary.v(k))
        )
    }
}

// Returns a function that stores a hit record at (i, j).
// A geometry id of -1 is replaced by `invalid_geom_id` before being stored.
fn @make_primary_stream_hit_writer(primary: PrimaryStream, vector_width: i32, invalid_geom_id: i32) -> fn (i32, i32, Hit) -> () {
    @ |i, j, hit| {
        let k = i * vector_width + j;
        primary.geom_id(k) = if hit.geom_id == -1 { invalid_geom_id } else { hit.geom_id };
        primary.prim_id(k) = hit.prim_id;
        primary.t(k) = hit.distance;
        primary.u(k) = hit.uv_coords.x;
        primary.v(k) = hit.uv_coords.y;
    }
}

// Returns a function that stores only the primitive id of a hit at (i, j).
fn @make_secondary_stream_hit_writer(secondary: SecondaryStream, vector_width: i32) -> fn (i32, i32, Hit) -> () {
    @ |i, j, hit| {
        let k = i * vector_width + j;
        secondary.prim_id(k) = hit.prim_id;
    }
}

// Returns a function that reads the path state stored at (i, j).
fn @make_primary_stream_state_reader(primary: PrimaryStream, vector_width: i32) -> fn (i32, i32) -> RayState {
    @ |i, j| {
        let k = i * vector_width + j;
        RayState {
            rnd: primary.rnd(k),
            contrib: make_color(primary.contrib_r(k), primary.contrib_g(k), primary.contrib_b(k)),
            mis: primary.mis(k),
            depth: primary.depth(k)
        }
    }
}

// Returns a function that stores the path state at (i, j).
fn @make_primary_stream_state_writer(primary: PrimaryStream, vector_width: i32) -> fn (i32, i32, RayState) -> () {
    @ |i, j, state| {
        let k = i * vector_width + j;
        primary.rnd(k) = state.rnd;
        primary.contrib_r(k) = state.contrib.r;
        primary.contrib_g(k) = state.contrib.g;
        primary.contrib_b(k) = state.contrib.b;
        primary.mis(k) = state.mis;
        primary.depth(k) = state.depth;
    }
}

--------------------------------------------------------------------------------
/src/render/geometry.impala:
--------------------------------------------------------------------------------
// Abstract geometry object (tied to one device)
struct Geometry {
    // Computes the surface element after an intersection on this geometry
    surface_element: fn (Ray, Hit) -> SurfaceElement,
    // Shader for this geometry
    shader: Shader
}

// Triangle mesh with per-vertex/per-face attributes
struct TriMesh {
    vertices: fn (i32) -> Vec3,
    normals: fn (i32) -> Vec3,
    face_normals: fn (i32) -> Vec3,
    triangles: fn (i32) -> (i32, i32, i32),
    // Attribute i: (true if stored per face, accessor for its values)
    attrs: fn (i32) -> (bool, fn (i32) -> Vec4),
    num_attrs: i32,
    num_tris: i32
}

// Creates a geometry object from a triangle mesh definition
fn @make_tri_mesh_geometry(math: Intrinsics, tri_mesh: TriMesh, shader: Shader) -> Geometry {
    Geometry {
        surface_element: @ |ray, hit| {
            let (i0, i1, i2) = tri_mesh.triangles(hit.prim_id);

            let face_normal = tri_mesh.face_normals(hit.prim_id);
            // Shading normal: the three vertex normals interpolated with the hit's uv coordinates
            let normal = vec3_normalize(math, vec3_lerp2(tri_mesh.normals(i0), tri_mesh.normals(i1), tri_mesh.normals(i2), hit.uv_coords.x, hit.uv_coords.y));
            let is_entering = vec3_dot(ray.dir, face_normal) <= 0.0f;

            // Attribute lookup at the hit point; indices >= num_attrs yield a zero vector.
            fn @attr(i: i32) -> Vec4 {
                if i >= tri_mesh.num_attrs {
                    make_vec4(0.0f, 0.0f, 0.0f, 0.0f)
                } else {
                    let (per_face, attr_value) = tri_mesh.attrs(i);
                    if per_face {
                        attr_value(hit.prim_id)
                    } else {
                        vec4_lerp2(attr_value(i0), attr_value(i1), attr_value(i2), hit.uv_coords.x, hit.uv_coords.y)
                    }
                }
            }

            SurfaceElement {
                is_entering: is_entering,
                point: vec3_add(ray.org, vec3_mulf(ray.dir, hit.distance)),
                // Both normals are flipped, if necessary, so that they oppose the ray direction.
                face_normal: if is_entering { face_normal } else { vec3_neg(face_normal) },
                uv_coords: hit.uv_coords,
                local: make_orthonormal_mat3x3(if vec3_dot(ray.dir, normal) <= 0.0f { normal } else { vec3_neg(normal) }),
                attr: attr
            }
        },
        shader: shader
    }
}

--------------------------------------------------------------------------------
/src/render/image.impala:
--------------------------------------------------------------------------------
// Images are discrete collections of pixels with a fixed number of channels
struct Image {
    pixels: fn (i32, i32) -> Color,
    width: i32,
    height: i32
}

// Per-axis mapping applied to a texture coordinate before filtering.
struct BorderHandling {
    horz: fn (Intrinsics, f32) -> f32,
    vert: fn (Intrinsics, f32) -> f32
}
// A texture maps uv coordinates to a color; an image filter looks up a color
// in an image at the given uv coordinates.
type Texture = fn (Vec2) -> Color;
type ImageFilter = fn (Intrinsics, Image, Vec2) -> Color;

// Plain constructor for Image.
fn @make_image(pixels: fn (i32, i32) -> Color, width: i32, height: i32) -> Image {
    Image {
        pixels: pixels,
        width: width,
        height: height
    }
}

// Wraps a packed 32-bit pixel source: bits 0-7 are read as red, 8-15 as green
// and 16-23 as blue, each scaled by 1/255. Bits 24-31 are ignored.
fn @make_image_rgba32(pixels: fn (i32, i32) -> u32, width: i32, height: i32) -> Image {
    Image {
        pixels: @ |x, y| {
            let pixel = pixels(x, y);
            let r = pixel & 0xFFu;
            let g = (pixel >> 8u) & 0xFFu;
            let b = (pixel >> 16u) & 0xFFu;
            make_color((r as f32) * (1.0f / 255.0f),
                       (g as f32) * (1.0f / 255.0f),
                       (b as f32) * (1.0f / 255.0f))
        },
        width: width,
        height: height
    }
}

// Border mode that clamps coordinates to [0, 1].
fn @make_clamp_border() -> BorderHandling {
    let clamp = @ |math, x| math.fminf(1.0f, math.fmaxf(0.0f, x));
    BorderHandling {
        horz: clamp,
        vert: clamp
    }
}

// Border mode that keeps only the fractional part (x - floor(x)).
fn @make_repeat_border() -> BorderHandling {
    let repeat = @ |math, x| x - math.floorf(x);
    BorderHandling {
        horz: repeat,
        vert: repeat
    }
}

// Nearest-pixel lookup; pixel indices are limited to width - 1 / height - 1.
fn @make_nearest_filter() -> ImageFilter {
    @ |math, img, uv| {
        img.pixels(math.min((uv.x * img.width as f32) as i32, img.width - 1),
                   math.min((uv.y * img.height as f32) as i32, img.height - 1))
    }
}

// Bilinear interpolation between the four pixels around the sample position.
fn @make_bilinear_filter() -> ImageFilter {
    @ |math, img, uv| {
        let u = uv.x * img.width as f32;
        let v = uv.y * img.height as f32;
        let x0 = math.min(u as i32, img.width - 1);
        let y0 = math.min(v as i32, img.height - 1);
        let x1 = math.min(x0 + 1, img.width - 1);
        let y1 = math.min(y0 + 1, img.height - 1);
        // Interpolation weights: u and v minus their integer-cast values
        let kx = u - (u as i32 as f32);
        let ky = v - (v as i32 as f32);

        let p00 = img.pixels(x0, y0);
        let p10 = img.pixels(x1, y0);
        let p01 = img.pixels(x0, y1);
        let p11 = img.pixels(x1, y1);

        let interp = @ |x00, x10, x01, x11| lerp(lerp(x00, x10, kx), lerp(x01, x11, kx), ky);
        make_color(interp(p00.r, p10.r, p01.r, p11.r),
                   interp(p00.g, p10.g, p01.g, p11.g),
                   interp(p00.b, p10.b, p01.b, p11.b))
    }
}

// Builds a texture: border handling is applied to each coordinate, then the filter samples the image.
fn @make_texture(math: Intrinsics, border: BorderHandling, filter: ImageFilter, image: Image) -> Texture {
    @ |uv| {
        let u = border.horz(math, uv.x);
        let v = border.vert(math, uv.y);
        filter(math, image, make_vec2(u, v))
    }
}

--------------------------------------------------------------------------------
/src/render/light.impala:
--------------------------------------------------------------------------------
// Result from sampling a light source
struct EmissionSample {
    pos: Vec3, // Position on the light source
    dir: Vec3, // Direction of the light going outwards
    intensity: Color, // Intensity along the direction
    pdf_area: f32, // Probability to sample the point on the light
    pdf_dir: f32, // Probability to sample the direction on the light, conditioned on the point on the light source
    cos: f32 // Cosine between the direction and the light source geometry
}

// Result from sampling direct lighting from a light source
struct DirectLightSample {
    pos: Vec3, // Position on the light source
    intensity: Color, // Intensity along the direction
    pdf_area: f32, // Probability to sample the point on the light
    pdf_dir: f32, // Probability to sample the direction using emission sampling
    cos: f32 // Cosine between the direction and the light source geometry
}

// Emission properties of a light source
struct EmissionValue {
    intensity: Color, // Intensity along the direction
    pdf_area: f32, // Probability to sample the point on the light
    pdf_dir: f32 // Probability to sample the direction using emission sampling
}

// Surface that emits light
struct AreaEmitter {
    // Maps a 2D sample to a triple (Vec3, Vec3, f32); make_area_light reads it as (position, normal, area pdf)
    sample: fn (Vec2) -> (Vec3, Vec3, f32),
    normal: fn (Vec2) -> Vec3,
    pdf: fn (Vec2) -> f32
}

// Opaque light structure
struct Light {
    // Samples direct illumination from this light source at the given point on a surface
    sample_direct: fn (&mut RndState, Vec3) -> DirectLightSample,
    // Samples the emitting surface of the light
    sample_emission: fn (&mut RndState) -> EmissionSample,
    // Returns the emission properties of the light at a given point on its surface
    emission: fn (Vec3, Vec2) -> EmissionValue,
    // true if the light has an area (can be hit by a ray)
    has_area: bool
}

// Builds an EmissionSample. Unless both pdfs and the cosine are strictly
// positive, the sample is replaced by a black one with pdfs and cosine set to 1.
fn @make_emission_sample(pos: Vec3, dir: Vec3, intensity: Color, pdf_area: f32, pdf_dir: f32, cos: f32) -> EmissionSample {
    if pdf_area > 0.0f && pdf_dir > 0.0f && cos > 0.0f {
        EmissionSample {
            pos: pos,
            dir: dir,
            intensity: intensity,
            pdf_area: pdf_area,
            pdf_dir: pdf_dir,
            cos: cos
        }
    } else {
        EmissionSample {
            pos: pos,
            dir: dir,
            intensity: black,
            pdf_area: 1.0f,
            pdf_dir: 1.0f,
            cos: 1.0f
        }
    }
}

// Builds a DirectLightSample. Unless both pdfs and the cosine are strictly
// positive, the sample is replaced by a black one with pdfs set to 1 and a
// cosine of 0 (unlike make_emission_sample, which uses a cosine of 1).
fn @make_direct_sample(pos: Vec3, intensity: Color, pdf_area: f32, pdf_dir: f32, cos: f32) -> DirectLightSample {
    if pdf_area > 0.0f && pdf_dir > 0.0f && cos > 0.0f {
        DirectLightSample {
            pos: pos,
            intensity: intensity,
            pdf_area: pdf_area,
            pdf_dir: pdf_dir,
            cos: cos
        }
    } else {
        DirectLightSample {
            pos: pos,
            intensity: black,
            pdf_area: 1.0f,
            pdf_dir: 1.0f,
            cos: 0.0f
        }
    }
}

// Builds an EmissionValue; a non-positive directional pdf yields black with both pdfs set to 1.
fn @make_emission_value(intensity: Color, pdf_area: f32, pdf_dir: f32) -> EmissionValue {
    if pdf_dir > 0.0f {
        EmissionValue {
            intensity: intensity,
            pdf_area: pdf_area,
            pdf_dir: pdf_dir
        }
    } else {
        EmissionValue {
            intensity: black,
            pdf_area: 1.0f,
            pdf_dir: 1.0f
        }
    }
}

// Point light at `pos`. The intensity is `color` scaled by 1 / (4 * pi).
fn @make_point_light(math: Intrinsics, pos: Vec3, color: Color) -> Light {
    Light {
        sample_direct: @ |rnd, from| {
            let intensity = color_mulf(color, 1.0f / (4.0f * flt_pi));
            make_direct_sample(pos, intensity, 1.0f, uniform_sphere_pdf(), 1.0f)
        },
        sample_emission: @ |rnd| {
            let u = randf(rnd);
            let v = randf(rnd);
            let sample = sample_uniform_sphere(math, u, v);
            let intensity = color_mulf(color, 1.0f / (4.0f * flt_pi));
            make_emission_sample(pos, sample.dir, intensity, 1.0f, sample.pdf, 1.0f)
        },
        // Always returns a black emission value
        emission: @ |_, _| make_emission_value(black, 1.0f, 1.0f),
        has_area: false
    }
}

// Area light built on top of an AreaEmitter, emitting `color`.
fn @make_area_light(math: Intrinsics, area: AreaEmitter, color: Color) -> Light {
    Light {
        sample_direct: @ |rnd, from| {
            let (pos, n, area_pdf) = area.sample(make_vec2(randf(rnd), randf(rnd)));
            // Cosine between the emitter normal and the direction from the light sample towards `from`
            let dir = vec3_sub(from, pos);
            let cos = vec3_dot(dir, n) / vec3_len(math, dir);
            make_direct_sample(pos, color, area_pdf, cosine_hemisphere_pdf(cos), cos)
        },
        sample_emission: @ |rnd| {
            let (pos, n, area_pdf) = area.sample(make_vec2(randf(rnd), randf(rnd)));
            let sample = sample_cosine_hemisphere(math, randf(rnd), randf(rnd));
            // The sampled direction is transformed by the orthonormal matrix built from n
            make_emission_sample(pos, mat3x3_mul(make_orthonormal_mat3x3(n), sample.dir), color, area_pdf, sample.pdf, sample.dir.z)
        },
        emission: @ |dir, uv_coords| make_emission_value(color, area.pdf(uv_coords), cosine_hemisphere_pdf(vec3_dot(area.normal(uv_coords), dir))),
        has_area: true
    }
}

// Triangle light: derives the unit normal and the inverse area from the
// vertices, then defers to make_precomputed_triangle_light.
fn @make_triangle_light(math: Intrinsics, v0: Vec3, v1: Vec3, v2: Vec3, color: Color) -> Light {
    // The length of n_ is twice the triangle area
    let n_ = vec3_cross(vec3_sub(v1, v0), vec3_sub(v2, v0));
    let inv_area = 1.0f / (0.5f * sqrt_newton(vec3_len2(n_), 1e-5f));
    let n = vec3_mulf(n_, 0.5f * inv_area);
    make_precomputed_triangle_light(math, v0, v1, v2, n, inv_area, color)
}

// Triangle light for which the normal `n` and the inverse area are already known.
// The inverse area is used as the pdf of every sampled point.
fn @make_precomputed_triangle_light(math: Intrinsics, v0: Vec3, v1: Vec3, v2: Vec3, n: Vec3, inv_area: f32, color: Color) -> Light {
    let emitter = AreaEmitter {
        sample: @ |uv| (sample_triangle(uv.x, uv.y, v0, v1, v2), n, inv_area),
        normal: @ |_| n,
        pdf: @ |uv| inv_area
    };
    make_area_light(math, emitter, color)
}

--------------------------------------------------------------------------------
/src/render/renderer.impala:
--------------------------------------------------------------------------------
// Callbacks handed to Device.trace. Each of on_hit / on_shadow / on_bounce
// receives a function of type `... -> !` as its last argument and reports its
// result by calling it.
struct PathTracer {
    on_emit: RayEmitter,
    on_hit: fn (Ray, Hit, &mut RayState, SurfaceElement, Material, fn (Color) -> !) -> (),
    on_shadow: fn (Ray, Hit, &mut RayState, SurfaceElement, Material, fn (Ray, Color) -> !) -> (),
    on_bounce: fn (Ray, Hit, &mut RayState, SurfaceElement, Material, fn (Ray, RayState) -> !) -> (),
}

// Per-path state: random generator state, contribution, MIS term and depth.
struct RayState {
    rnd: RndState,
    contrib: Color,
    mis: f32,
    depth: i32
}

// Called as (sample, x, y, width, height); returns the ray and its initial state.
type RayEmitter = fn (i32, i32, i32, i32, i32) -> (Ray, RayState);

// Plain constructor for RayState.
fn @make_ray_state(rnd: RndState, contrib: Color, mis: f32, depth: i32) -> RayState {
    RayState {
        rnd: rnd,
        contrib: contrib,
        mis: mis,
        depth: depth
    }
}

// Emitter that generates camera rays with a random offset inside the pixel.
fn @make_camera_emitter(scene: Scene, device: Device, iter: i32) -> RayEmitter {
    @ |sample, x, y, width, height| {
        // The random state is seeded from a hash of (sample, iter, x, y)
        let mut hash = fnv_init();
        hash = fnv_hash(hash, sample as u32);
        hash = fnv_hash(hash, iter as u32);
        hash = fnv_hash(hash, x as u32);
        hash = fnv_hash(hash, y as u32);
        let mut rnd = hash as RndState;
        // Maps the jittered pixel position to [-1, 1]; ky decreases as y increases
        let kx = 2.0f * (x as f32 + randf(&mut rnd)) / (width as f32) - 1.0f;
        let ky = 1.0f - 2.0f * (y as f32 + randf(&mut rnd)) / (height as f32);
        let ray = scene.camera.generate_ray(kx, ky);
        let state = make_ray_state(rnd, white, 0.0f, 0);
        (ray, state)
    }
}

// Renderer without shadow rays or bounces: every hit accumulates white scaled
// by -dot(ray.dir, surf.local.col(2)). Traces with a sample count of 1.
fn @make_debug_renderer() -> Renderer {
    @ |scene, device, iter| {
        let on_emit = make_camera_emitter(scene, device, iter);
        let on_shadow = @ |_, _, _, _, _, _| ();
        let on_bounce = @ |_, _, _, _, _, _| ();
        let on_hit = @ |ray, hit, state, surf, mat, accumulate| {
            accumulate(color_mulf(white, -vec3_dot(ray.dir, surf.local.col(2))))
        };

        let path_tracer = PathTracer {
            on_emit: on_emit,
            on_hit: on_hit,
            on_shadow: on_shadow,
            on_bounce: on_bounce
        };

        device.trace(scene, path_tracer, 1);
    }
}

// Path tracer: one light is sampled per hit (on_shadow), emission is added when
// an emissive surface is hit (on_hit), and paths continue until `max_path_len`
// is reached or Russian roulette terminates them (on_bounce).
fn @make_path_tracing_renderer(max_path_len: i32, spp: i32)-> Renderer {
    @ |scene, device, iter| {
        // Used as tmin for shadow and bounce rays, and to shorten shadow rays
        let offset = 0.001f;
        // Probability of picking any one light
        let pdf_lightpick = 1.0f / (scene.num_lights as f32);

        let on_emit = make_camera_emitter(scene, device, iter);

        fn @on_shadow( ray: Ray
                     , hit: Hit
                     , state: &mut RayState
                     , surf: SurfaceElement
                     , mat: Material
                     , emit: fn (Ray, Color) -> !
                     ) -> () {
            // No shadow rays for specular materials
            if mat.bsdf.is_specular {
                return()
            }

            let rnd = &mut state.rnd;
            // Note: randi() returns random integers, but we only want positive integers here
            let light_id = (randi(rnd) & 0x7FFFFFFF) % scene.num_lights;
            let light = @@(scene.lights)(light_id);
            let light_sample = @@(light.sample_direct)(rnd, surf.point);
            let light_dir = vec3_sub(light_sample.pos, surf.point);
            let vis = vec3_dot(light_dir, surf.local.col(2));

            if vis > 0.0f && light_sample.cos > 0.0f {
                let inv_d = 1.0f / vec3_len(device.intrinsics, light_dir);
                let inv_d2 = inv_d * inv_d;
                let in_dir = vec3_mulf(light_dir, inv_d);
                let out_dir = vec3_neg(ray.dir);

                let pdf_e = if light.has_area { mat.bsdf.pdf(in_dir, out_dir) } else { 0.0f };
                let pdf_l = light_sample.pdf_area * pdf_lightpick;
                let inv_pdf_l = 1.0f / pdf_l;

                let cos_e = vis * inv_d;
                let cos_l = light_sample.cos;

                // The MIS weight is 1 for lights without area
                let mis = if light.has_area { 1.0f / (1.0f + pdf_e * cos_l * inv_d2 * inv_pdf_l) } else { 1.0f };
                let geom_factor = cos_e * cos_l * inv_d2 * inv_pdf_l;

                let contrib = color_mul(light_sample.intensity, color_mul(state.contrib, mat.bsdf.eval(in_dir, out_dir)));
                // light_dir is not normalized, so the parameter range [offset, 1 - offset] ends just before the light sample
                emit(
                    make_ray(surf.point, light_dir, offset, 1.0f - offset),
                    color_mulf(contrib, geom_factor * mis)
                )
            }
        }

        fn @on_hit( ray: Ray
                  , hit: Hit
                  , state: &mut RayState
                  , surf: SurfaceElement
                  , mat: Material
                  , accumulate: fn (Color) -> !
                  ) -> () {
            // Hits on a light source
            if mat.is_emissive && surf.is_entering {
                let out_dir = vec3_neg(ray.dir);
                let emit = mat.emission(out_dir);
                let next_mis = state.mis * hit.distance * hit.distance / vec3_dot(out_dir, surf.local.col(2));
                let mis = 1.0f / (1.0f + next_mis * pdf_lightpick * emit.pdf_area);
                accumulate(color_mulf(color_mul(state.contrib, emit.intensity), mis))
            }
        }

        fn @on_bounce( ray: Ray
                     , hit: Hit
                     , state: &mut RayState
                     , surf: SurfaceElement
                     , mat: Material
                     , bounce: fn (Ray, RayState) -> !
                     ) -> () {
            // Russian roulette and maximum depth
            let rr_prob = russian_roulette(state.contrib, 0.75f);
            if state.depth >= max_path_len || randf(&mut state.rnd) >= rr_prob {
                return()
            }

            // Bounce
            let out_dir = vec3_neg(ray.dir);
            let mat_sample = mat.bsdf.sample(&mut state.rnd, out_dir, false);
            let contrib = color_mul(state.contrib, mat_sample.color);
            // The stored MIS term is 0 after a specular bounce
            let mis = if mat.bsdf.is_specular { 0.0f } else { 1.0f / mat_sample.pdf };
            bounce(
                make_ray(surf.point, mat_sample.in_dir, offset, flt_max),
                make_ray_state(state.rnd, color_mulf(contrib, mat_sample.cos / (mat_sample.pdf * rr_prob)), mis, state.depth + 1)
            )
        }

        let path_tracer = PathTracer {
            on_emit: on_emit,
            on_hit: on_hit,
            on_shadow: on_shadow,
            on_bounce: on_bounce
        };

        device.trace(scene, path_tracer, spp);
    }
}

--------------------------------------------------------------------------------
/src/render/scene.impala:
--------------------------------------------------------------------------------
// Compile-time scene data
struct Scene {
    num_geometries: i32,
    num_lights: i32,

    geometries: fn (i32) -> Geometry,
    lights: fn (i32) -> Light,
    camera: Camera,
    bvh: Bvh
}

// Rendering device
struct Device {
    intrinsics: Intrinsics,

    // Called by the renderers in renderer.impala as trace(scene, path_tracer, sample count)
    trace: fn (Scene, PathTracer, i32) -> (),
    present: fn () -> (),

    // General formats
    load_buffer: fn (&[u8]) -> DeviceBuffer,
    load_bvh: fn (&[u8]) -> Bvh,
    load_png: fn (&[u8]) -> Image,
    load_jpg: fn (&[u8]) -> Image
}

// Typed read accessors over a loaded buffer, each taking an i32 index.
struct DeviceBuffer {
    load_i32: fn (i32) -> i32,
    load_f32: fn (i32) -> f32,
    load_vec2: fn (i32) -> Vec2,
    load_vec3: fn (i32) -> Vec3,
    load_vec4: fn (i32) -> Vec4,
    load_int2: fn (i32) -> (i32, i32),
    load_int3: fn (i32) -> (i32, i32, i32),
    load_int4: fn (i32) -> (i32, i32, i32, i32),
}

// A renderer is called as (scene, device, iter); a shader maps a hit to a material.
type Renderer = fn (Scene, Device, i32) -> ();
type Shader = fn (Ray, Hit, SurfaceElement) -> Material;

--------------------------------------------------------------------------------
/src/traversal/stack.impala:
--------------------------------------------------------------------------------
// Stack entry: a node reference and an associated tmin value.
struct NodeRef {
    node: i32,
    tmin: f32
}

// Stack interface, implemented by alloc_stack.
struct Stack {
    push: fn (i32, f32) -> (),
    push_after: fn (i32, f32) -> (),
    set_top: fn (i32, f32) -> (),
    sort_n: fn (i32, fn (f32, f32) -> bool, SortingNetwork, bool) -> (),
    pop: fn () -> NodeRef,
    top: fn () -> NodeRef,
    is_empty: fn () -> bool,
    size: fn () -> i32
}

// Fixed-size storage for (node, tmin) pairs, addressed by index.
struct SmallStack {
    write: fn (i32, (i32, f32)) -> (),
    read: fn (i32) -> (i32, f32)
}

// Negative references are leaves, positive ones inner nodes; 0 is neither
// (alloc_stack uses it as the "empty" marker).
fn @is_leaf (ref: NodeRef) -> bool { ref.node < 0 }
fn @is_inner(ref: NodeRef) -> bool { ref.node > 0 }

// Builds storage for `n` entries as a binary tree of single variables:
// a range of one index owns one variable, larger ranges dispatch to their halves.
fn @make_small_stack(n: i32) -> SmallStack {
    fn @(?begin & ?end) make_small_stack_helper(begin: i32, end: i32) -> SmallStack {
        if begin == end {
            // Empty range: writes are dropped, reads are undefined
            SmallStack {
                write: @ |_, _| (),
                read: @ |_| undef[(i32, f32)]()
            }
        } else if begin + 1 == end {
            let mut val : (i32, f32);
            SmallStack {
                write: @ |i, v| val = v,
                read: @ |i| val
            }
        } else {
            let m = (begin + end) / 2;
            let left = make_small_stack_helper(begin, m);
            let right = make_small_stack_helper(m, end);
            SmallStack {
                write: @ |i, v| if i < m { left.write(i, v) } else { right.write(i, v) },
                read: @ |i| if i < m { left.read(i) } else { right.read(i) }
            }
        }
    }

    make_small_stack_helper(0, n)
}

// Stack with 64 array slots. The top entry is kept in the variables
// `node` / `tmin`; the arrays hold the entries below it.
fn @alloc_stack() -> Stack {
    let mut nodes : [i32 * 64];
    let mut tmins : [f32 * 64];
    let mut node = 0;
    let mut tmin = flt_max;
    let mut ptr = -1;

    // (read, write) accessor pairs over the arrays, shifted by `off`
    let vals_accessor = @ |off| (@ |i| nodes(i + off), @ |i, v| nodes(i + off) = v);
    let keys_accessor = @ |off| (@ |i| tmins(i + off), @ |i, k| tmins(i + off) = k);

    Stack {
        // Spills the current top into the arrays and makes (n, t) the new top
        push: @ |n, t| {
            ptr++;
            nodes(ptr) = node;
            tmins(ptr) = tmin;
            node = n;
            tmin = t;
        },
        // Stores (n, t) directly below the current top, which stays unchanged
        push_after: @ |n, t| {
            ptr++;
            nodes(ptr) = n;
            tmins(ptr) = t;
        },
        // Overwrites the top entry without touching the arrays
        set_top: @ |n, t| {
            node = n;
            tmin = t;
        },
        // Applies `sorting_network` to the last n array entries, swapping a
        // pair (i, j) whenever cmp(key_i, key_j) holds. The top entry held in
        // `node` / `tmin` does not take part.
        sort_n: @ |n, cmp, sorting_network, branchless| {
            let (read_val, write_val) = vals_accessor(ptr - n + 1);
            let (read_key, write_key) = keys_accessor(ptr - n + 1);
            if branchless {
                // Copy into temporaries, swap with select() instead of branches, copy back
                let tmp = make_small_stack(n);
                for i in range(0, n) @{
                    tmp.write(i, (read_val(i), read_key(i)))
                }
                sorting_network(n, @ |i, j| {
                    let (v0, k0) = tmp.read(i);
                    let (v1, k1) = tmp.read(j);
                    let swp = cmp(k0, k1);
                    tmp.write(i, select(swp, (v1, k1), (v0, k0)));
                    tmp.write(j, select(swp, (v0, k0), (v1, k1)));
                });
                for i in range(0, n) @{
                    let (v, k) = tmp.read(i);
                    write_val(i, v);
                    write_key(i, k);
                }
            } else {
                sorting_network(n, @ |i, j| {
                    let (k0, k1) = (read_key(i), read_key(j));
                    if cmp(k0, k1) {
                        let (v0, v1) = (read_val(i), read_val(j));
                        write_key(i, k1);
                        write_key(j, k0);
                        write_val(i, v1);
                        write_val(j, v0);
                    }
                });
            }
        },
        // Returns the top entry and reloads the top from the arrays.
        // NOTE(review): no bounds check; popping with ptr == -1 reads index -1.
        pop: @ || {
            let old = NodeRef { node: node, tmin: tmin };
            node = nodes(ptr);
            tmin = tmins(ptr);
            ptr--;
            old
        },
        top: @ || NodeRef { node: node, tmin: tmin },
        // The stack counts as empty when the top holds the initial node value 0
        is_empty: @ || node == 0,
        // NOTE(review): returns `ptr`, the index of the last used array slot
        // (-1 initially), not a count of entries -- confirm what callers expect.
        size: @ || ptr
    }
}

--------------------------------------------------------------------------------
/testing/cornell_box.mtl:
--------------------------------------------------------------------------------
# The original Cornell Box in OBJ format.
# Note that the real box is not a perfect cube, so
# the faces are imperfect in this data set.
4 | # 5 | # Created by Guedis Cardenas and Morgan McGuire at Williams College, 2011 6 | # Released into the Public Domain. 7 | # 8 | # http://graphics.cs.williams.edu/data 9 | # http://www.graphics.cornell.edu/online/box/data.html 10 | # 11 | 12 | newmtl leftWall 13 | Ns 10.0000 14 | Ni 1.5000 15 | illum 2 16 | Ka 0.63 0.065 0.05 # Red 17 | Kd 0.63 0.065 0.05 18 | Ks 0 0 0 19 | Ke 0 0 0 20 | 21 | 22 | newmtl rightWall 23 | Ns 10.0000 24 | Ni 1.5000 25 | illum 2 26 | Ka 0.14 0.45 0.091 # Green 27 | Kd 0.14 0.45 0.091 28 | Ks 0 0 0 29 | Ke 0 0 0 30 | 31 | 32 | newmtl floor 33 | Ns 10.0000 34 | Ni 1.0000 35 | illum 2 36 | Ka 0.725 0.71 0.68 # White 37 | Kd 0.725 0.71 0.68 38 | Ks 0 0 0 39 | Ke 0 0 0 40 | 41 | 42 | newmtl ceiling 43 | Ns 10.0000 44 | Ni 1.0000 45 | illum 2 46 | Ka 0.725 0.71 0.68 # White 47 | Kd 0.725 0.71 0.68 48 | Ks 0 0 0 49 | Ke 0 0 0 50 | 51 | 52 | newmtl backWall 53 | Ns 10.0000 54 | Ni 1.0000 55 | illum 2 56 | Ka 0.725 0.71 0.68 # White 57 | Kd 0.725 0.71 0.68 58 | Ks 0 0 0 59 | Ke 0 0 0 60 | 61 | 62 | newmtl shortBox 63 | Ns 10.0000 64 | Ni 1.0000 65 | illum 2 66 | Ka 0.725 0.71 0.68 # White 67 | Kd 0.725 0.71 0.68 68 | Ks 0 0 0 69 | Ke 0 0 0 70 | 71 | 72 | newmtl tallBox 73 | Ns 10.0000 74 | Ni 1.0000 75 | illum 2 76 | Ka 0.725 0.71 0.68 # White 77 | Kd 0.725 0.71 0.68 78 | Ks 0 0 0 79 | Ke 0 0 0 80 | 81 | newmtl light 82 | Ns 10.0000 83 | Ni 1.0000 84 | illum 2 85 | Ka 0.78 0.78 0.78 # White 86 | Kd 0.78 0.78 0.78 87 | Ks 0 0 0 88 | Ke 17 12 4 89 | -------------------------------------------------------------------------------- /testing/cornell_box.obj: -------------------------------------------------------------------------------- 1 | # The original Cornell Box in OBJ format. 2 | # Note that the real box is not a perfect cube, so 3 | # the faces are imperfect in this data set. 4 | # 5 | # Created by Guedis Cardenas and Morgan McGuire at Williams College, 2011 6 | # Released into the Public Domain. 
7 | # 8 | # http://graphics.cs.williams.edu/data 9 | # http://www.graphics.cornell.edu/online/box/data.html 10 | # 11 | 12 | mtllib cornell_box.mtl 13 | 14 | ## Object floor 15 | v -1.01 0.00 0.99 16 | v 1.00 0.00 0.99 17 | v 1.00 0.00 -1.04 18 | v -0.99 0.00 -1.04 19 | 20 | g floor 21 | usemtl floor 22 | f -4 -3 -2 -1 23 | 24 | ## Object ceiling 25 | v -1.02 1.99 0.99 26 | v -1.02 1.99 -1.04 27 | v 1.00 1.99 -1.04 28 | v 1.00 1.99 0.99 29 | 30 | g ceiling 31 | usemtl ceiling 32 | f -4 -3 -2 -1 33 | 34 | ## Object backwall 35 | v -0.99 0.00 -1.04 36 | v 1.00 0.00 -1.04 37 | v 1.00 1.99 -1.04 38 | v -1.02 1.99 -1.04 39 | 40 | g backWall 41 | usemtl backWall 42 | f -4 -3 -2 -1 43 | 44 | ## Object rightwall 45 | v 1.00 0.00 -1.04 46 | v 1.00 0.00 0.99 47 | v 1.00 1.99 0.99 48 | v 1.00 1.99 -1.04 49 | 50 | g rightWall 51 | usemtl rightWall 52 | f -4 -3 -2 -1 53 | 54 | ## Object leftWall 55 | v -1.01 0.00 0.99 56 | v -0.99 0.00 -1.04 57 | v -1.02 1.99 -1.04 58 | v -1.02 1.99 0.99 59 | 60 | g leftWall 61 | usemtl leftWall 62 | f -4 -3 -2 -1 63 | 64 | ## Object shortBox 65 | usemtl shortBox 66 | 67 | # Top Face 68 | v 0.53 0.60 0.75 69 | v 0.70 0.60 0.17 70 | v 0.13 0.60 0.00 71 | v -0.05 0.60 0.57 72 | f -4 -3 -2 -1 73 | 74 | # Left Face 75 | v -0.05 0.00 0.57 76 | v -0.05 0.60 0.57 77 | v 0.13 0.60 0.00 78 | v 0.13 0.00 0.00 79 | f -4 -3 -2 -1 80 | 81 | # Front Face 82 | v 0.53 0.00 0.75 83 | v 0.53 0.60 0.75 84 | v -0.05 0.60 0.57 85 | v -0.05 0.00 0.57 86 | f -4 -3 -2 -1 87 | 88 | # Right Face 89 | v 0.70 0.00 0.17 90 | v 0.70 0.60 0.17 91 | v 0.53 0.60 0.75 92 | v 0.53 0.00 0.75 93 | f -4 -3 -2 -1 94 | 95 | # Back Face 96 | v 0.13 0.00 0.00 97 | v 0.13 0.60 0.00 98 | v 0.70 0.60 0.17 99 | v 0.70 0.00 0.17 100 | f -4 -3 -2 -1 101 | 102 | # Bottom Face 103 | v 0.53 0.00 0.75 104 | v 0.70 0.00 0.17 105 | v 0.13 0.00 0.00 106 | v -0.05 0.00 0.57 107 | f -12 -11 -10 -9 108 | 109 | g shortBox 110 | usemtl shortBox 111 | 112 | ## Object tallBox 113 | usemtl tallBox 114 | 
115 | # Top Face 116 | v -0.53 1.20 0.09 117 | v 0.04 1.20 -0.09 118 | v -0.14 1.20 -0.67 119 | v -0.71 1.20 -0.49 120 | f -4 -3 -2 -1 121 | 122 | # Left Face 123 | v -0.53 0.00 0.09 124 | v -0.53 1.20 0.09 125 | v -0.71 1.20 -0.49 126 | v -0.71 0.00 -0.49 127 | f -4 -3 -2 -1 128 | 129 | # Back Face 130 | v -0.71 0.00 -0.49 131 | v -0.71 1.20 -0.49 132 | v -0.14 1.20 -0.67 133 | v -0.14 0.00 -0.67 134 | f -4 -3 -2 -1 135 | 136 | # Right Face 137 | v -0.14 0.00 -0.67 138 | v -0.14 1.20 -0.67 139 | v 0.04 1.20 -0.09 140 | v 0.04 0.00 -0.09 141 | f -4 -3 -2 -1 142 | 143 | # Front Face 144 | v 0.04 0.00 -0.09 145 | v 0.04 1.20 -0.09 146 | v -0.53 1.20 0.09 147 | v -0.53 0.00 0.09 148 | f -4 -3 -2 -1 149 | 150 | # Bottom Face 151 | v -0.53 0.00 0.09 152 | v 0.04 0.00 -0.09 153 | v -0.14 0.00 -0.67 154 | v -0.71 0.00 -0.49 155 | f -8 -7 -6 -5 156 | 157 | g tallBox 158 | usemtl tallBox 159 | 160 | ## Object light 161 | v -0.24 1.98 0.16 162 | v -0.24 1.98 -0.22 163 | v 0.23 1.98 -0.22 164 | v 0.23 1.98 0.16 165 | 166 | g light 167 | usemtl light 168 | f -4 -3 -2 -1 169 | -------------------------------------------------------------------------------- /testing/ref-cornell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnyDSL/rodent/f24b885cc9bf0a8b336f4c5de2f12e07a468f798/testing/ref-cornell.png -------------------------------------------------------------------------------- /testing/ref-primary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnyDSL/rodent/f24b885cc9bf0a8b336f4c5de2f12e07a468f798/testing/ref-primary.png -------------------------------------------------------------------------------- /testing/ref-random.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnyDSL/rodent/f24b885cc9bf0a8b336f4c5de2f12e07a468f798/testing/ref-random.png 
-------------------------------------------------------------------------------- /testing/sponza-primary.rays: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnyDSL/rodent/f24b885cc9bf0a8b336f4c5de2f12e07a468f798/testing/sponza-primary.rays -------------------------------------------------------------------------------- /testing/sponza-random.rays: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnyDSL/rodent/f24b885cc9bf0a8b336f4c5de2f12e07a468f798/testing/sponza-random.rays -------------------------------------------------------------------------------- /testing/sponza.bvh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnyDSL/rodent/f24b885cc9bf0a8b336f4c5de2f12e07a468f798/testing/sponza.bvh -------------------------------------------------------------------------------- /tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Generate the traversal benchmark utility, and the ../common/traversal.h interface 2 | add_subdirectory(bench_traversal) 3 | add_subdirectory(bench_shading) 4 | add_subdirectory(bench_interface) 5 | 6 | find_package(CUDA QUIET) 7 | if (CUDA_FOUND) 8 | add_subdirectory(bench_aila) 9 | endif() 10 | 11 | add_subdirectory(ray_gen) 12 | find_package(PNG QUIET) 13 | if (PNG_FOUND) 14 | add_subdirectory(fbuf2png) 15 | endif() 16 | 17 | add_subdirectory(bvh_extractor) 18 | 19 | if (EMBREE_FOUND) 20 | add_subdirectory(bench_embree) 21 | endif() 22 | 23 | if (ImageMagick_FOUND AND PNG_FOUND) 24 | # Only test the primary rays, as the random rays are often too close 25 | # to surfaces and often give slightly different results for each algorithm 26 | add_test(NAME single_bvh4 COMMAND ${CMAKE_COMMAND} -DBENCH_TRAVERSAL=$<TARGET_FILE:bench_traversal> -DFBUF2PNG=$<TARGET_FILE:fbuf2png> -DIM_COMPARE=${ImageMagick_compare_EXECUTABLE} 
"-DBENCH_TRAVERSAL_ARGS=--single;--bvh-width;4" -DTESTING_DIR=${PROJECT_SOURCE_DIR}/testing -DTRAVERSAL_OUTPUT=single-bvh4-output -P ${PROJECT_SOURCE_DIR}/cmake/test/run_traversal.cmake) 27 | add_test(NAME packet_bvh4 COMMAND ${CMAKE_COMMAND} -DBENCH_TRAVERSAL=$<TARGET_FILE:bench_traversal> -DFBUF2PNG=$<TARGET_FILE:fbuf2png> -DIM_COMPARE=${ImageMagick_compare_EXECUTABLE} "-DBENCH_TRAVERSAL_ARGS=--packet;--ray-width;4;--bvh-width;4" -DTESTING_DIR=${PROJECT_SOURCE_DIR}/testing -DTRAVERSAL_OUTPUT=packet-bvh4-output -P ${PROJECT_SOURCE_DIR}/cmake/test/run_traversal.cmake) 28 | add_test(NAME hybrid_bvh4 COMMAND ${CMAKE_COMMAND} -DBENCH_TRAVERSAL=$<TARGET_FILE:bench_traversal> -DFBUF2PNG=$<TARGET_FILE:fbuf2png> -DIM_COMPARE=${ImageMagick_compare_EXECUTABLE} "-DBENCH_TRAVERSAL_ARGS=--ray-width;4;--bvh-width;4" -DTESTING_DIR=${PROJECT_SOURCE_DIR}/testing -DTRAVERSAL_OUTPUT=hybrid-bvh4-output -P ${PROJECT_SOURCE_DIR}/cmake/test/run_traversal.cmake) 29 | add_test(NAME single_bvh8 COMMAND ${CMAKE_COMMAND} -DBENCH_TRAVERSAL=$<TARGET_FILE:bench_traversal> -DFBUF2PNG=$<TARGET_FILE:fbuf2png> -DIM_COMPARE=${ImageMagick_compare_EXECUTABLE} "-DBENCH_TRAVERSAL_ARGS=--single;--bvh-width;8" -DTESTING_DIR=${PROJECT_SOURCE_DIR}/testing -DTRAVERSAL_OUTPUT=single-bvh8-output -P ${PROJECT_SOURCE_DIR}/cmake/test/run_traversal.cmake) 30 | add_test(NAME packet_bvh8 COMMAND ${CMAKE_COMMAND} -DBENCH_TRAVERSAL=$<TARGET_FILE:bench_traversal> -DFBUF2PNG=$<TARGET_FILE:fbuf2png> -DIM_COMPARE=${ImageMagick_compare_EXECUTABLE} "-DBENCH_TRAVERSAL_ARGS=--packet;--ray-width;8;--bvh-width;8" -DTESTING_DIR=${PROJECT_SOURCE_DIR}/testing -DTRAVERSAL_OUTPUT=packet-bvh8-output -P ${PROJECT_SOURCE_DIR}/cmake/test/run_traversal.cmake) 31 | add_test(NAME hybrid_bvh8 COMMAND ${CMAKE_COMMAND} -DBENCH_TRAVERSAL=$<TARGET_FILE:bench_traversal> -DFBUF2PNG=$<TARGET_FILE:fbuf2png> -DIM_COMPARE=${ImageMagick_compare_EXECUTABLE} "-DBENCH_TRAVERSAL_ARGS=--ray-width;8;--bvh-width;8" -DTESTING_DIR=${PROJECT_SOURCE_DIR}/testing -DTRAVERSAL_OUTPUT=hybrid-bvh8-output -P ${PROJECT_SOURCE_DIR}/cmake/test/run_traversal.cmake) 32 | endif() 33 | -------------------------------------------------------------------------------- /tools/bench_aila/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | set(CUDA_HOST_COMPILER ${CMAKE_C_COMPILER} CACHE FILEPATH "Compiler to use with CUDA") 2 | cuda_compile(AILA_TRAVERSAL 3 | CudaTracerKernels.hpp 4 | kepler_dynamic_fetch.cu 5 | OPTIONS "-O3;-std=c++11;--expt-extended-lambda;-arch=sm_52;-I${CMAKE_CURRENT_SOURCE_DIR}/../common") 6 | 7 | add_executable(bench_aila bench_aila.cpp ${AILA_TRAVERSAL}) 8 | target_include_directories(bench_aila PUBLIC ../common ../../src) 9 | target_link_libraries(bench_aila ${CUDA_LIBRARIES} ${AnyDSL_runtime_LIBRARIES}) 10 | # Needs the interface file generated by bench_traversal 11 | add_dependencies(bench_aila bench_traversal) 12 | -------------------------------------------------------------------------------- /tools/bench_aila/bench_aila.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "traversal.h" 12 | #include "load_bvh.h" 13 | #include "load_rays.h" 14 | 15 | void setup_traversal(const Node2* nodes, size_t num_nodes, const Tri1* tris, size_t num_tris); 16 | void shutdown_traversal(); 17 | void bench_traversal(const Ray1* rays, Hit1* hits, int num_rays, double* timings, int ntimes, bool any); 18 | 19 | inline void check_argument(int i, int argc, char** argv) { 20 | if (i + 1 >= argc) { 21 | std::cerr << "Missing argument for " << argv[i] << std::endl; 22 | exit(1); 23 | } 24 | } 25 | 26 | inline void usage() { 27 | std::cout << "Usage: bench_aila [options]\n" 28 | "Available options:\n" 29 | " -bvh --bvh-file Sets the BVH file to use\n" 30 | " -ray --ray-file Sets the ray file to use\n" 31 | " --tmin Sets the minimum distance along the rays (default: 0)\n" 32 | " --tmax Sets the maximum distance along the rays (default: 1e9)\n" 33 | " --bench Sets the number of benchmark iterations (default: 1)\n" 34 | " --warmup Sets the number 
of warmup iterations (default: 0)\n" 35 | " -any Exit at the first intersection (disabled by default)\n" 36 | " -o --output Sets the output file name (no file is generated by default)\n"; 37 | } 38 | 39 | static void transform_nodes(Node2* nodes, size_t size) { 40 | for (size_t i = 0; i < size; ++i) { 41 | Node2 copy = nodes[i]; 42 | nodes[i].bounds[4] = copy.bounds[6]; 43 | nodes[i].bounds[5] = copy.bounds[7]; 44 | nodes[i].bounds[6] = copy.bounds[8]; 45 | nodes[i].bounds[7] = copy.bounds[9]; 46 | nodes[i].bounds[8] = copy.bounds[4]; 47 | nodes[i].bounds[9] = copy.bounds[5]; 48 | } 49 | } 50 | 51 | int main(int argc, char** argv) { 52 | std::string ray_file; 53 | std::string bvh_file; 54 | std::string out_file; 55 | float tmin = 0.0f, tmax = 1e9f; 56 | int iters = 1; 57 | int warmup = 0; 58 | bool any_hit = false; 59 | 60 | for (int i = 1; i < argc; i++) { 61 | auto arg = argv[i]; 62 | if (arg[0] == '-') { 63 | if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) { 64 | usage(); 65 | return 0; 66 | } else if (!strcmp(arg, "-bvh") || !strcmp(arg, "--bvh-file")) { 67 | check_argument(i, argc, argv); 68 | bvh_file = argv[++i]; 69 | } else if (!strcmp(arg, "-ray") || !strcmp(arg, "--ray-file")) { 70 | check_argument(i, argc, argv); 71 | ray_file = argv[++i]; 72 | } else if (!strcmp(arg, "--tmin")) { 73 | check_argument(i, argc, argv); 74 | tmin = strtof(argv[++i], nullptr); 75 | } else if (!strcmp(arg, "--tmax")) { 76 | check_argument(i, argc, argv); 77 | tmax = strtof(argv[++i], nullptr); 78 | } else if (!strcmp(arg, "--bench")) { 79 | check_argument(i, argc, argv); 80 | iters = strtol(argv[++i], nullptr, 10); 81 | } else if (!strcmp(arg, "--warmup")) { 82 | check_argument(i, argc, argv); 83 | warmup = strtol(argv[++i], nullptr, 10); 84 | } else if (!strcmp(arg, "-any")) { 85 | any_hit = true; 86 | } else if (!strcmp(arg, "-o") || !strcmp(arg, "--output")) { 87 | check_argument(i, argc, argv); 88 | out_file = argv[++i]; 89 | } else { 90 | std::cerr << "Unknown option '" 
<< arg << "'" << std::endl; 91 | return 1; 92 | } 93 | } else { 94 | std::cerr << "Invalid argument '" << arg << "'" << std::endl; 95 | return 1; 96 | } 97 | } 98 | 99 | if (bvh_file == "") { 100 | std::cerr << "No BVH file specified" << std::endl; 101 | return 1; 102 | } 103 | if (ray_file == "") { 104 | std::cerr << "No ray file specified" << std::endl; 105 | return 1; 106 | } 107 | 108 | anydsl::Array nodes; 109 | anydsl::Array tris; 110 | if (!load_bvh(bvh_file, nodes, tris, BvhType::BVH2_TRI1, anydsl::Platform::Host, anydsl::Device(0))) { 111 | std::cerr << "Cannot load BVH file" << std::endl; 112 | return 1; 113 | } 114 | 115 | transform_nodes(nodes.data(), nodes.size()); 116 | 117 | anydsl::Array rays; 118 | if (!load_rays(ray_file, rays, tmin, tmax, anydsl::Platform::Host, anydsl::Device(0))) { 119 | std::cerr << "Cannot load rays" << std::endl; 120 | return 1; 121 | } 122 | 123 | std::vector hits(rays.size()); 124 | std::vector timings(iters); 125 | 126 | setup_traversal(nodes.data(), nodes.size(), tris.data(), tris.size()); 127 | bench_traversal(rays.data(), hits.data(), rays.size(), nullptr, warmup, any_hit); 128 | bench_traversal(rays.data(), hits.data(), rays.size(), timings.data(), iters, any_hit); 129 | shutdown_traversal(); 130 | 131 | size_t intr = 0; 132 | for (auto& hit : hits) intr += (hit.tri_id >= 0); 133 | 134 | if (out_file != "") { 135 | std::ofstream of(out_file, std::ofstream::binary); 136 | for (auto& hit : hits) 137 | of.write((char*)&hit.t, sizeof(float)); 138 | } 139 | 140 | std::sort(timings.begin(), timings.end()); 141 | auto sum = std::accumulate(timings.begin(), timings.end(), 0.0); 142 | auto avg = sum / timings.size(); 143 | auto med = timings[timings.size() / 2]; 144 | auto min = *std::min_element(timings.begin(), timings.end()); 145 | std::cout << sum << "ms for " << iters << " iteration(s)" << std::endl; 146 | std::cout << rays.size() * iters / (1000.0 * sum) << " Mrays/sec" << std::endl; 147 | std::cout << "# Average: " << 
avg << " ms" << std::endl; 148 | std::cout << "# Median: " << med << " ms" << std::endl; 149 | std::cout << "# Min: " << min << " ms" << std::endl; 150 | std::cout << intr << " intersection(s)" << std::endl; 151 | return 0; 152 | } 153 | -------------------------------------------------------------------------------- /tools/bench_embree/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(bench_embree 2 | bench_embree.cpp 3 | ../../src/driver/obj.h 4 | ../../src/driver/obj.cpp) 5 | target_include_directories(bench_embree PUBLIC ../common ../../src ${EMBREE_ROOT_DIR}/include ${EMBREE_ROOT_DIR} ${EMBREE_LIBRARY_DIR}) 6 | target_compile_definitions(bench_embree PUBLIC ${EMBREE_DEFINITIONS}) 7 | target_link_libraries(bench_embree ${EMBREE_DEPENDENCIES} ${AnyDSL_runtime_LIBRARIES}) 8 | # Needs the interface file generated by bench_traversal 9 | add_dependencies(bench_embree bench_traversal) 10 | -------------------------------------------------------------------------------- /tools/bench_interface/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(INTERFACE_SRCS 2 | bench_interface.impala 3 | ../../src/render/material.impala 4 | ../../src/render/light.impala 5 | ../../src/render/image.impala 6 | ../../src/core/common.impala 7 | ../../src/core/color.impala 8 | ../../src/core/random.impala 9 | ../../src/core/matrix.impala 10 | ../../src/core/vector.impala) 11 | 12 | anydsl_runtime_wrap(INTERFACE_OBJS 13 | NAME "bench_interface" 14 | CLANG_FLAGS ${CLANG_FLAGS} 15 | FILES ${INTERFACE_SRCS} 16 | INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../common/interface) 17 | 18 | add_executable(bench_interface 19 | ${INTERFACE_OBJS} 20 | bench_interface.cpp 21 | ${CMAKE_CURRENT_SOURCE_DIR}/../common/interface.h) 22 | target_include_directories(bench_interface PUBLIC ../common ../../src/driver) 23 | target_link_libraries(bench_interface ${AnyDSL_runtime_LIBRARIES}) 24 | 
-------------------------------------------------------------------------------- /tools/bench_interface/bench_interface.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #if defined(__x86_64__) || defined(__amd64__) || defined(_M_X64) 11 | #include 12 | #endif 13 | 14 | #include 15 | 16 | #include "float2.h" 17 | #include "float3.h" 18 | #include "interface.h" 19 | 20 | #define BENCH_CUDA 21 | 22 | #ifdef BENCH_CUDA 23 | // Some external functions will refer to those symbols when compiled on the GPU 24 | extern "C" float __nv_fminf(float a, float b) { return fminf(a, b); } 25 | extern "C" float __nv_fmaxf(float a, float b) { return fmaxf(a, b); } 26 | extern "C" float __nv_sqrtf(float x) { return sqrtf(x); } 27 | extern "C" float __nv_floorf(float x) { return floorf(x); } 28 | #endif 29 | 30 | template 31 | void fill(anydsl::Array& array, T val) { 32 | anydsl::Array copy(array.size()); 33 | std::fill(copy.begin(), copy.end(), val); 34 | anydsl::copy(copy, array); 35 | } 36 | 37 | template 38 | void set(anydsl::Array& array, const std::vector& vals) { 39 | anydsl::Array host_array(vals.size()); 40 | std::copy(vals.begin(), vals.end(), host_array.begin()); 41 | copy(host_array, array); 42 | } 43 | 44 | int main(int argc, char** argv) { 45 | #if defined(__x86_64__) || defined(__amd64__) || defined(_M_X64) 46 | _mm_setcsr(_mm_getcsr() | (_MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON)); 47 | #endif 48 | 49 | #ifdef BENCH_CUDA 50 | auto plt = anydsl::Platform::Cuda; 51 | auto dev = anydsl::Device(0); 52 | #else 53 | auto plt = anydsl::Platform::Host; 54 | auto dev = anydsl::Device(0); 55 | #endif 56 | 57 | auto num_vertices = 4; 58 | auto num_triangles = 2; 59 | 60 | anydsl::Array vertices (plt, dev, num_vertices); 61 | anydsl::Array normals (plt, dev, num_vertices); 62 | anydsl::Array texcoords(plt, dev, num_vertices); 63 | 
anydsl::Array indices (plt, dev, num_triangles * 4); 64 | 65 | // Create a quad 66 | set(vertices, { 67 | float3(-1.0f, 1.0f, 0.0f), 68 | float3(-1.0f, -1.0f, 0.0f), 69 | float3( 1.0f, -1.0f, 0.0f), 70 | float3( 1.0f, 1.0f, 0.0f) 71 | }); 72 | set(normals, { 73 | float3(0.0f, 0.0f, 1.0f), 74 | float3(0.0f, 0.0f, 1.0f), 75 | float3(0.0f, 0.0f, 1.0f), 76 | float3(0.0f, 0.0f, 1.0f) 77 | }); 78 | set(texcoords, { 79 | float2(-1.0f, 1.0f), 80 | float2(-1.0f, -1.0f), 81 | float2( 1.0f, -1.0f), 82 | float2( 1.0f, 1.0f) 83 | }); 84 | set(indices, { 85 | 0, 86 | 1, 87 | 2, 88 | -1, 89 | 90 | 2, 91 | 3, 92 | 0, 93 | -1 94 | }); 95 | 96 | int width = 1024; 97 | int height = 1024; 98 | anydsl::Array pixels_kd(plt, dev, width * height); 99 | anydsl::Array pixels_ks(plt, dev, width * height); 100 | anydsl::Array pixels_ns(plt, dev, width * height); 101 | 102 | fill(pixels_kd, Color { 0.1f, 0.2f, 0.3f }); 103 | fill(pixels_ks, Color { 1.0f, 0.5f, 0.1f }); 104 | fill(pixels_ns, Color { 0.1f, 0.5f, 1.0f }); 105 | 106 | Tex tex_kd { 107 | pixels_kd.data(), 108 | Color { 0.0f, 0.0f, 0.0f }, 109 | 0, 110 | 1, 111 | width, 112 | height 113 | }; 114 | 115 | Tex tex_ks { 116 | pixels_ks.data(), 117 | Color { 0.5f, 1.0f, 0.2f }, 118 | 2, 119 | 0, 120 | width, 121 | height 122 | }; 123 | 124 | Tex tex_ns { 125 | pixels_ns.data(), 126 | Color { 0.0f, 0.0f, 0.0f }, 127 | 1, 128 | 1, 129 | width, 130 | height 131 | }; 132 | 133 | ShadedMesh mesh { 134 | reinterpret_cast(vertices.data()), 135 | reinterpret_cast(indices.data()), 136 | reinterpret_cast(normals.data()), 137 | reinterpret_cast(texcoords.data()), 138 | tex_kd, 139 | tex_ks, 140 | tex_ns 141 | }; 142 | 143 | size_t N = 1024*1024; 144 | anydsl::Array host_in_dirs(N); 145 | anydsl::Array host_out_dirs(N); 146 | anydsl::Array host_tri_hits(N); 147 | 148 | uint32_t seed = 42; 149 | std::mt19937 gen(seed); 150 | std::uniform_real_distribution rnd(0.0f, 1.0f); 151 | for (size_t i = 0; i < N; ++i) { 152 | host_tri_hits[i].id = i % 2; 153 | 
host_tri_hits[i].uv.x = rnd(gen); 154 | host_tri_hits[i].uv.y = rnd(gen); 155 | 156 | auto in = normalize(float3(rnd(gen), rnd(gen), rnd(gen))); 157 | auto out = normalize(float3(rnd(gen), rnd(gen), rnd(gen))); 158 | 159 | host_in_dirs[i].x = in.x; 160 | host_in_dirs[i].y = in.y; 161 | host_in_dirs[i].z = in.z; 162 | 163 | host_out_dirs[i].x = out.x; 164 | host_out_dirs[i].y = out.y; 165 | host_out_dirs[i].z = out.z; 166 | } 167 | anydsl::Array colors (plt, dev, N); 168 | anydsl::Array in_dirs (plt, dev, N); 169 | anydsl::Array out_dirs(plt, dev, N); 170 | anydsl::Array tri_hits(plt, dev, N); 171 | anydsl::copy(host_in_dirs, in_dirs); 172 | anydsl::copy(host_out_dirs, out_dirs); 173 | anydsl::copy(host_tri_hits, tri_hits); 174 | 175 | #ifdef BENCH_CUDA 176 | size_t iters = 1000; 177 | #else 178 | size_t iters = 100; 179 | #endif 180 | std::vector times; 181 | for (size_t i = 0; i < iters; ++i) { 182 | auto t0 = std::chrono::high_resolution_clock::now(); 183 | bench_interface(&mesh, tri_hits.data(), in_dirs.data(), out_dirs.data(), colors.data(), N); 184 | auto t1 = std::chrono::high_resolution_clock::now(); 185 | times.push_back(std::chrono::duration_cast(t1 - t0).count()); 186 | } 187 | std::sort(times.begin(), times.end()); 188 | std::cout << N / times[iters/2] << " Mrays/s" << std::endl; 189 | 190 | return 0; 191 | } 192 | -------------------------------------------------------------------------------- /tools/bench_interface/bench_interface.impala: -------------------------------------------------------------------------------- 1 | static border_clamp = 0u32; 2 | static border_repeat = 1u32; 3 | static border_constant = 2u32; 4 | 5 | static sampler_nearest = 0u32; 6 | static sampler_bilinear = 1u32; 7 | 8 | struct Tex { 9 | pixels: &[Color], 10 | border_color: Color, 11 | border: u32, 12 | sampler: u32, 13 | width: i32, 14 | height: i32 15 | } 16 | 17 | struct ShadedMesh { 18 | vertices: &[Vec3], 19 | indices: &[u32], 20 | normals: &[Vec3], 21 | texcoords: 
&[Vec2], 22 | 23 | tex_kd: Tex, 24 | tex_ks: Tex, 25 | tex_ns: Tex 26 | } 27 | 28 | struct TriHit { 29 | id: i32, 30 | uv: Vec2 31 | } 32 | 33 | struct ShaderInput { 34 | point: Vec3, 35 | face_normal: Vec3, 36 | normal: Vec3, 37 | uv_coords: Vec2, 38 | local: Mat3x3, 39 | kd: Color, 40 | ks: Color, 41 | ns: f32 42 | } 43 | 44 | static iterate = gpu_iterate; 45 | static math = nvvm_intrinsics; 46 | static opt_interface = false; 47 | static opt_tex = false; 48 | 49 | fn cpu_iterate(n: i32, body: fn (i32) -> ()) -> () { 50 | let num_cores = 0; // autodetect 51 | for i in parallel(num_cores, 0, n) { 52 | body(i); 53 | } 54 | } 55 | 56 | fn gpu_iterate(n: i32, body: fn (i32) -> ()) -> () { 57 | let dev = 0; 58 | let grid = (n, 1, 1); 59 | let block = (64, 1, 1); 60 | let acc = nvvm_accelerator(dev); 61 | for work_item in acc.exec(grid, block) { 62 | body(work_item.gidx()); 63 | } 64 | acc.sync(); 65 | } 66 | 67 | extern fn @(opt) lookup_tex(opt: bool, tex: Tex, mut uv: Vec2) -> Color { 68 | if tex.border == border_clamp { 69 | let border = make_clamp_border(); 70 | uv.x = border.horz(math, uv.x); 71 | uv.y = border.vert(math, uv.y); 72 | } else if tex.border == border_repeat { 73 | let border = make_repeat_border(); 74 | uv.x = border.horz(math, uv.x); 75 | uv.y = border.vert(math, uv.y); 76 | } else /* if tex.border == border_constant */ { 77 | if uv.x < 0.0f || uv.x > 1.0f || 78 | uv.y < 0.0f || uv.y > 1.0f { 79 | return(tex.border_color) 80 | } 81 | } 82 | let image = make_image(|x, y| tex.pixels(x + y * tex.width), tex.width, tex.height); 83 | if tex.sampler == sampler_nearest { 84 | let filter = make_nearest_filter(); 85 | filter(math, image, uv) 86 | } else /* if tex.sampler == sampler_bilinear */ { 87 | let filter = make_bilinear_filter(); 88 | filter(math, image, uv) 89 | } 90 | } 91 | 92 | extern fn @(opt) compute_shader_input(opt: bool, mesh: ShadedMesh, tri_hit: TriHit) -> ShaderInput { 93 | let i0 = mesh.indices(tri_hit.id * 4 + 0); 94 | let i1 = 
mesh.indices(tri_hit.id * 4 + 1); 95 | let i2 = mesh.indices(tri_hit.id * 4 + 2); 96 | let v0 = mesh.vertices(i0); 97 | let v1 = mesh.vertices(i1); 98 | let v2 = mesh.vertices(i2); 99 | let uv = tri_hit.uv; 100 | 101 | let point = vec3_lerp2(v0, v1, v2, uv.x, uv.y); 102 | let face_normal = vec3_normalize(math, vec3_cross(vec3_sub(v1, v0), vec3_sub(v2, v0))); 103 | let normal = vec3_normalize(math, vec3_lerp2(mesh.normals(i0), mesh.normals(i1), mesh.normals(i2), uv.x, uv.y)); 104 | let texcoord = vec2_lerp2(mesh.texcoords(i0), mesh.texcoords(i1), mesh.texcoords(i2), uv.x, uv.y); 105 | 106 | let kd = lookup_tex(opt_tex, mesh.tex_kd, texcoord); 107 | let ks = lookup_tex(opt_tex, mesh.tex_ks, texcoord); 108 | let ns = lookup_tex(opt_tex, mesh.tex_ns, texcoord).r; 109 | 110 | let local = make_orthonormal_mat3x3(normal); 111 | 112 | ShaderInput { 113 | point: point, 114 | face_normal: face_normal, 115 | normal: normal, 116 | uv_coords: uv, 117 | local: local, 118 | kd: kd, 119 | ks: ks, 120 | ns: ns 121 | } 122 | } 123 | 124 | extern fn @(opt) shade(opt: bool, input: ShaderInput, in_dir: Vec3, out_dir: Vec3) -> Color { 125 | let surf = SurfaceElement { 126 | is_entering: true, 127 | point: input.point, 128 | face_normal: input.face_normal, 129 | uv_coords: input.uv_coords, 130 | attr: @ |_| make_vec4(0.0f, 0.0f, 0.0f, 0.0f), 131 | local: input.local 132 | }; 133 | let bsdf = make_diffuse_bsdf(math, surf, input.kd); 134 | bsdf.eval(in_dir, out_dir) 135 | } 136 | 137 | extern fn bench_interface(mesh_ptr: &ShadedMesh, tri_hits: &[TriHit], in_dirs: &[Vec3], out_dirs: &[Vec3], colors: &mut [Color], n: i32) -> () { 138 | let mesh = *mesh_ptr; 139 | for i in iterate(n) { 140 | let input = compute_shader_input(opt_interface, mesh, tri_hits(i)); 141 | colors(i) = shade(opt_interface, input, in_dirs(i), out_dirs(i)); 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /tools/bench_shading/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | set(SHADING_SRCS 2 | bench_shading.impala 3 | ../../src/render/mapping_cpu.impala 4 | ../../src/render/material.impala 5 | ../../src/render/geometry.impala 6 | ../../src/render/light.impala 7 | ../../src/render/image.impala 8 | ../../src/render/camera.impala 9 | ../../src/render/scene.impala 10 | ../../src/render/renderer.impala 11 | ../../src/render/driver.impala 12 | ../../src/traversal/intersection.impala 13 | ../../src/traversal/mapping_cpu.impala 14 | ../../src/traversal/mapping_gpu.impala 15 | ../../src/traversal/stack.impala 16 | ../../src/core/common.impala 17 | ../../src/core/cpu_common.impala 18 | ../../src/core/sort.impala 19 | ../../src/core/color.impala 20 | ../../src/core/random.impala 21 | ../../src/core/matrix.impala 22 | ../../src/core/vector.impala) 23 | 24 | anydsl_runtime_wrap(SHADING_OBJS 25 | NAME "bench_shading" 26 | CLANG_FLAGS ${CLANG_FLAGS} 27 | FILES ${SHADING_SRCS} 28 | INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/../common/shading) 29 | 30 | add_executable(bench_shading 31 | ${SHADING_OBJS} 32 | bench_shading.cpp 33 | ${CMAKE_CURRENT_SOURCE_DIR}/../common/shading.h) 34 | target_include_directories(bench_shading PUBLIC ../common ../../src/driver) 35 | target_link_libraries(bench_shading ${AnyDSL_runtime_LIBRARIES}) 36 | -------------------------------------------------------------------------------- /tools/bench_shading/bench_shading.impala: -------------------------------------------------------------------------------- 1 | static vector_width = 8; 2 | static num_geoms = 4; 3 | static offset = 0.0001f; 4 | static sorted = true; 5 | static specialized = true; 6 | 7 | fn @iterate_rays(primary: &PrimaryStream, begins: &[i32], ends: &[i32], num_geoms: i32, body: fn(i32, i32) -> ()) -> () { 8 | if ?num_geoms & sorted & specialized { 9 | for geom_id in unroll(0, num_geoms) { 10 | let (begin, end) = (begins(geom_id), ends(geom_id)); 11 | for i, vector_width in 
// Shading benchmark entry point exported to the C++ driver.
//
// For `num_iters` iterations, every ray visited by `iterate_rays` is read from
// `primary_in`, shaded with a synthetic material, and the sampled continuation
// ray and updated ray state are written to the same index in `primary_out`.
//
//   primary_in / primary_out  input and output ray streams
//   vertices, normals,
//   face_normals, texcoords   mesh attribute arrays
//   indices                   4 entries per triangle; the first 3 are read as vertex indices
//   pixels, width, height     texture image, addressed as pixels(j * width + i)
//   begins / ends             per-geometry ray ranges handed to `iterate_rays`
//   num_tris                  triangle count stored in the TriMesh
//   num_iters                 number of times the whole shading pass is repeated
extern fn cpu_bench_shading( primary_in: &PrimaryStream
                           , primary_out: &PrimaryStream
                           , vertices: &[Vec3]
                           , normals: &[Vec3]
                           , face_normals: &[Vec3]
                           , texcoords: &[Vec2]
                           , indices: &[i32]
                           , pixels: &[u32]
                           , width: i32
                           , height: i32
                           , begins: &[i32]
                           , ends: &[i32]
                           , num_tris: i32
                           , num_iters: i32) -> () {
    // Stream accessors. NOTE(review): the trailing `1` is presumably the
    // vector width of the accessor (scalar access) - confirm against the
    // make_*_reader/writer definitions.
    let read_primary_ray = make_ray_stream_reader(primary_in.rays, 1);
    let read_primary_hit = make_primary_stream_hit_reader(*primary_in, 1);
    let read_primary_state = make_primary_stream_state_reader(*primary_in, 1);
    let write_primary_ray = make_ray_stream_writer(primary_out.rays, 1);
    let write_primary_state = make_primary_stream_state_writer(*primary_out, 1);

    let math = cpu_intrinsics;

    // Texture image backed by the caller-provided pixel buffer.
    let image = make_image_rgba32(@ |i, j| pixels(j * width + i), width, height);

    // Synthetic material selected by the two low bits of the geometry id:
    //   bit 0 clear -> constant diffuse color, set -> textured diffuse color
    //   bit 1 clear -> constant specular color with exponent 96, set -> textured with exponent 12
    fn @shader(geom_id: i32, surf: SurfaceElement) -> Material {
        let texture = make_texture(math, make_repeat_border(), make_bilinear_filter(), image);
        let kd = if geom_id & 1 == 0 {
            make_color(0.0f, 1.0f, 0.0f)
        } else {
            texture(vec4_to_2(surf.attr(0)))
        };
        let diffuse = make_diffuse_bsdf(math, surf, kd);
        let (ks, ns) = if geom_id & 2 == 0 {
            (make_color(0.0f, 1.0f, 0.0f), 96.0f)
        } else {
            (texture(vec4_to_2(surf.attr(0))), 12.0f)
        };
        let specular = make_phong_bsdf(math, surf, ks, ns);
        let lum_ks = color_luminance(ks);
        let lum_kd = color_luminance(kd);
        // Mix weight is the specular share of the total luminance; 0 when both are black
        // (avoids dividing by zero).
        let k = select(lum_ks + lum_kd == 0.0f, 0.0f, lum_ks / (lum_ks + lum_kd));
        let bsdf = make_mix_bsdf(diffuse, specular, k);
        make_material(bsdf)
    }

    // Mesh view over the raw arrays passed in by the caller.
    let tri_mesh = TriMesh {
        vertices: @ |i| vertices(i),
        normals: @ |i| normals(i),
        face_normals: @ |i| face_normals(i),
        // Stride of 4 per triangle; the 4th entry is not read here.
        triangles: @ |i| (indices(i * 4 + 0), indices(i * 4 + 1), indices(i * 4 + 2)),
        // Single attribute: texture coordinates widened to a 4-component vector.
        attrs: @ |_| (false, @ |i| vec2_to_4(texcoords(i), 0.0f, 0.0f)),
        num_attrs: 1,
        num_tris: num_tris
    };

    for iter in range(0, num_iters) {
        // `num_geoms` is the file-level static declared at the top of this file.
        for i, geom_id in iterate_rays(primary_in, begins, ends, num_geoms) {
            let ray = read_primary_ray(i, 0);
            let hit = read_primary_hit(i, 0);
            // Mutable because the BSDF sampler takes `&mut state.rnd`.
            let mut state = read_primary_state(i, 0);

            let geom = make_tri_mesh_geometry(math, tri_mesh, @ |_, _, surf| shader(geom_id, surf));
            let surf = geom.surface_element(ray, hit);
            let mat = geom.shader(ray, hit, surf);

            // Sample the BSDF for the direction pointing back along the incoming ray.
            let out_dir = vec3_neg(ray.dir);
            let sample = mat.bsdf.sample(&mut state.rnd, out_dir, false);

            // Scale the carried contribution by the sample color and cos / pdf.
            let contrib = color_mulf(color_mul(state.contrib, sample.color), sample.cos / sample.pdf);
            let mis = if mat.bsdf.is_specular { 0.0f } else { 1.0f / sample.pdf };
            // Continuation ray starts at the surface point; `offset` is the file-level static.
            let new_ray = make_ray(surf.point, sample.in_dir, offset, flt_max);
            let new_state = make_ray_state(
                state.rnd,
                contrib,
                mis,
                state.depth + 1
            );

            write_primary_ray(i, 0, new_ray);
            write_primary_state(i, 0, new_state);
        }
    }
}
# Build script for the `bvh_extractor` tool.

# Sources that are always part of the tool. Paths are relative to this
# directory (tools/bvh_extractor).
set(EXTRACTOR_SRCS
    bvh_extractor.cpp
    extract_bvh2.cpp
    ../../src/driver/obj.cpp
    ../../src/driver/obj.h
    ../../src/driver/bvh.h)

# The BVH4/BVH8 extraction code is only compiled when Embree was found.
if(EMBREE_FOUND)
    list(APPEND EXTRACTOR_SRCS
        extract_bvh4_8.cpp
        ../../src/driver/embree_bvh.h)
endif()

add_executable(bvh_extractor ${EXTRACTOR_SRCS})

# An executable has no consumers, so its usage requirements are PRIVATE.
target_include_directories(bvh_extractor PRIVATE ../common ../../src)

if(EMBREE_FOUND)
    target_include_directories(bvh_extractor PRIVATE
        ${EMBREE_ROOT_DIR}/include
        ${EMBREE_ROOT_DIR}
        ${EMBREE_LIBRARY_DIR})
    # ENABLE_EMBREE_BVH switches on the Embree code path in bvh_extractor.cpp.
    # Keeping it in the same call means the call is never made with an empty
    # item list when EMBREE_DEFINITIONS is unset.
    target_compile_definitions(bvh_extractor PRIVATE
        ${EMBREE_DEFINITIONS}
        ENABLE_EMBREE_BVH)
    target_link_libraries(bvh_extractor PRIVATE ${EMBREE_DEPENDENCIES})
endif()

# Needs the interface file generated by bench_traversal
add_dependencies(bvh_extractor bench_traversal)
#include 3 | #include 4 | #include 5 | 6 | #include "driver/obj.h" 7 | #include "driver/file_path.h" 8 | #include "driver/bvh.h" 9 | 10 | #ifdef ENABLE_EMBREE_BVH 11 | size_t build_bvh8(std::ofstream&, const obj::TriMesh&); 12 | size_t build_bvh4(std::ofstream&, const obj::TriMesh&); 13 | #endif 14 | size_t build_bvh2(std::ofstream&, const obj::TriMesh&); 15 | 16 | inline void check_argument(int i, int argc, char** argv) { 17 | if (i + 1 >= argc) { 18 | std::cerr << "Missing argument for " << argv[i] << std::endl; 19 | exit(1); 20 | } 21 | } 22 | 23 | inline void usage() { 24 | std::cout << "Usage: bvh_extractor [options]\n" 25 | "Available options:\n" 26 | " -obj --obj-file Sets the OBJ file to use\n" 27 | " -o --output Sets the output file name\n"; 28 | } 29 | 30 | int main(int argc, char** argv) { 31 | std::string obj_file, out_file; 32 | for (int i = 1; i < argc; i++) { 33 | auto arg = argv[i]; 34 | if (arg[0] == '-') { 35 | if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) { 36 | usage(); 37 | return 0; 38 | } else if (!strcmp(arg, "-obj") || !strcmp(arg, "--obj-file")) { 39 | check_argument(i, argc, argv); 40 | obj_file = argv[++i]; 41 | } else if (!strcmp(arg, "-o") || !strcmp(arg, "--output")) { 42 | check_argument(i, argc, argv); 43 | out_file = argv[++i]; 44 | } else { 45 | std::cerr << "Unknown option '" << arg << "'" << std::endl; 46 | return 1; 47 | } 48 | } else { 49 | std::cerr << "Invalid argument '" << arg << "'" << std::endl; 50 | return 1; 51 | } 52 | } 53 | 54 | if (obj_file == "") { 55 | std::cerr << "No OBJ file specified" << std::endl; 56 | return 1; 57 | } 58 | if (out_file == "") { 59 | std::cerr << "No output file specified" << std::endl; 60 | return 1; 61 | } 62 | 63 | FilePath path(obj_file); 64 | obj::File obj; 65 | if (!load_obj(obj_file, obj)) { 66 | std::cerr << "Cannot load OBJ file" << std::endl; 67 | return 1; 68 | } 69 | obj::TriMesh tri_mesh = compute_tri_mesh(obj, 0); 70 | 71 | std::cout << "Loaded OBJ file with " << 
tri_mesh.indices.size() / 4 << " triangle(s)" << std::endl; 72 | 73 | std::ofstream out(out_file, std::ofstream::binary); 74 | if (!out) { 75 | std::cerr << "Cannot create output file" << std::endl; 76 | return 1; 77 | } 78 | 79 | uint32_t magic = 0x95CBED1F; 80 | out.write((char*)&magic, sizeof(uint32_t)); 81 | 82 | #ifdef ENABLE_EMBREE_BVH 83 | auto bvh8_nodes = build_bvh8(out, tri_mesh); 84 | if (!bvh8_nodes) { 85 | std::cerr << "Cannot build a BVH8 using Embree" << std::endl; 86 | return 1; 87 | } 88 | 89 | std::cout << "BVH8 successfully built (" << bvh8_nodes << " nodes)" << std::endl; 90 | 91 | auto bvh4_nodes = build_bvh4(out, tri_mesh); 92 | if (!bvh4_nodes) { 93 | std::cerr << "Cannot build a BVH4 using Embree" << std::endl; 94 | return 1; 95 | } 96 | 97 | std::cout << "BVH4 successfully built (" << bvh4_nodes << " nodes)" << std::endl; 98 | #else 99 | std::cout << "Compiled without Embree. Will only build a GPU BVH." << std::endl; 100 | #endif 101 | 102 | auto bvh2_nodes = build_bvh2(out, tri_mesh); 103 | if (!bvh2_nodes) { 104 | std::cerr << "Cannot build a BVH2" << std::endl; 105 | return 1; 106 | } 107 | 108 | std::cout << "BVH2 successfully built (" << bvh2_nodes << " nodes)" << std::endl; 109 | 110 | return 0; 111 | } 112 | -------------------------------------------------------------------------------- /tools/bvh_extractor/extract_bvh2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "traversal.h" 4 | #include "driver/bvh.h" 5 | #include "driver/obj.h" 6 | 7 | class Bvh2Builder { 8 | public: 9 | Bvh2Builder(std::vector& nodes, std::vector& tris) 10 | : nodes_(nodes), tris_(tris) 11 | {} 12 | 13 | void build(const std::vector& tris) { 14 | builder_.build(tris, NodeWriter(this), LeafWriter(this, tris), 2); 15 | } 16 | 17 | #ifdef STATISTICS 18 | void print_stats() const { builder_.print_stats(); } 19 | #endif 20 | 21 | private: 22 | struct CostFn { 23 | static float leaf_cost(int 
count, float area) { 24 | return count * area; 25 | } 26 | static float traversal_cost(float area) { 27 | return area * 1.0f; 28 | } 29 | }; 30 | 31 | struct NodeWriter { 32 | Bvh2Builder* builder; 33 | 34 | NodeWriter(Bvh2Builder* builder) 35 | : builder(builder) 36 | {} 37 | 38 | template 39 | int operator() (int parent, int child, const BBox& parent_bb, int count, BBoxFn bboxes) { 40 | auto& nodes = builder->nodes_; 41 | 42 | int i = nodes.size(); 43 | nodes.emplace_back(); 44 | 45 | if (parent >= 0 && child >= 0) 46 | nodes[parent].child[child] = i + 1; 47 | 48 | assert(count == 2); 49 | 50 | const BBox& bbox1 = bboxes(0); 51 | nodes[i].bounds[0] = bbox1.min.x; 52 | nodes[i].bounds[2] = bbox1.min.y; 53 | nodes[i].bounds[4] = bbox1.min.z; 54 | nodes[i].bounds[1] = bbox1.max.x; 55 | nodes[i].bounds[3] = bbox1.max.y; 56 | nodes[i].bounds[5] = bbox1.max.z; 57 | 58 | const BBox& bbox2 = bboxes(1); 59 | nodes[i].bounds[ 6] = bbox2.min.x; 60 | nodes[i].bounds[ 8] = bbox2.min.y; 61 | nodes[i].bounds[10] = bbox2.min.z; 62 | nodes[i].bounds[ 7] = bbox2.max.x; 63 | nodes[i].bounds[ 9] = bbox2.max.y; 64 | nodes[i].bounds[11] = bbox2.max.z; 65 | 66 | return i; 67 | } 68 | }; 69 | 70 | struct LeafWriter { 71 | Bvh2Builder* builder; 72 | const std::vector& ref_tris; 73 | 74 | LeafWriter(Bvh2Builder* builder, const std::vector& ref_tris) 75 | : builder(builder), ref_tris(ref_tris) 76 | {} 77 | 78 | template 79 | void operator() (int parent, int child, const BBox& leaf_bb, int ref_count, RefFn refs) { 80 | auto& nodes = builder->nodes_; 81 | auto& tris = builder->tris_; 82 | 83 | nodes[parent].child[child] = ~tris.size(); 84 | 85 | for (int i = 0; i < ref_count; i++) { 86 | const int ref = refs(i); 87 | const Tri& tri = ref_tris[ref]; 88 | auto e1 = tri.v0 - tri.v1; 89 | auto e2 = tri.v2 - tri.v0; 90 | auto n = cross(e1, e2); 91 | tris.emplace_back(Tri1 { 92 | { tri.v0.x, tri.v0.y, tri.v0.z}, 0, 93 | { e1.x, e1.y, e1.z}, 0, 94 | { e2.x, e2.y, e2.z}, ref 95 | }); 96 | } 97 | 98 
| // Add sentinel 99 | tris.back().prim_id |= 0x80000000; 100 | } 101 | }; 102 | 103 | SplitBvhBuilder<2, CostFn> builder_; 104 | std::vector& nodes_; 105 | std::vector& tris_; 106 | }; 107 | 108 | size_t build_bvh2(std::ofstream& out, const obj::TriMesh& tri_mesh) { 109 | std::vector tris; 110 | for (size_t i = 0; i < tri_mesh.indices.size(); i += 4) { 111 | auto& v0 = tri_mesh.vertices[tri_mesh.indices[i + 0]]; 112 | auto& v1 = tri_mesh.vertices[tri_mesh.indices[i + 1]]; 113 | auto& v2 = tri_mesh.vertices[tri_mesh.indices[i + 2]]; 114 | tris.emplace_back(v0, v1, v2); 115 | } 116 | 117 | std::vector new_nodes; 118 | std::vector new_tris; 119 | Bvh2Builder builder(new_nodes, new_tris); 120 | 121 | builder.build(tris); 122 | 123 | uint64_t offset = sizeof(uint32_t) * 3 + 124 | sizeof(Node2) * new_nodes.size() + 125 | sizeof(Tri1) * new_tris.size(); 126 | uint32_t block_type = 1; 127 | uint32_t num_nodes = new_nodes.size(); 128 | uint32_t num_tris = new_tris.size(); 129 | 130 | out.write((char*)&offset, sizeof(uint64_t)); 131 | out.write((char*)&block_type, sizeof(uint32_t)); 132 | out.write((char*)&num_nodes, sizeof(uint32_t)); 133 | out.write((char*)&num_tris, sizeof(uint32_t)); 134 | out.write((char*)new_nodes.data(), sizeof(Node2) * new_nodes.size()); 135 | out.write((char*)new_tris.data(), sizeof(Tri1) * new_tris.size()); 136 | 137 | return new_nodes.size(); 138 | } 139 | -------------------------------------------------------------------------------- /tools/bvh_extractor/extract_bvh4_8.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "traversal.h" 5 | #include "load_bvh.h" 6 | #include "driver/embree_bvh.h" 7 | #include "driver/obj.h" 8 | 9 | template 10 | void write_embree_bvh(std::ofstream& out, const std::vector& nodes, const std::vector& tris) { 11 | uint64_t offset = sizeof(uint32_t) * 3 + 12 | sizeof(BvhNode) * nodes.size() + 13 | sizeof(BvhTri) * tris.size(); 14 | uint32_t 
/// Reads the 32-bit magic number at the current position of `is` and reports
/// whether it matches the BVH file signature (0x95CBED1F).
///
/// Returns false when fewer than 4 bytes could be read. The value is
/// initialized and the read is checked so that an empty or truncated stream
/// never leads to comparing an indeterminate value.
inline bool check_header(std::istream& is) {
    uint32_t magic = 0;
    if (!is.read((char*)&magic, sizeof(uint32_t)))
        return false;
    return magic == 0x95CBED1F;
}
sizeof(std::uint64_t)) return false; 35 | is.read((char*)&block_type, sizeof(uint32_t)); 36 | if (is.gcount() != sizeof(uint32_t)) return false; 37 | 38 | offset -= sizeof(uint32_t); 39 | } while (!is.eof() && block_type != (uint32_t)type); 40 | 41 | return static_cast(is); 42 | } 43 | 44 | } // namespace detail 45 | 46 | template 47 | inline bool load_bvh(const std::string& filename, 48 | anydsl::Array& nodes, 49 | anydsl::Array& tris, 50 | BvhType bvh_type, 51 | anydsl::Platform platform, 52 | anydsl::Device device) { 53 | std::ifstream in(filename, std::ifstream::binary); 54 | if (!in || !detail::check_header(in) || !detail::locate_block(in, bvh_type)) 55 | return false; 56 | 57 | detail::BvhHeader header; 58 | in.read((char*)&header, sizeof(detail::BvhHeader)); 59 | auto host_nodes = std::move(anydsl::Array(header.node_count)); 60 | auto host_tris = std::move(anydsl::Array(header.tri_count )); 61 | in.read((char*)host_nodes.data(), sizeof(Node) * header.node_count); 62 | in.read((char*)host_tris.data(), sizeof(Tri) * header.tri_count); 63 | 64 | if (platform != anydsl::Platform::Host) { 65 | nodes = std::move(anydsl::Array(platform, device, header.node_count)); 66 | tris = std::move(anydsl::Array(platform, device, header.tri_count )); 67 | anydsl::copy(host_nodes, nodes); 68 | anydsl::copy(host_tris, tris); 69 | } else { 70 | nodes = std::move(host_nodes); 71 | tris = std::move(host_tris); 72 | } 73 | return true; 74 | } 75 | 76 | #endif // LOAD_BVH_H 77 | -------------------------------------------------------------------------------- /tools/common/load_rays.h: -------------------------------------------------------------------------------- 1 | #ifndef LOAD_RAYS_H 2 | #define LOAD_RAYS_H 3 | 4 | #include 5 | #include 6 | 7 | template 8 | struct RayTraits {}; 9 | 10 | struct Ray1; 11 | template <> 12 | struct RayTraits { 13 | enum { RayPerPacket = 1 }; 14 | static void write_ray(const float* org_dir, float tmin, float tmax, int /*j*/, Ray1& ray) { 15 | 
ray.org[0] = org_dir[0]; 16 | ray.org[1] = org_dir[1]; 17 | ray.org[2] = org_dir[2]; 18 | ray.dir[0] = org_dir[3]; 19 | ray.dir[1] = org_dir[4]; 20 | ray.dir[2] = org_dir[5]; 21 | ray.tmin = tmin; 22 | ray.tmax = tmax; 23 | } 24 | }; 25 | 26 | struct Ray4; 27 | template <> 28 | struct RayTraits { 29 | enum { RayPerPacket = 4 }; 30 | static void write_ray(const float* org_dir, float tmin, float tmax, int j, Ray4& ray) { 31 | ray.org[0][j] = org_dir[0]; 32 | ray.org[1][j] = org_dir[1]; 33 | ray.org[2][j] = org_dir[2]; 34 | ray.dir[0][j] = org_dir[3]; 35 | ray.dir[1][j] = org_dir[4]; 36 | ray.dir[2][j] = org_dir[5]; 37 | ray.tmin[j] = tmin; 38 | ray.tmax[j] = tmax; 39 | } 40 | }; 41 | 42 | struct Ray8; 43 | template <> 44 | struct RayTraits { 45 | enum { RayPerPacket = 8 }; 46 | static void write_ray(const float* org_dir, float tmin, float tmax, int j, Ray8& ray) { 47 | ray.org[0][j] = org_dir[0]; 48 | ray.org[1][j] = org_dir[1]; 49 | ray.org[2][j] = org_dir[2]; 50 | ray.dir[0][j] = org_dir[3]; 51 | ray.dir[1][j] = org_dir[4]; 52 | ray.dir[2][j] = org_dir[5]; 53 | ray.tmin[j] = tmin; 54 | ray.tmax[j] = tmax; 55 | } 56 | }; 57 | 58 | template 59 | inline bool load_rays(const std::string& filename, 60 | anydsl::Array& rays, 61 | float tmin, float tmax, 62 | anydsl::Platform platform, 63 | anydsl::Device device) { 64 | std::ifstream in(filename, std::ifstream::binary); 65 | if (!in) return false; 66 | 67 | in.seekg(0, std::ios_base::end); 68 | auto size = in.tellg(); 69 | in.seekg(0, std::ios_base::beg); 70 | 71 | if (size % (sizeof(float) * 6) != 0) return false; 72 | 73 | auto rays_per_packet = RayTraits::RayPerPacket; 74 | auto ray_count = size / (rays_per_packet * sizeof(float) * 6); 75 | anydsl::Array host_rays(ray_count); 76 | 77 | for (size_t i = 0; i < ray_count; i++) { 78 | for (int j = 0; j < rays_per_packet; j++) { 79 | float org_dir[6]; 80 | in.read((char*)org_dir, sizeof(float) * 6); 81 | RayTraits::write_ray(org_dir, tmin, tmax, j, host_rays[i]); 82 | } 83 
| } 84 | 85 | if (platform != anydsl::Platform::Host) { 86 | rays = std::move(anydsl::Array(platform, device, ray_count)); 87 | anydsl::copy(host_rays, rays); 88 | } else { 89 | rays = std::move(host_rays); 90 | } 91 | return true; 92 | } 93 | 94 | #endif // LOAD_RAYS_H 95 | -------------------------------------------------------------------------------- /tools/fbuf2png/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(fbuf2png fbuf2png.cpp) 2 | target_include_directories(fbuf2png PUBLIC ${PNG_INCLUDE_DIRS}) 3 | target_link_libraries(fbuf2png ${PNG_LIBRARIES}) 4 | -------------------------------------------------------------------------------- /tools/fbuf2png/fbuf2png.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | static void write_to_stream(png_structp png_ptr, png_bytep data, png_size_t length) { 9 | png_voidp a = png_get_io_ptr(png_ptr); 10 | ((std::ostream*)a)->write((const char*)data, length); 11 | } 12 | 13 | static void flush_stream(png_structp png_ptr) { 14 | // Nothing to do 15 | } 16 | 17 | inline void check_argument(int i, int argc, char** argv) { 18 | if (i + 1 >= argc) { 19 | std::cerr << "Missing argument for " << argv[i] << std::endl; 20 | exit(1); 21 | } 22 | } 23 | 24 | inline void usage() { 25 | std::cout << "Usage: fbuf2png [options] input output\n" 26 | "Available options:\n" 27 | " -sx --width Sets the width of the image (default: 1024)\n" 28 | " -sy --height Sets the height of the image (default: 1024)\n" 29 | " -n --normalize Normalizes the values contained in the image (disabled by default)\n"; 30 | } 31 | 32 | int main(int argc, char** argv) { 33 | bool normalize = false; 34 | int width = 1024; 35 | int height = 1024; 36 | std::vector files; 37 | 38 | for (int i = 1; i < argc; i++) { 39 | auto arg = argv[i]; 40 | if (arg[0] == '-') { 41 | if 
(!strcmp(arg, "-h") || !strcmp(arg, "--help")) { 42 | usage(); 43 | return 0; 44 | } else if (!strcmp(arg, "-n") || !strcmp(arg, "--normalize")) { 45 | normalize = true; 46 | } else if (!strcmp(arg, "-sx") || !strcmp(arg, "--width")) { 47 | check_argument(i, argc, argv); 48 | width = strtol(argv[++i], nullptr, 10); 49 | } else if (!strcmp(arg, "-sy") || !strcmp(arg, "--height")) { 50 | check_argument(i, argc, argv); 51 | height = strtol(argv[++i], nullptr, 10); 52 | } else { 53 | std::cerr << "Unknown option '" << arg << "'" << std::endl; 54 | return 1; 55 | } 56 | } else { 57 | files.push_back(argv[i]); 58 | } 59 | } 60 | 61 | if (files.size() < 2) { 62 | std::cerr << "Missing input or output file" << std::endl; 63 | return 1; 64 | } 65 | if (files.size() > 2) { 66 | std::cerr << "Too many arguments" << std::endl; 67 | return 1; 68 | } 69 | 70 | std::ifstream fbuf_file(files[0], std::ofstream::binary); 71 | std::ofstream png_file(files[1], std::ofstream::binary); 72 | if (!fbuf_file || !png_file) 73 | return 1; 74 | 75 | // Read fbuf file and convert it to an image 76 | std::vector float_image(width * height); 77 | if (!fbuf_file.read((char*)float_image.data(), width * height * sizeof(float))) { 78 | std::cerr << "Not enough data in the float buffer" << std::endl; 79 | return 1; 80 | } 81 | const float tmax = normalize ? 
*std::max_element(float_image.begin(), float_image.end()) : 1.0f; 82 | 83 | png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); 84 | if (!png_ptr) { 85 | return 1; 86 | } 87 | 88 | png_infop info_ptr = png_create_info_struct(png_ptr); 89 | if (!info_ptr) { 90 | png_destroy_read_struct(&png_ptr, nullptr, nullptr); 91 | return 1; 92 | } 93 | 94 | std::vector row(width * 4); 95 | if (setjmp(png_jmpbuf(png_ptr))) { 96 | png_destroy_write_struct(&png_ptr, &info_ptr); 97 | return 1; 98 | } 99 | 100 | png_set_write_fn(png_ptr, &png_file, write_to_stream, flush_stream); 101 | 102 | png_set_IHDR(png_ptr, info_ptr, width, height, 103 | 8, PNG_COLOR_TYPE_RGBA, PNG_INTERLACE_NONE, 104 | PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); 105 | 106 | png_write_info(png_ptr, info_ptr); 107 | 108 | for (int y = 0; y < height; y++) { 109 | for (int x = 0; x < width; x++) { 110 | uint8_t c = 255.0f * float_image[y * width + x] / tmax; 111 | row[x * 4 + 0] = c; 112 | row[x * 4 + 1] = c; 113 | row[x * 4 + 2] = c; 114 | row[x * 4 + 3] = 255.0f; 115 | } 116 | png_write_row(png_ptr, row.data()); 117 | } 118 | 119 | png_write_end(png_ptr, info_ptr); 120 | png_destroy_write_struct(&png_ptr, &info_ptr); 121 | 122 | return 0; 123 | } 124 | -------------------------------------------------------------------------------- /tools/ray_gen/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(ray_gen ray_gen.cpp) 2 | target_include_directories(ray_gen PUBLIC ../common ../../src) 3 | target_link_libraries(ray_gen PUBLIC ${AnyDSL_runtime_LIBRARIES}) 4 | # Needs the interface file generated by bench_traversal 5 | add_dependencies(ray_gen bench_traversal) 6 | --------------------------------------------------------------------------------