├── requirements.txt ├── examples ├── data │ ├── albedo.exr │ ├── beauty.exr │ ├── glossy.exr │ ├── normal.exr │ ├── diffuse.exr │ └── specular.exr ├── hello.h ├── triangle.h ├── curves.h ├── sphere.h ├── simpleMotionBlur.h ├── hello.cu ├── random.h ├── helpers.h ├── triangle.cu ├── curves.cu ├── simpleMotionBlur.cu ├── sphere.cu ├── denoiser.py ├── hello.py ├── triangle.py ├── sphere.py ├── curves.py └── simpleMotionBlur.py ├── optix ├── path_util.py.in ├── CMakeLists.txt ├── CMake │ └── FindOptiX.cmake └── setup.py ├── test ├── conftest.py ├── test_pipeline.py ├── test_context.py ├── test_program_group.py ├── test_module.py ├── util │ └── tutil.py └── sample_ptx.py ├── LICENSE.txt ├── README.md └── CMakeLists.txt /requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | cupy 3 | numpy 4 | Pillow 5 | cuda-python 6 | -------------------------------------------------------------------------------- /examples/data/albedo.exr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/otk-pyoptix/HEAD/examples/data/albedo.exr -------------------------------------------------------------------------------- /examples/data/beauty.exr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/otk-pyoptix/HEAD/examples/data/beauty.exr -------------------------------------------------------------------------------- /examples/data/glossy.exr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/otk-pyoptix/HEAD/examples/data/glossy.exr -------------------------------------------------------------------------------- /examples/data/normal.exr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/otk-pyoptix/HEAD/examples/data/normal.exr 
-------------------------------------------------------------------------------- /examples/data/diffuse.exr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/otk-pyoptix/HEAD/examples/data/diffuse.exr -------------------------------------------------------------------------------- /examples/data/specular.exr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/otk-pyoptix/HEAD/examples/data/specular.exr -------------------------------------------------------------------------------- /optix/path_util.py.in: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | include_path = "${OptiX_INCLUDE_DIR}" 4 | cuda_tk_path = "${CUDAToolkit_INCLUDE_DIRS}" 5 | stddef_path = "${OptiX_STDDEF_DIR}" 6 | 7 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 NVIDIA CORPORATION All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 4 | 5 | 6 | import sys 7 | import os 8 | sys.path.append(os.path.join(os.path.dirname(__file__), 'util')) 9 | -------------------------------------------------------------------------------- /test/test_pipeline.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 NVIDIA CORPORATION All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 
4 | 5 | 6 | 7 | import optix 8 | import pytest 9 | import cupy as cp 10 | 11 | import tutil 12 | 13 | 14 | 15 | 16 | class TestPipeline: 17 | 18 | def test_pipeline_options( self ): 19 | 20 | pipeline_options = optix.PipelineCompileOptions() 21 | pipeline_options.usesMotionBlur = False 22 | pipeline_options.traversableGraphFlags = optix.TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING 23 | pipeline_options.numPayloadValues = 2 24 | pipeline_options.numAttributeValues = 2 25 | pipeline_options.exceptionFlags = optix.EXCEPTION_FLAG_NONE 26 | pipeline_options.pipelineLaunchParamsVariableName = "params1" 27 | assert pipeline_options.pipelineLaunchParamsVariableName == "params1" 28 | 29 | 30 | pipeline_options = optix.PipelineCompileOptions( 31 | usesMotionBlur = False, 32 | traversableGraphFlags = optix.TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING, 33 | numPayloadValues = 3, 34 | numAttributeValues = 4, 35 | exceptionFlags = optix.EXCEPTION_FLAG_NONE, 36 | pipelineLaunchParamsVariableName = "params2" 37 | ) 38 | assert pipeline_options.pipelineLaunchParamsVariableName == "params2" 39 | 40 | 41 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 
11 | * Neither the name of NVIDIA CORPORATION nor the names of its 12 | contributors may be used to endorse or promote products derived 13 | from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | -------------------------------------------------------------------------------- /examples/hello.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 
12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | // 28 | 29 | struct Params 30 | { 31 | uchar4* image; 32 | unsigned int image_width; 33 | }; 34 | 35 | struct RayGenData 36 | { 37 | float r,g,b; 38 | }; 39 | -------------------------------------------------------------------------------- /optix/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 
4 | 5 | cmake_minimum_required(VERSION 3.17) 6 | project(optix) 7 | 8 | #------------------------------------------------------------------------------ 9 | # Fetch external dependencies 10 | #------------------------------------------------------------------------------ 11 | include(FetchContent) 12 | 13 | message(VERBOSE "Finding pybind11...") 14 | FetchContent_Declare( 15 | pybind11 16 | GIT_REPOSITORY https://github.com/pybind/pybind11 17 | GIT_TAG v2.9.2 18 | GIT_SHALLOW TRUE 19 | ) 20 | FetchContent_GetProperties(pybind11) 21 | if(NOT pybind11_POPULATED) 22 | FetchContent_Populate(pybind11) 23 | add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR}) 24 | endif() 25 | 26 | #------------------------------------------------------------------------------ 27 | # set environment 28 | #------------------------------------------------------------------------------ 29 | 30 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/CMake) 31 | 32 | if(NOT TARGET OptiX::OptiX) 33 | find_package( OptiX REQUIRED ) 34 | endif() 35 | if(NOT TARGET CUDA::cuda_driver) 36 | find_package( CUDAToolkit 10.0 REQUIRED ) 37 | endif() 38 | 39 | configure_file("${CMAKE_SOURCE_DIR}/path_util.py.in" "${CMAKE_SOURCE_DIR}/../examples/path_util.py") 40 | 41 | 42 | #------------------------------------------------------------------------------ 43 | # main build 44 | #------------------------------------------------------------------------------ 45 | 46 | pybind11_add_module(optix main.cpp) 47 | 48 | target_link_libraries( optix PRIVATE 49 | OptiX::OptiX 50 | CUDA::cuda_driver 51 | CUDA::cudart 52 | ) 53 | target_compile_features( optix PRIVATE 54 | cxx_std_17 55 | ) 56 | add_custom_command( 57 | TARGET optix POST_BUILD 58 | COMMAND ${CMAKE_COMMAND} -E copy_directory 59 | ${OptiX_INCLUDE_DIR} 60 | $<TARGET_FILE_DIR:optix>/include 61 | ) 62 | 63 | -------------------------------------------------------------------------------- /examples/triangle.h: 
-------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | // 28 | 29 | struct Params 30 | { 31 | uchar4* image; 32 | unsigned int image_width; 33 | unsigned int image_height; 34 | float3 cam_eye; 35 | float3 cam_u, cam_v, cam_w; 36 | OptixTraversableHandle handle; 37 | }; 38 | 39 | 40 | struct RayGenData 41 | { 42 | // No data needed 43 | }; 44 | 45 | 46 | struct MissData 47 | { 48 | float3 bg_color; 49 | }; 50 | 51 | 52 | struct HitGroupData 53 | { 54 | // No data needed 55 | }; 56 | -------------------------------------------------------------------------------- /examples/curves.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | // 28 | 29 | #include 30 | #include 31 | 32 | struct Params 33 | { 34 | uchar4* image; 35 | unsigned int image_width; 36 | unsigned int image_height; 37 | float3 cam_eye; 38 | float3 cam_u, cam_v, cam_w; 39 | OptixTraversableHandle handle; 40 | }; 41 | 42 | struct RayGenData 43 | { 44 | // No data needed 45 | }; 46 | 47 | 48 | struct MissData 49 | { 50 | float3 bg_color; 51 | }; 52 | 53 | 54 | struct HitGroupData 55 | { 56 | // No data needed 57 | }; 58 | -------------------------------------------------------------------------------- /examples/sphere.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 
12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | // 28 | 29 | struct Sphere 30 | { 31 | float center_x, center_y, center_z; 32 | float radius; 33 | }; 34 | 35 | 36 | struct Params 37 | { 38 | uchar4* image; 39 | unsigned int image_width; 40 | unsigned int image_height; 41 | int origin_x; 42 | int origin_y; 43 | OptixTraversableHandle handle; 44 | }; 45 | 46 | 47 | struct RayGenData 48 | { 49 | float3 cam_eye; 50 | float3 camera_u, camera_v, camera_w; 51 | }; 52 | 53 | 54 | struct MissData 55 | { 56 | float r, g, b; 57 | }; 58 | 59 | 60 | struct SphereHitGroupData 61 | { 62 | Sphere sphere; 63 | }; -------------------------------------------------------------------------------- /examples/simpleMotionBlur.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 
3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | // 28 | 29 | enum RayType 30 | { 31 | RAY_TYPE_RADIANCE = 0, 32 | RAY_TYPE_COUNT 33 | }; 34 | 35 | 36 | struct Params 37 | { 38 | unsigned int width; 39 | unsigned int height; 40 | float4* accum_buffer; 41 | uchar4* frame_buffer; 42 | unsigned int subframe_index; 43 | 44 | float3 eye; 45 | float3 U; 46 | float3 V; 47 | float3 W; 48 | 49 | OptixTraversableHandle handle; 50 | }; 51 | 52 | 53 | struct RayGenData 54 | { 55 | }; 56 | 57 | 58 | struct MissData 59 | { 60 | float3 color; 61 | unsigned int pad; 62 | }; 63 | 64 | 65 | struct SphereData 66 | { 67 | float3 center; 68 | float radius; 69 | }; 70 | 71 | 72 | struct HitGroupData 73 | { 74 | float3 color; 75 | 76 | // For spheres. In real use case, we would have an abstraction for geom data/ material data 77 | float3 center; 78 | float radius; 79 | 80 | unsigned int pad; 81 | 82 | }; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyOptiX 2 | 3 | Python bindings for OptiX 7. 4 | 5 | ## Installation 6 | 7 | 8 | ### Dependencies 9 | 10 | #### OptiX SDK 11 | Install [OptiX SDK](https://developer.nvidia.com/designworks/optix/download) version 7.6 or newer. 12 | 13 | #### CUDA SDK 14 | Install the [CUDA SDK](https://developer.nvidia.com/cuda-downloads). Version 12.6 or newer is required for the examples; otherwise, use the version required by your OptiX SDK. 15 | 16 | #### Build system requirements: 17 | * [cmake](https://cmake.org/) 18 | * [pip](https://pypi.org/project/pip/) 19 | 20 | #### Code sample dependencies 21 | To run the PyOptiX examples or tests, the Python modules specified in `PyOptiX/requirements.txt` must be installed: 22 | * pytest 23 | * cupy 24 | * numpy 25 | * Pillow 26 | * cuda-python 27 | 28 | ### Virtual Environment 29 | In most cases, it makes sense to set up a Python virtual environment. Below are examples of how to set up your environment via either `Conda` or `venv`. 
30 | 31 | #### `venv` Virtual Environment 32 | Create and activate a new virtual environment: 33 | ``` 34 | python3 -m venv env 35 | source env/bin/activate 36 | ``` 37 | Install all dependencies: 38 | ``` 39 | pip install -r requirements.txt 40 | ``` 41 | 42 | #### Conda Environment 43 | Create an environment containing prerequisites: 44 | ``` 45 | conda create -n pyoptix python numpy conda-forge::cupy pillow pytest 46 | ``` 47 | Activate the environment: 48 | ``` 49 | conda activate pyoptix 50 | ``` 51 | The `cuda-python` dependency, necessary for running the examples, needs to be installed via pip: 52 | ``` 53 | pip install cuda-python 54 | ``` 55 | 56 | ### Building and installing the `optix` Python module 57 | Point `setuptools/CMake` to OptiX by setting the following environment variable. 58 | 59 | Linux: 60 | ``` 61 | export PYOPTIX_CMAKE_ARGS="-DOptiX_INSTALL_DIR=<path to OptiX SDK>" 62 | ``` 63 | Windows: 64 | ``` 65 | set PYOPTIX_CMAKE_ARGS=-DOptiX_INSTALL_DIR=C:\ProgramData\NVIDIA Corporation\OptiX SDK 7.0.0 66 | ``` 67 | 68 | Build and install using `pip` and `setuptools`: 69 | ``` 70 | cd optix 71 | pip install . 72 | ``` 73 | 74 | When compiling against an OptiX 7.0 SDK, an additional environment variable needs to be set 75 | containing a path to the system's stddef.h location. E.g. 76 | ``` 77 | export PYOPTIX_STDDEF_DIR="/usr/include/linux" 78 | ``` 79 | 80 | ## Running the Examples 81 | 82 | Run the `hello` sample: 83 | ``` 84 | cd examples 85 | python hello.py 86 | ``` 87 | If the example runs successfully, a green square will be rendered. 88 | 89 | ## Running the Test Suite 90 | 91 | The tests use `pytest` and can be run from the test directory like this: 92 | ``` 93 | cd test 94 | python -m pytest 95 | ``` 96 | -------------------------------------------------------------------------------- /optix/CMake/FindOptiX.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2018, NVIDIA CORPORATION. 
All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | 29 | if (TARGET OptiX::OptiX) 30 | return() 31 | endif() 32 | 33 | macro(OptiX_config_message) 34 | if (NOT DEFINED OptiX_FIND_QUIETLY) 35 | message(${ARGN}) 36 | endif() 37 | endmacro() 38 | 39 | # Locate the OptiX distribution. Search relative to the SDK first, then look in the system. 
40 | 41 | find_path(OptiX_ROOT_DIR NAMES include/optix.h PATHS ${OptiX_INSTALL_DIR}) 42 | 43 | include(FindPackageHandleStandardArgs) 44 | find_package_handle_standard_args(OptiX 45 | FOUND_VAR OptiX_FOUND 46 | REQUIRED_VARS 47 | OptiX_ROOT_DIR 48 | REASON_FAILURE_MESSAGE 49 | "OptiX installation not found on CMAKE_PREFIX_PATH (include/optix.h)" 50 | ) 51 | 52 | if (NOT OptiX_FOUND) 53 | set(OptiX_NOT_FOUND_MESSAGE "Unable to find OptiX, please add your OptiX installation to CMAKE_PREFIX_PATH") 54 | return() 55 | endif() 56 | 57 | set(OptiX_INCLUDE_DIR ${OptiX_ROOT_DIR}/include) 58 | 59 | add_library(OptiX::OptiX INTERFACE IMPORTED) 60 | target_include_directories(OptiX::OptiX INTERFACE ${OptiX_INCLUDE_DIR}) 61 | 62 | -------------------------------------------------------------------------------- /examples/hello.cu: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 
15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | // 28 | 29 | #include <optix.h> 30 | 31 | #include "hello.h" 32 | 33 | extern "C" { 34 | __constant__ Params params; 35 | } 36 | 37 | extern "C" 38 | __global__ void __raygen__hello() 39 | { 40 | uint3 launch_index = optixGetLaunchIndex(); 41 | RayGenData* rtData = (RayGenData*)optixGetSbtDataPointer(); 42 | params.image[launch_index.y * params.image_width + launch_index.x] = 43 | make_uchar4( 44 | max( 0.0f, min( 255.0f, rtData->r*255.0f ) ), 45 | max( 0.0f, min( 255.0f, rtData->g*255.0f ) ), 46 | max( 0.0f, min( 255.0f, rtData->b*255.0f ) ), 47 | 255 48 | ); 49 | } 50 | 51 | extern "C" 52 | __global__ void __anyhit__noop() {} 53 | 54 | extern "C" 55 | __global__ void __closesthit__noop() {} 56 | 57 | extern "C" 58 | __global__ void ___intersection__noop() {} 59 | 60 | extern "C" 61 | __global__ void ___intersect__noop() {} 62 | 63 | extern "C" 64 | __global__ void ___miss__noop() {} 65 | 66 | extern "C" 67 | __global__ void ___direct_callable__noop() {} 68 | 69 | extern "C" 70 | __global__ void ___continuation_callable__noop() {} 71 | -------------------------------------------------------------------------------- /examples/random.h: 
-------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | // 28 | 29 | #pragma once 30 | 31 | template<unsigned int N> 32 | static __host__ __device__ __inline__ unsigned int tea( unsigned int val0, unsigned int val1 ) 33 | { 34 | unsigned int v0 = val0; 35 | unsigned int v1 = val1; 36 | unsigned int s0 = 0; 37 | 38 | for( unsigned int n = 0; n < N; n++ ) 39 | { 40 | s0 += 0x9e3779b9; 41 | v0 += ((v1<<4)+0xa341316c)^(v1+s0)^((v1>>5)+0xc8013ea4); 42 | v1 += ((v0<<4)+0xad90777d)^(v0+s0)^((v0>>5)+0x7e95761e); 43 | } 44 | 45 | return v0; 46 | } 47 | 48 | // Generate random unsigned int in [0, 2^24) 49 | static __host__ __device__ __inline__ unsigned int lcg(unsigned int &prev) 50 | { 51 | const unsigned int LCG_A = 1664525u; 52 | const unsigned int LCG_C = 1013904223u; 53 | prev = (LCG_A * prev + LCG_C); 54 | return prev & 0x00FFFFFF; 55 | } 56 | 57 | static __host__ __device__ __inline__ unsigned int lcg2(unsigned int &prev) 58 | { 59 | prev = (prev*8121 + 28411) % 134456; 60 | return prev; 61 | } 62 | 63 | // Generate random float in [0, 1) 64 | static __host__ __device__ __inline__ float rnd(unsigned int &prev) 65 | { 66 | return ((float) lcg(prev) / (float) 0x01000000); 67 | } 68 | 69 | static __host__ __device__ __inline__ unsigned int rot_seed( unsigned int seed, unsigned int frame ) 70 | { 71 | return seed ^ frame; 72 | } 73 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 
9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#

# A Python interpreter is needed both to create the virtual environment and to
# drive the setup.py build below.
find_package(Python REQUIRED)

# temporary guard while awaiting 8.0 fixes
# (skips everything below, so no PyOptiX target exists for OptiX newer than 7.7)
if(OPTIX_VERSION VERSION_GREATER 7.7)
    return()
endif()

# Stage the package sources, examples, tests and requirements in the build tree.
# file(COPY) runs at configure time, so later edits to the sources need a re-configure.
file(COPY optix examples requirements.txt test DESTINATION ${CMAKE_CURRENT_BINARY_DIR})

# Create a virtual environment and install requirements.txt into it.
# env.timestamp is the stamp file that marks the environment as up to date.
# NOTE(review): each COMMAND runs as its own process, so the "activate" steps
# cannot affect the commands that follow them; pip is reached through its
# explicit path inside the environment instead.
if(UNIX)
    add_custom_command(OUTPUT env.timestamp
        COMMAND ${Python_EXECUTABLE} -m venv env
        COMMAND bash -c "source ./env/bin/activate"
        COMMAND ./env/bin/pip install -r requirements.txt
        COMMAND ${CMAKE_COMMAND} -E touch env.timestamp
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
        COMMENT "Creating virtual environment for PyOptiX build")
else()
    add_custom_command(OUTPUT env.timestamp
        COMMAND ${Python_EXECUTABLE} -m venv env
        COMMAND powershell ./env/Scripts/Activate.ps1
        COMMAND powershell ./env/Scripts/pip.exe install -r requirements.txt
        COMMAND ${CMAKE_COMMAND} -E touch env.timestamp
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
        COMMENT "Creating virtual environment for PyOptiX build")
endif()

# Build the extension by running setup.py in the staged "optix" directory
# (a relative WORKING_DIRECTORY is resolved against the current build directory).
# The OptiX install location is forwarded through PYOPTIX_CMAKE_ARGS.
# NOTE(review): the build is run with ${Python_EXECUTABLE}, not the interpreter
# inside ./env -- confirm that this is intended.
add_custom_target(PyOptiX ALL
    ${CMAKE_COMMAND} -E env PYOPTIX_CMAKE_ARGS="-DOptiX_INSTALL_DIR=${OptiX_INSTALL_DIR}"
    ${Python_EXECUTABLE} setup.py build
    WORKING_DIRECTORY optix
    DEPENDS env.timestamp)
-------------------------------------------------------------------------------- /examples/helpers.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer.
9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | // 28 | 29 | #pragma once 30 | 31 | #include 32 | #include "vec_math.h" 33 | 34 | 35 | __forceinline__ __device__ float3 toSRGB( const float3& c ) 36 | { 37 | float invGamma = 1.0f / 2.4f; 38 | float3 powed = make_float3( powf( c.x, invGamma ), powf( c.y, invGamma ), powf( c.z, invGamma ) ); 39 | return make_float3( 40 | c.x < 0.0031308f ? 12.92f * c.x : 1.055f * powed.x - 0.055f, 41 | c.y < 0.0031308f ? 12.92f * c.y : 1.055f * powed.y - 0.055f, 42 | c.z < 0.0031308f ? 
12.92f * c.z : 1.055f * powed.z - 0.055f ); 43 | } 44 | 45 | //__forceinline__ __device__ float dequantizeUnsigned8Bits( const unsigned char i ) 46 | //{ 47 | // enum { N = (1 << 8) - 1 }; 48 | // return min((float)i / (float)N), 1.f) 49 | //} 50 | __forceinline__ __device__ unsigned char quantizeUnsigned8Bits( float x ) 51 | { 52 | x = clamp( x, 0.0f, 1.0f ); 53 | enum { N = (1 << 8) - 1, Np1 = (1 << 8) }; 54 | return (unsigned char)min((unsigned int)(x * (float)Np1), (unsigned int)N); 55 | } 56 | 57 | __forceinline__ __device__ uchar4 make_color( const float3& c ) 58 | { 59 | // first apply gamma, then convert to unsigned char 60 | float3 srgb = toSRGB( clamp( c, 0.0f, 1.0f ) ); 61 | return make_uchar4( quantizeUnsigned8Bits( srgb.x ), quantizeUnsigned8Bits( srgb.y ), quantizeUnsigned8Bits( srgb.z ), 255u ); 62 | } 63 | __forceinline__ __device__ uchar4 make_color( const float4& c ) 64 | { 65 | return make_color( make_float3( c.x, c.y, c.z ) ); 66 | } 67 | -------------------------------------------------------------------------------- /optix/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 NVIDIA CORPORATION All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 
4 | 5 | import os 6 | import re 7 | import sys 8 | import platform 9 | import subprocess 10 | 11 | from setuptools import setup, Extension 12 | from setuptools.command.build_ext import build_ext 13 | from distutils.version import LooseVersion 14 | 15 | 16 | class CMakeExtension(Extension): 17 | def __init__(self, name, sourcedir=''): 18 | Extension.__init__(self, name, sources=[]) 19 | self.sourcedir = os.path.abspath(sourcedir) 20 | 21 | 22 | class CMakeBuild(build_ext): 23 | def run(self): 24 | try: 25 | out = subprocess.check_output(['cmake', '--version']) 26 | except OSError: 27 | raise RuntimeError("CMake must be installed to build the following extensions: " + 28 | ", ".join(e.name for e in self.extensions)) 29 | 30 | if platform.system() == "Windows": 31 | cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1)) 32 | if cmake_version < '3.1.0': 33 | raise RuntimeError("CMake >= 3.1.0 is required on Windows") 34 | 35 | for ext in self.extensions: 36 | self.build_extension(ext) 37 | 38 | def build_extension(self, ext): 39 | extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) 40 | # required for auto-detection of auxiliary "native" libs 41 | if not extdir.endswith(os.path.sep): 42 | extdir += os.path.sep 43 | 44 | cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, 45 | '-DPYTHON_EXECUTABLE=' + sys.executable] 46 | 47 | cfg = 'Debug' if self.debug else 'Release' 48 | build_args = ['--config', cfg] 49 | 50 | if platform.system() == "Windows": 51 | cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)] 52 | if sys.maxsize > 2**32: 53 | cmake_args += ['-A', 'x64'] 54 | build_args += ['--', '/m'] 55 | else: 56 | cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] 57 | build_args += ['--', '-j2'] 58 | 59 | if "PYOPTIX_CMAKE_ARGS" in os.environ: 60 | cmake_args += os.environ[ 'PYOPTIX_CMAKE_ARGS' ].split() 61 | 62 | # the following is only needed for 7.0 compiles, because the optix 
device header of that 63 | # first version included stddef.h. 64 | if "PYOPTIX_STDDEF_DIR" in os.environ: 65 | cmake_args += [ "-DOptiX_STDDEF_DIR={}".format(os.environ[ 'PYOPTIX_STDDEF_DIR' ]) ] 66 | 67 | env = os.environ.copy() 68 | env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), 69 | self.distribution.get_version()) 70 | if not os.path.exists(self.build_temp): 71 | os.makedirs(self.build_temp) 72 | print( "CMAKE CMD: <<<{}>>>".format( ' '.join( ['cmake', ext.sourcedir] + cmake_args ) ) ) 73 | subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) 74 | subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) 75 | 76 | 77 | 78 | setup( 79 | name='optix', 80 | version='0.0.1', 81 | author='Keith Morley', 82 | author_email='kmorley@nvidia.com', 83 | description='Python bindings for NVIDIA OptiX', 84 | long_description='', 85 | ext_modules=[CMakeExtension('optix')], 86 | cmdclass=dict(build_ext=CMakeBuild), 87 | zip_safe=False, 88 | ) 89 | -------------------------------------------------------------------------------- /test/test_context.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 NVIDIA CORPORATION All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 
4 | 5 | import cupy as cp 6 | import optix as ox 7 | import pytest 8 | 9 | import tutil 10 | 11 | 12 | class Logger: 13 | def __init__(self): 14 | self.num_mssgs = 0 15 | 16 | def __call__(self, level, tag, mssg): 17 | print("[{:>2}][{:>12}]: {}".format(level, tag, mssg)) 18 | self.num_mssgs += 1 19 | 20 | 21 | def log_callback(level, tag, mssg): 22 | print("[{:>2}][{:>12}]: {}".format(level, tag, mssg)) 23 | 24 | 25 | class TestDeviceContextOptions: 26 | def test_default_ctor(self): 27 | options = ox.DeviceContextOptions() 28 | assert options.logCallbackFunction is None 29 | assert options.logCallbackLevel == 0 30 | if tutil.optix_version_gte( (7,2) ): 31 | assert options.validationMode == ox.DEVICE_CONTEXT_VALIDATION_MODE_OFF 32 | 33 | def test_ctor0(self): 34 | options = ox.DeviceContextOptions(log_callback) 35 | assert options.logCallbackFunction == log_callback 36 | 37 | def test_ctor1(self): 38 | logger = Logger() 39 | if tutil.optix_version_gte( (7,2) ): 40 | options = ox.DeviceContextOptions( 41 | logCallbackFunction = logger, 42 | logCallbackLevel = 3, 43 | validationMode = ox.DEVICE_CONTEXT_VALIDATION_MODE_ALL 44 | ) 45 | else: 46 | options = ox.DeviceContextOptions( 47 | logCallbackFunction = logger, 48 | logCallbackLevel = 3 49 | ) 50 | assert options.logCallbackFunction == logger 51 | assert options.logCallbackLevel == 3 52 | if tutil.optix_version_gte( (7,2) ): 53 | assert options.validationMode == ox.DEVICE_CONTEXT_VALIDATION_MODE_ALL 54 | else: 55 | assert options.validationMode == ox.DEVICE_CONTEXT_VALIDATION_MODE_OFF 56 | 57 | def test_context_options_props(self): 58 | options = ox.DeviceContextOptions() 59 | options.logCallbackLevel = 1 60 | assert options.logCallbackLevel == 1 61 | 62 | options.logCallbackFunction = log_callback 63 | assert options.logCallbackFunction == log_callback 64 | 65 | 66 | class TestContext: 67 | def test_create_destroy( self ): 68 | ctx = ox.deviceContextCreate(0, ox.DeviceContextOptions()) 69 | ctx.destroy() 70 | 71 | 
def test_get_property( self ): 72 | ctx = ox.deviceContextCreate(0, ox.DeviceContextOptions()) 73 | v = ctx.getProperty( ox.DEVICE_PROPERTY_LIMIT_NUM_BITS_INSTANCE_VISIBILITY_MASK ) 74 | assert type( v ) is int 75 | assert v > 1 and v <= 16 # at time of writing, was 8 76 | ctx.destroy() 77 | 78 | def test_set_log_callback( self ): 79 | ctx = ox.deviceContextCreate(0, ox.DeviceContextOptions()) 80 | logger = Logger() 81 | ctx.setLogCallback( logger, 3 ) 82 | ctx.setLogCallback( None, 2 ) 83 | ctx.setLogCallback( log_callback, 1 ) 84 | ctx.destroy() 85 | 86 | def test_cache_default(self): 87 | ctx = ox.deviceContextCreate(0, ox.DeviceContextOptions()) 88 | assert ctx.getCacheEnabled() 89 | ctx.destroy() 90 | 91 | def test_cache_enable_disable(self): 92 | ctx = ox.deviceContextCreate(0, ox.DeviceContextOptions()) 93 | ctx.setCacheEnabled(False); 94 | assert not ctx.getCacheEnabled() 95 | ctx.setCacheEnabled(True); 96 | assert ctx.getCacheEnabled() 97 | ctx.destroy() 98 | 99 | def test_cache_database_sizes(self): 100 | ctx = ox.deviceContextCreate(0, ox.DeviceContextOptions()) 101 | db_sizes = ( 1024, 1024*1024 ) 102 | ctx.setCacheDatabaseSizes( *db_sizes ) 103 | assert ctx.getCacheDatabaseSizes() == db_sizes 104 | ctx.destroy() 105 | 106 | def test_set_get_cache( self ): 107 | ctx = ox.deviceContextCreate(0, ox.DeviceContextOptions()) 108 | 109 | v = ctx.getCacheLocation() 110 | assert type(v) is str 111 | 112 | loc = "/dev/null" 113 | with pytest.raises( RuntimeError ): 114 | ctx.setCacheLocation( loc ) # not valid dir 115 | ctx.destroy() 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /examples/triangle.cu: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | // 28 | 29 | #include 30 | 31 | #include "triangle.h" 32 | #include "helpers.h" 33 | 34 | #include "vec_math.h" 35 | 36 | extern "C" { 37 | __constant__ Params params; 38 | } 39 | 40 | 41 | static __forceinline__ __device__ void setPayload( float3 p ) 42 | { 43 | optixSetPayload_0( __float_as_int( p.x ) ); 44 | optixSetPayload_1( __float_as_int( p.y ) ); 45 | optixSetPayload_2( __float_as_int( p.z ) ); 46 | } 47 | 48 | 49 | static __forceinline__ __device__ void computeRay( uint3 idx, uint3 dim, float3& origin, float3& direction ) 50 | { 51 | const float3 U = params.cam_u; 52 | const float3 V = params.cam_v; 53 | const float3 W = params.cam_w; 54 | const float2 d = 2.0f * make_float2( 55 | static_cast( idx.x ) / static_cast( dim.x ), 56 | static_cast( idx.y ) / static_cast( dim.y ) 57 | ) - 1.0f; 58 | 59 | origin = params.cam_eye; 60 | direction = normalize( d.x * U + d.y * V + W ); 61 | } 62 | 63 | 64 | extern "C" __global__ void __raygen__rg() 65 | { 66 | // Lookup our location within the launch grid 67 | const uint3 idx = optixGetLaunchIndex(); 68 | const uint3 dim = optixGetLaunchDimensions(); 69 | 70 | // Map our launch idx to a screen location and create a ray from the camera 71 | // location through the screen 72 | float3 ray_origin, ray_direction; 73 | computeRay( make_uint3( idx.x, idx.y, 0 ), dim, ray_origin, ray_direction ); 74 | 75 | // Trace the ray against our scene hierarchy 76 | float3 result = make_float3( 0 ); 77 | unsigned int p0, p1, p2; 78 | optixTrace( 79 | params.handle, 80 | ray_origin, 81 | ray_direction, 82 | 0.0f, // Min intersection distance 83 | 1e16f, // Max intersection distance 84 | 0.0f, // rayTime -- used for motion blur 85 | OptixVisibilityMask( 255 ), // Specify always visible 86 | OPTIX_RAY_FLAG_NONE, 87 | 0, // SBT offset -- See SBT discussion 88 | 1, // SBT stride -- See SBT discussion 89 | 0, // missSBTIndex -- See SBT discussion 90 | p0, p1, p2 ); 91 | result.x = __int_as_float( p0 ); 92 | result.y = __int_as_float( p1 
); 93 | result.z = __int_as_float( p2 ); 94 | 95 | // Record results in our output raster 96 | params.image[idx.y * params.image_width + idx.x] = make_color( result ); 97 | } 98 | 99 | 100 | extern "C" __global__ void __miss__ms() 101 | { 102 | MissData* miss_data = reinterpret_cast( optixGetSbtDataPointer() ); 103 | setPayload( miss_data->bg_color ); 104 | } 105 | 106 | 107 | extern "C" __global__ void __closesthit__ch() 108 | { 109 | // When built-in triangle intersection is used, a number of fundamental 110 | // attributes are provided by the OptiX API, indlucing barycentric coordinates. 111 | const float2 barycentrics = optixGetTriangleBarycentrics(); 112 | 113 | setPayload( make_float3( barycentrics, 1.0f ) ); 114 | } 115 | -------------------------------------------------------------------------------- /examples/curves.cu: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 
15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | // 28 | #include 29 | 30 | #include "curves.h" 31 | #include "helpers.h" 32 | 33 | #include "vec_math.h" 34 | 35 | 36 | extern "C" { 37 | __constant__ Params params; 38 | } 39 | 40 | 41 | static __forceinline__ __device__ void setPayload( float3 p ) 42 | { 43 | optixSetPayload_0( __float_as_uint( p.x ) ); 44 | optixSetPayload_1( __float_as_uint( p.y ) ); 45 | optixSetPayload_2( __float_as_uint( p.z ) ); 46 | } 47 | 48 | 49 | static __forceinline__ __device__ void computeRay( uint3 idx, uint3 dim, float3& origin, float3& direction ) 50 | { 51 | const float3 U = params.cam_u; 52 | const float3 V = params.cam_v; 53 | const float3 W = params.cam_w; 54 | const float2 d = 2.0f * make_float2( 55 | static_cast( idx.x ) / static_cast( dim.x ), 56 | static_cast( idx.y ) / static_cast( dim.y ) 57 | ) - 1.0f; 58 | 59 | origin = params.cam_eye; 60 | direction = normalize( d.x * U + d.y * V + W ); 61 | } 62 | 63 | 64 | extern "C" __global__ void __raygen__rg() 65 | { 66 | // Lookup our location within the launch grid 67 | const uint3 idx = optixGetLaunchIndex(); 68 | const uint3 dim = optixGetLaunchDimensions(); 69 | 70 | // Map our launch idx to a 
screen location and create a ray from the camera 71 | // location through the screen 72 | float3 ray_origin, ray_direction; 73 | computeRay( idx, dim, ray_origin, ray_direction ); 74 | 75 | // Trace the ray against our scene hierarchy 76 | unsigned int p0, p1, p2; 77 | optixTrace( 78 | params.handle, 79 | ray_origin, 80 | ray_direction, 81 | 0.0f, // Min intersection distance 82 | 1e16f, // Max intersection distance 83 | 0.0f, // rayTime -- used for motion blur 84 | OptixVisibilityMask( 255 ), // Specify always visible 85 | OPTIX_RAY_FLAG_NONE, 86 | 0, // SBT offset -- See SBT discussion 87 | 1, // SBT stride -- See SBT discussion 88 | 0, // missSBTIndex -- See SBT discussion 89 | p0, p1, p2 ); 90 | float3 result; 91 | result.x = __uint_as_float( p0 ); 92 | result.y = __uint_as_float( p1 ); 93 | result.z = __uint_as_float( p2 ); 94 | 95 | // Record results in our output raster 96 | params.image[idx.y * params.image_width + idx.x] = make_color( result ); 97 | } 98 | 99 | 100 | extern "C" __global__ void __miss__ms() 101 | { 102 | MissData* miss_data = reinterpret_cast( optixGetSbtDataPointer() ); 103 | setPayload( miss_data->bg_color ); 104 | } 105 | 106 | 107 | extern "C" __global__ void __closesthit__ch() 108 | { 109 | // When built-in curve intersection is used, the curve parameter u is provided 110 | // by the OptiX API. The parameter’s range is [0,1] over the curve segment, 111 | // with u=0 or u=1 only on the end caps. 112 | float u = optixGetCurveParameter(); 113 | 114 | // linearly interpolate from black to orange 115 | setPayload( make_float3( u, u / 3.0f, 0.0f ) ); 116 | } 117 | -------------------------------------------------------------------------------- /examples/simpleMotionBlur.cu: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 
3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | // 28 | #include 29 | 30 | #include "simpleMotionBlur.h" 31 | #include "random.h" 32 | 33 | #include "vec_math.h" 34 | #include "helpers.h" 35 | 36 | 37 | extern "C" { 38 | __constant__ Params params; 39 | } 40 | 41 | 42 | static __forceinline__ __device__ float3 traceCamera( 43 | OptixTraversableHandle handle, 44 | float3 ray_origin, 45 | float3 ray_direction, 46 | float ray_time 47 | ) 48 | { 49 | unsigned int r, g, b; 50 | 51 | optixTrace( 52 | handle, 53 | ray_origin, 54 | ray_direction, 55 | 0.0f, // tmin 56 | 1e16f, // tmax 57 | ray_time, 58 | OptixVisibilityMask( 1 ), 59 | OPTIX_RAY_FLAG_NONE, 60 | RAY_TYPE_RADIANCE, // SBT offset 61 | RAY_TYPE_COUNT, // SBT stride 62 | RAY_TYPE_RADIANCE, // missSBTIndex 63 | r, g, b ); 64 | 65 | return make_float3( 66 | __uint_as_float( r ), 67 | __uint_as_float( g ), 68 | __uint_as_float( b ) 69 | ); 70 | } 71 | 72 | 73 | static __forceinline__ __device__ void setPayload( float3 p ) 74 | { 75 | optixSetPayload_0( __float_as_uint( p.x ) ); 76 | optixSetPayload_1( __float_as_uint( p.y ) ); 77 | optixSetPayload_2( __float_as_uint( p.z ) ); 78 | } 79 | 80 | 81 | extern "C" __global__ void __raygen__rg() 82 | { 83 | const int w = params.width; 84 | const int h = params.height; 85 | const float3 eye = params.eye; 86 | const float3 U = params.U; 87 | const float3 V = params.V; 88 | const float3 W = params.W; 89 | const uint3 idx = optixGetLaunchIndex(); 90 | const int subframe_index = params.subframe_index; 91 | 92 | unsigned int seed = tea<4>( idx.y*w + idx.x, subframe_index ); 93 | // The center of each pixel is at fraction (0.5,0.5) 94 | const float2 subpixel_jitter = make_float2( rnd( seed ), rnd( seed ) ); 95 | 96 | const float2 d = 2.0f * make_float2( 97 | ( static_cast( idx.x ) + subpixel_jitter.x ) / static_cast( w ), 98 | ( static_cast( idx.y ) + subpixel_jitter.y ) / static_cast( h ) 99 | ) - 1.0f; 100 | float3 ray_direction = normalize(d.x*U + d.y*V + W); 101 | float3 ray_origin = eye; 102 | 103 | const float3 
result = traceCamera( params.handle, ray_origin, ray_direction, rnd( seed ) ); 104 | 105 | const int image_index = idx.y*w + idx.x; 106 | float3 accum_color = result; 107 | if( subframe_index > 0 ) 108 | { 109 | const float a = 1.0f / static_cast( subframe_index+1 ); 110 | const float3 accum_color_prev = make_float3( params.accum_buffer[ image_index ]); 111 | accum_color = lerp( accum_color_prev, accum_color, a ); 112 | } 113 | params.accum_buffer[ image_index ] = make_float4( accum_color, 1.0f); 114 | params.frame_buffer[ image_index ] = make_color ( accum_color ); 115 | } 116 | 117 | 118 | extern "C" __global__ void __miss__camera() 119 | { 120 | MissData* rt_data = reinterpret_cast( optixGetSbtDataPointer() ); 121 | setPayload( rt_data->color ); 122 | } 123 | 124 | 125 | extern "C" __global__ void __closesthit__camera() 126 | { 127 | HitGroupData* rt_data = (HitGroupData*)optixGetSbtDataPointer(); 128 | setPayload( rt_data->color ); 129 | } 130 | 131 | 132 | extern "C" __global__ void __intersection__sphere() 133 | { 134 | HitGroupData* hg_data = reinterpret_cast( optixGetSbtDataPointer() ); 135 | const float3 orig = optixGetObjectRayOrigin(); 136 | const float3 dir = optixGetObjectRayDirection(); 137 | 138 | const float3 center = hg_data->center; 139 | const float radius = hg_data->radius; 140 | 141 | const float3 O = orig - center; 142 | const float l = 1 / length( dir ); 143 | const float3 D = dir * l; 144 | 145 | const float b = dot( O, D ); 146 | const float c = dot( O, O ) - radius * radius; 147 | const float disc = b * b - c; 148 | if( disc > 0.0f ) 149 | { 150 | const float sdisc = sqrtf( disc ); 151 | const float root1 = ( -b - sdisc ); 152 | 153 | const float root11 = 0.0f; 154 | const float3 shading_normal = ( O + ( root1 + root11 ) * D ) / radius; 155 | unsigned int p0, p1, p2; 156 | p0 = __float_as_uint( shading_normal.x ); 157 | p1 = __float_as_uint( shading_normal.y ); 158 | p2 = __float_as_uint( shading_normal.z ); 159 | 160 | 
optixReportIntersection( 161 | root1, // t hit 162 | 0, // user hit kind 163 | p0, p1, p2 164 | ); 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /test/test_program_group.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 NVIDIA CORPORATION All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 4 | 5 | import optix as ox 6 | import cupy as cp 7 | 8 | import array 9 | import pytest 10 | 11 | import sample_ptx 12 | import tutil 13 | 14 | 15 | 16 | if tutil.optix_version_gte( (7,4) ): 17 | class TestProgramGroupOptions: 18 | def test_constructor(self): 19 | pgo = ox.ProgramGroupOptions() 20 | assert type(pgo) is ox.ProgramGroupOptions 21 | 22 | 23 | class TestProgramGroupBase: 24 | def setup_method(self): 25 | self.ctx = ox.deviceContextCreate(0, ox.DeviceContextOptions()) 26 | if tutil.optix_version_gte( (7,7) ): 27 | self.mod, log = self.ctx.moduleCreate(ox.ModuleCompileOptions(), 28 | ox.PipelineCompileOptions(), 29 | sample_ptx.hello_ptx) 30 | else: 31 | self.mod, log = self.ctx.moduleCreateFromPTX(ox.ModuleCompileOptions(), 32 | ox.PipelineCompileOptions(), 33 | sample_ptx.hello_ptx) 34 | 35 | 36 | def teardown_method(self): 37 | self.mod.destroy() 38 | self.ctx.destroy() 39 | 40 | 41 | class TestProgramGroupDescriptor(TestProgramGroupBase): 42 | def test_constructor(self): 43 | pgd = ox.ProgramGroupDesc(raygenModule = self.mod, 44 | raygenEntryFunctionName = "__raygen__hello") 45 | assert pgd.raygenModule == self.mod 46 | assert pgd.raygenEntryFunctionName == "__raygen__hello" 47 | 48 | def test_attributes(self): 49 | pgd = ox.ProgramGroupDesc() 50 | pgd.raygenModule = self.mod 51 | pgd.raygenEntryFunctionName = "__raygen__hello" 52 | assert pgd.raygenModule == self.mod 53 | assert pgd.raygenEntryFunctionName == "__raygen__hello" 54 | 55 | 56 | class 
TestProgramGroup(TestProgramGroupBase): 57 | def test_create_raygen(self): 58 | 59 | prog_group_desc = ox.ProgramGroupDesc() 60 | prog_group_desc.raygenModule = self.mod 61 | prog_group_desc.raygenEntryFunctionName = "__raygen__hello" 62 | 63 | prog_groups = None 64 | log = None 65 | if tutil.optix_version_gte( (7,4) ): 66 | prog_group_opts = ox.ProgramGroupOptions() 67 | prog_groups, log = self.ctx.programGroupCreate([prog_group_desc], prog_group_opts) 68 | else: 69 | prog_groups, log = self.ctx.programGroupCreate([prog_group_desc] ) 70 | assert len(prog_groups) == 1 71 | assert type(prog_groups[0]) is ox.ProgramGroup 72 | 73 | prog_groups[0].destroy() 74 | 75 | def test_create_miss(self): 76 | 77 | prog_group_desc = ox.ProgramGroupDesc() 78 | prog_group_desc.missModule = self.mod 79 | prog_group_desc.missEntryFunctionName = "__miss__noop" 80 | 81 | prog_groups = None 82 | log = None 83 | if tutil.optix_version_gte( (7,4) ): 84 | prog_group_opts = ox.ProgramGroupOptions() 85 | prog_groups, log = self.ctx.programGroupCreate([prog_group_desc], prog_group_opts) 86 | else: 87 | prog_groups, log = self.ctx.programGroupCreate([prog_group_desc] ) 88 | 89 | assert len(prog_groups) == 1 90 | assert type(prog_groups[0]) is ox.ProgramGroup 91 | 92 | prog_groups[0].destroy() 93 | 94 | def test_create_callables(self): 95 | 96 | prog_group_desc = ox.ProgramGroupDesc() 97 | prog_group_desc.callablesModuleDC = self.mod 98 | prog_group_desc.callablesModuleCC = self.mod 99 | prog_group_desc.callablesEntryFunctionNameCC = "__continuation_callable__noop" 100 | prog_group_desc.callablesEntryFunctionNameDC = "__direct_callable__noop" 101 | 102 | prog_groups = None 103 | log = None 104 | if tutil.optix_version_gte( (7,4) ): 105 | prog_group_opts = ox.ProgramGroupOptions() 106 | prog_groups, log = self.ctx.programGroupCreate([prog_group_desc], prog_group_opts) 107 | else: 108 | prog_groups, log = self.ctx.programGroupCreate([prog_group_desc] ) 109 | 110 | assert len(prog_groups) == 1 111 
| assert type(prog_groups[0]) is ox.ProgramGroup 112 | 113 | prog_groups[0].destroy() 114 | 115 | def test_create_hitgroup(self): 116 | prog_group_desc = ox.ProgramGroupDesc() 117 | prog_group_desc.hitgroupModuleCH = self.mod 118 | prog_group_desc.hitgroupModuleAH = self.mod 119 | prog_group_desc.hitgroupModuleIS = self.mod 120 | prog_group_desc.hitgroupEntryFunctionNameCH = "__closesthit__noop" 121 | prog_group_desc.hitgroupEntryFunctionNameAH = "__anyhit__noop" 122 | prog_group_desc.hitgroupEntryFunctionNameIS = "__intersection__noop" 123 | 124 | prog_groups = None 125 | log = None 126 | if tutil.optix_version_gte( (7,4) ): 127 | prog_group_opts = ox.ProgramGroupOptions() 128 | prog_groups, log = self.ctx.programGroupCreate([prog_group_desc], prog_group_opts) 129 | else: 130 | prog_groups, log = self.ctx.programGroupCreate([prog_group_desc] ) 131 | 132 | assert len(prog_groups) == 1 133 | assert type(prog_groups[0]) is ox.ProgramGroup 134 | 135 | prog_groups[0].destroy() 136 | 137 | def create_prog_group(self): 138 | 139 | prog_group_desc = ox.ProgramGroupDesc() 140 | prog_group_desc.raygenModule = self.mod 141 | prog_group_desc.raygenEntryFunctionName = "__raygen__hello" 142 | 143 | prog_groups = None 144 | log = None 145 | if tutil.optix_version_gte( (7,4) ): 146 | prog_group_opts = ox.ProgramGroupOptions() 147 | prog_groups, log = self.ctx.programGroupCreate([prog_group_desc], prog_group_opts) 148 | else: 149 | prog_groups, log = self.ctx.programGroupCreate([prog_group_desc] ) 150 | return prog_groups[0] 151 | 152 | def test_get_stack_size(self): 153 | if tutil.optix_version_gte( (7,6) ): 154 | print("TODO - newer version requires pipeline arg") 155 | else: 156 | prog_group = self.create_prog_group() 157 | stack_size = prog_group.getStackSize() 158 | assert type(stack_size) is ox.StackSizes 159 | -------------------------------------------------------------------------------- /examples/sphere.cu: 
-------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
//

#include <optix.h>

#include "sphere.h"
#include "helpers.h"

#include "vec_math.h"

// Launch parameters read by the raygen program below: traversable handle,
// output image pointer and image width.
extern "C" {
__constant__ Params params;
}


// Trace a single ray against `handle`.
//
// The float3 pointed to by `prd` is bit-cast into three 32-bit payload
// registers before the trace and read back from them afterwards, so whatever
// the miss / closest-hit programs store via setPayload() ends up in *prd.
static __forceinline__ __device__ void trace(
        OptixTraversableHandle handle,
        float3                 ray_origin,
        float3                 ray_direction,
        float                  tmin,
        float                  tmax,
        float3*                prd
        )
{
    unsigned int p0, p1, p2;
    p0 = __float_as_int( prd->x );
    p1 = __float_as_int( prd->y );
    p2 = __float_as_int( prd->z );
    optixTrace(
            handle,
            ray_origin,
            ray_direction,
            tmin,
            tmax,
            0.0f,                // rayTime
            OptixVisibilityMask( 1 ),
            OPTIX_RAY_FLAG_NONE,
            0,                   // SBT offset
            0,                   // SBT stride
            0,                   // missSBTIndex
            p0, p1, p2 );
    prd->x = __int_as_float( p0 );
    prd->y = __int_as_float( p1 );
    prd->z = __int_as_float( p2 );
}


// Store `p` in payload registers 0..2 (bit-cast, no numeric conversion).
static __forceinline__ __device__ void setPayload( float3 p )
{
    optixSetPayload_0( __float_as_int( p.x ) );
    optixSetPayload_1( __float_as_int( p.y ) );
    optixSetPayload_2( __float_as_int( p.z ) );
}


// Read payload registers 0..2 back as a float3.
static __forceinline__ __device__ float3 getPayload()
{
    return make_float3(
            __int_as_float( optixGetPayload_0() ),
            __int_as_float( optixGetPayload_1() ),
            __int_as_float( optixGetPayload_2() )
            );
}


// Ray generation: builds one camera ray per launch index from the camera
// basis stored in the SBT record, traces it and writes the resulting colour
// to params.image.
extern "C" __global__ void __raygen__rg()
{
    const uint3 idx = optixGetLaunchIndex();
    const uint3 dim = optixGetLaunchDimensions();

    const RayGenData* rt_data = reinterpret_cast<const RayGenData*>( optixGetSbtDataPointer() );

    const float3 U = rt_data->camera_u;
    const float3 V = rt_data->camera_v;
    const float3 W = rt_data->camera_w;

    // Map the launch index to [-1, 1) in both dimensions.
    const float2 d = 2.0f * make_float2(
            static_cast<float>( idx.x ) / static_cast<float>( dim.x ),
            static_cast<float>( idx.y ) / static_cast<float>( dim.y )
            ) - 1.0f;

    const float3 origin      = rt_data->cam_eye;
    const float3 direction   = normalize( d.x * U + d.y * V + W );
    float3       payload_rgb = make_float3( 0.5f, 0.5f, 0.5f );
    trace( params.handle,
            origin,
            direction,
            0.00f,  // tmin
            1e16f,  // tmax
            &payload_rgb );

    params.image[idx.y * params.image_width + idx.x] = make_color( payload_rgb );
}


// Miss: returns the colour stored in the miss SBT record.
extern "C" __global__ void __miss__ms()
{
    const MissData* rt_data = reinterpret_cast<const MissData*>( optixGetSbtDataPointer() );
    setPayload( make_float3( rt_data->r, rt_data->g, rt_data->b ) );
}


// Closest hit: reads the normal reported by __intersection__sphere from
// attributes 0..2, transforms it to world space and remaps it from [-1, 1]
// to [0, 1] so it can be written out as a colour.
extern "C" __global__ void __closesthit__ch()
{
    const float3 shading_normal =
        make_float3(
                __int_as_float( optixGetAttribute_0() ),
                __int_as_float( optixGetAttribute_1() ),
                __int_as_float( optixGetAttribute_2() )
                );
    setPayload( normalize( optixTransformNormalFromObjectToWorldSpace( shading_normal ) ) * 0.5f + 0.5f );
}


#define float3_as_ints( u ) __float_as_int( u.x ), __float_as_int( u.y ), __float_as_int( u.z )

// Custom intersection program for the sphere stored in the hit-group SBT
// record.  Reports the nearer root if it lies inside (tmin, tmax), otherwise
// tries the farther one.  Attributes 0..2 carry the normal (hit point
// relative to the centre, divided by the radius), attribute 3 the radius.
//
// NOTE(review): the ray is queried in world space while __closesthit__ch
// treats the reported normal as object space; these agree only if the sphere
// has no instance transform -- confirm against the host-side scene setup.
extern "C" __global__ void __intersection__sphere()
{
    const SphereHitGroupData* hit_group_data = reinterpret_cast<const SphereHitGroupData*>( optixGetSbtDataPointer() );

    const float3 ray_orig = optixGetWorldRayOrigin();
    const float3 ray_dir  = optixGetWorldRayDirection();
    const float  ray_tmin = optixGetRayTmin();
    const float  ray_tmax = optixGetRayTmax();

    const Sphere sphere = hit_group_data->sphere;

    // Build the centre explicitly.  A bare parenthesised `(x, y, z)` is the
    // comma operator and evaluates to z alone, which would offset the ray
    // origin by center_z on every axis.
    const float3 center = make_float3( sphere.center_x, sphere.center_y, sphere.center_z );
    const float3 O      = ray_orig - center;
    const float  l      = 1.0f / length( ray_dir );
    const float3 D      = ray_dir * l;
    const float  radius = sphere.radius;

    float b    = dot( O, D );
    float c    = dot( O, O ) - radius * radius;
    float disc = b * b - c;
    if( disc > 0.0f )
    {
        float sdisc        = sqrtf( disc );
        float root1        = ( -b - sdisc );
        float root11       = 0.0f;
        bool  check_second = true;

        // Only refine when the first root is far away relative to the radius
        // (presumably to limit floating-point error in that case).
        const bool do_refine = fabsf( root1 ) > ( 10.0f * radius );

        if( do_refine )
        {
            // refine root1: re-solve the quadratic starting from the point at root1
            float3 O1 = O + root1 * D;
            b         = dot( O1, D );
            c         = dot( O1, O1 ) - radius * radius;
            disc      = b * b - c;

            if( disc > 0.0f )
            {
                sdisc  = sqrtf( disc );
                root11 = ( -b - sdisc );
            }
        }

        float  t;
        float3 normal;
        // Roots are distances along the normalised direction D; scale by l to
        // express them in units of the original ray_dir.
        t = ( root1 + root11 ) * l;
        if( t > ray_tmin && t < ray_tmax )
        {
            normal = ( O + ( root1 + root11 ) * D ) / radius;
            if( optixReportIntersection( t, 0, float3_as_ints( normal ), __float_as_int( radius ) ) )
                check_second = false;
        }

        if( check_second )
        {
            // After refinement b and sdisc are relative to O1, so root1 has to
            // be added back to get the distance from O.
            float root2 = ( -b + sdisc ) + ( do_refine ? root1 : 0.0f );
            t           = root2 * l;
            if( t > ray_tmin && t < ray_tmax )
            {
                normal = ( O + root2 * D ) / radius;
                optixReportIntersection( t, 0, float3_as_ints( normal ), __float_as_int( radius ) );
            }
        }
    }
}

// --------------------------------------------------------------------------------
// /test/test_module.py:
// --------------------------------------------------------------------------------
// # Copyright (c) 2022 NVIDIA CORPORATION All rights reserved.
// # Use of this source code is governed by a BSD-style
// # license that can be found in the LICENSE file.

import optix
import cupy as cp

import array
import pytest

import sample_ptx
import tutil


def _create_module( ctx, module_opts, pipeline_opts, ptx ):
    """Create a module from PTX text using the entry point of the installed OptiX.

    OptiX 7.7 and newer expose ``moduleCreate``; older versions expose
    ``moduleCreateFromPTX``.  Returns whatever that call returns; the tests
    below unpack it as ``( module, log )``.
    """
    if tutil.optix_version_gte( (7,7) ):
        return ctx.moduleCreate( module_opts, pipeline_opts, ptx )
    return ctx.moduleCreateFromPTX( module_opts, pipeline_opts, ptx )


if tutil.optix_version_gte( (7,2) ):
    class TestModuleCompileBoundValueEntry:
        """Construction and property round-trips for ModuleCompileBoundValueEntry."""

        def test_compile_bound_value_entry( self ):
            # Default construction must succeed; the object itself is not inspected.
            bound_value_entry_default = optix.ModuleCompileBoundValueEntry(
            )

            bound_value = array.array( 'f', [0.1, 0.2, 0.3] )
            bound_value_entry = optix.ModuleCompileBoundValueEntry(
                pipelineParamOffsetInBytes = 4,
                boundValue  = bound_value,
                annotation  = "my_bound_value"
            )

            assert bound_value_entry.pipelineParamOffsetInBytes == 4
            # boundValue is expected to be write-only: reading it must raise.
            with pytest.raises( AttributeError ):
                print( bound_value_entry.boundValue )
            assert bound_value_entry.annotation == "my_bound_value"

            bound_value_entry.pipelineParamOffsetInBytes = 8
            assert bound_value_entry.pipelineParamOffsetInBytes == 8
            bound_value_entry.annotation = "new_bound_value"
            assert bound_value_entry.annotation == "new_bound_value"


if tutil.optix_version_gte( (7,4) ):
    class TestModuleCompilePayloadType:
        """Construction of PayloadType via property assignment and via keyword."""

        def test_compile_payload_type( self ):
            payload_semantics = [ 0, 1 ]
            payload_type_default = optix.PayloadType(
            )
            payload_type_default.payloadSemantics = payload_semantics

            payload_type = optix.PayloadType(
                payloadSemantics = payload_semantics
            )


class TestModule:
    """ModuleCompileOptions round-trips and module creation / destruction.

    Which tests exist depends on the installed OptiX version, so each
    version-specific test is defined under a version check at class-body level.
    """

    if tutil.optix_version_gte( (7,2) ):
        def test_options( self ):
            mod_opts = optix.ModuleCompileOptions(
                maxRegisterCount = 64,
                optLevel         = optix.COMPILE_OPTIMIZATION_LEVEL_1,
                debugLevel       = tutil.default_debug_level(),
                boundValues      = []
            )
            assert mod_opts.maxRegisterCount == 64
            assert mod_opts.optLevel         == optix.COMPILE_OPTIMIZATION_LEVEL_1
            assert mod_opts.debugLevel       == tutil.default_debug_level()
            # optix.ModuleCompileOptions.boundValues is write-only
            with pytest.raises( AttributeError ):
                print( mod_opts.boundValues )

            mod_opts = optix.ModuleCompileOptions()
            assert mod_opts.maxRegisterCount == optix.COMPILE_DEFAULT_MAX_REGISTER_COUNT
            assert mod_opts.optLevel         == optix.COMPILE_OPTIMIZATION_DEFAULT
            assert mod_opts.debugLevel       == tutil.default_debug_level()
            mod_opts.maxRegisterCount = 64
            mod_opts.optLevel         = optix.COMPILE_OPTIMIZATION_LEVEL_1
            mod_opts.debugLevel       = tutil.default_debug_level()
            mod_opts.boundValues      = [ optix.ModuleCompileBoundValueEntry() ]
            assert mod_opts.maxRegisterCount == 64
            assert mod_opts.optLevel         == optix.COMPILE_OPTIMIZATION_LEVEL_1
            assert mod_opts.debugLevel       == tutil.default_debug_level()
    elif tutil.optix_version_gte( (7,1) ):
        def test_options( self ):
            mod_opts = optix.ModuleCompileOptions(
                maxRegisterCount = 64,
                optLevel         = optix.COMPILE_OPTIMIZATION_LEVEL_1,
                debugLevel       = tutil.default_debug_level()
            )
            assert mod_opts.maxRegisterCount == 64
            assert mod_opts.optLevel         == optix.COMPILE_OPTIMIZATION_LEVEL_1
            assert mod_opts.debugLevel       == tutil.default_debug_level()

            mod_opts = optix.ModuleCompileOptions()
            assert mod_opts.maxRegisterCount == optix.COMPILE_DEFAULT_MAX_REGISTER_COUNT
            assert mod_opts.optLevel         == optix.COMPILE_OPTIMIZATION_DEFAULT
            assert mod_opts.debugLevel       == optix.COMPILE_DEBUG_LEVEL_DEFAULT
            mod_opts.maxRegisterCount = 64
            mod_opts.optLevel         = optix.COMPILE_OPTIMIZATION_LEVEL_1
            mod_opts.debugLevel       = tutil.default_debug_level()
            assert mod_opts.maxRegisterCount == 64
            assert mod_opts.optLevel         == optix.COMPILE_OPTIMIZATION_LEVEL_1
            assert mod_opts.debugLevel       == tutil.default_debug_level()
    else:
        def test_options( self ):
            mod_opts = optix.ModuleCompileOptions(
                maxRegisterCount = 64,
                optLevel         = optix.COMPILE_OPTIMIZATION_LEVEL_1,
                debugLevel       = tutil.default_debug_level()
            )
            assert mod_opts.maxRegisterCount == 64
            assert mod_opts.optLevel         == optix.COMPILE_OPTIMIZATION_LEVEL_1
            assert mod_opts.debugLevel       == tutil.default_debug_level()

            mod_opts = optix.ModuleCompileOptions()
            assert mod_opts.maxRegisterCount == optix.COMPILE_DEFAULT_MAX_REGISTER_COUNT
            assert mod_opts.optLevel         == optix.COMPILE_OPTIMIZATION_DEFAULT
            assert mod_opts.debugLevel       == tutil.default_debug_level()
            mod_opts.maxRegisterCount = 64
            mod_opts.optLevel         = optix.COMPILE_OPTIMIZATION_LEVEL_1
            mod_opts.debugLevel       = optix.COMPILE_DEBUG_LEVEL_FULL
            assert mod_opts.maxRegisterCount == 64
            assert mod_opts.optLevel         == optix.COMPILE_OPTIMIZATION_LEVEL_1
            assert mod_opts.debugLevel       == optix.COMPILE_DEBUG_LEVEL_FULL

    def test_create_destroy( self ):
        ctx = optix.deviceContextCreate(0, optix.DeviceContextOptions())
        module_opts   = optix.ModuleCompileOptions()
        pipeline_opts = optix.PipelineCompileOptions()
        mod, log = _create_module( ctx, module_opts, pipeline_opts, sample_ptx.hello_ptx )
        assert type(mod) is optix.Module
        assert type(log) is str

        mod.destroy()
        ctx.destroy()


    if tutil.optix_version_gte( (7,4) ):
        def test_payload_semantics_use( self ):
            ctx = optix.deviceContextCreate(0, optix.DeviceContextOptions())
            module_opts   = optix.ModuleCompileOptions()
            pipeline_opts = optix.PipelineCompileOptions()
            pipeline_opts.numPayloadValues = 3

            # Every program type may read and write each payload value.
            payload_sem = (
                optix.PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE |
                optix.PAYLOAD_SEMANTICS_CH_READ_WRITE |
                optix.PAYLOAD_SEMANTICS_MS_READ_WRITE |
                optix.PAYLOAD_SEMANTICS_AH_READ_WRITE |
                optix.PAYLOAD_SEMANTICS_IS_READ_WRITE
            )

            payload_type = optix.PayloadType( [ payload_sem, payload_sem, payload_sem ] )
            module_opts.payloadTypes = [ payload_type ]
            mod, log = _create_module( ctx, module_opts, pipeline_opts, sample_ptx.triangle_ptx )
            mod.destroy()
            ctx.destroy()


    # ModuleCompileBoundValueEntry is only exercised on OptiX >= 7.2 elsewhere
    # in this file, so this test is gated on the same version.
    if tutil.optix_version_gte( (7,2) ):
        def test_bound_values_use( self ):
            ctx = optix.deviceContextCreate(0, optix.DeviceContextOptions())
            module_opts   = optix.ModuleCompileOptions()
            pipeline_opts = optix.PipelineCompileOptions()

            bound_value = array.array( 'f', [0.1, 0.2, 0.3] )
            bound_value_entry = optix.ModuleCompileBoundValueEntry(
                pipelineParamOffsetInBytes = 4,
                boundValue  = bound_value,
                annotation  = "my_bound_value"
            )
            module_opts.boundValues = [ bound_value_entry ]

            mod, log = _create_module( ctx, module_opts, pipeline_opts, sample_ptx.hello_ptx )

            mod.destroy()
            ctx.destroy()


    if tutil.optix_version_gte( (7,1) ):
        def test_builtin_is_module_get( self ):
            ctx = optix.deviceContextCreate(0, optix.DeviceContextOptions())
            module_opts     = optix.ModuleCompileOptions()
            pipeline_opts   = optix.PipelineCompileOptions()
            builtin_is_opts = optix.BuiltinISOptions()
            builtin_is_opts.builtinISModuleType = optix.PRIMITIVE_TYPE_TRIANGLE

            is_mod = ctx.builtinISModuleGet(
                module_opts,
                pipeline_opts,
                builtin_is_opts
            )
            assert type( is_mod ) is optix.Module
            is_mod.destroy()
            ctx.destroy()

# --------------------------------------------------------------------------------
# /examples/denoiser.py:
#!/usr/bin/env python3
# Copyright (c) 2022 NVIDIA CORPORATION All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.



import optix
import Imath
import OpenEXR
import cupy as cp
import cupy.cuda.runtime as cuda
import numpy as np

import ctypes
import sys



#-------------------------------------------------------------------------------
#
# Helpers
#
#-------------------------------------------------------------------------------

# Images are handled as 4-channel float32 throughout this script
# (optix.PIXEL_FORMAT_FLOAT4): 4 channels * 4 bytes.
_BYTES_PER_PIXEL = 4 * 4


class Logger:
    """Callable passed to OptiX as the log callback; prints and counts messages."""

    def __init__( self ):
        self.num_mssgs = 0

    def __call__( self, level, tag, mssg ):
        print( "[{:>2}][{:>12}]: {}".format( level, tag, mssg ) )
        self.num_mssgs += 1


class State:
    """Everything the denoiser run needs: images, tiling settings and buffers."""

    def __init__( self ):
        self.tile_size    = (0, 0)
        self.exposure     = 0.0
        self.layer        = optix.DenoiserLayer()
        self.guide_layer  = optix.DenoiserGuideLayer()

        # Both are filled in by denoiser_init().
        self.scratch_size = 0
        self.overlap      = 0

        # Replaced by cupy arrays in denoiser_init(); the arrays own the
        # device memory.
        self.d_intensity  = 0
        self.d_scratch    = 0
        self.d_state      = 0


    def __str__( self ):
        return (
            "w : {}\n".format( self.layer.input.width ) +
            "h : {}\n".format( self.layer.input.height ) +
            "tile : {}\n".format( self.tile_size ) +
            "expos : {}" .format( self.exposure )
            )


def optix_version_gte( version ):
    """Return True if the installed OptiX (major, minor) is at least `version`."""
    installed = optix.version()
    return ( installed[0], installed[1] ) >= ( version[0], version[1] )


def create_optix_image_2D( w, h, image ):
    """Allocate a w x h FLOAT4 device image and optionally upload `image` into it.

    `image` is either None (the device memory is left uninitialised) or a
    host numpy array holding at least w*h*4 float32 values.
    The returned optix.Image2D refers to memory obtained from cuda.malloc;
    release it with free_optix_image_2D().
    """
    oi = optix.Image2D()
    byte_size = w * h * _BYTES_PER_PIXEL
    d_mem = cuda.malloc( byte_size )
    if image is not None:
        cuda.memcpy(
            d_mem,
            image.ctypes.data,
            byte_size,
            cuda.memcpyHostToDevice
        )
    oi.data               = d_mem
    oi.width              = w
    oi.height             = h
    oi.rowStrideInBytes   = w * _BYTES_PER_PIXEL
    oi.pixelStrideInBytes = _BYTES_PER_PIXEL
    oi.format             = optix.PIXEL_FORMAT_FLOAT4
    return oi


def free_optix_image_2D( optix_image ):
    """Free the device memory behind `optix_image` and clear its data pointer."""
    cuda.free( optix_image.data )
    optix_image.data = 0


def load_exr( filename ):
    """Read the R, G, B channels of an EXR file into a new device image.

    The fourth channel of every pixel is set to 1.0.
    """
    exr_file   = OpenEXR.InputFile( filename )
    exr_header = exr_file.header()
    r,g,b = exr_file.channels("RGB", pixel_type=Imath.PixelType(Imath.PixelType.FLOAT) )

    dw = exr_header[ "dataWindow" ]
    w  = dw.max.x - dw.min.x + 1
    h  = dw.max.y - dw.min.y + 1

    image = np.ones( (h, w, 4), dtype = np.float32 )
    for channel_index, channel_bytes in enumerate( (r, g, b) ):
        image[:, :, channel_index] = np.frombuffer( channel_bytes, dtype = np.float32 ).reshape(h, w)
    return create_optix_image_2D( w, h, image.flatten() )


def write_exr( filename, optix_image ):
    """Download `optix_image` from the device and write its R, G, B channels as EXR."""
    w = optix_image.width
    h = optix_image.height
    data = np.zeros( (h*w*4), dtype = np.float32 )
    cuda.memcpy(
        data.ctypes.data,
        optix_image.data,
        w * h * _BYTES_PER_PIXEL,
        cuda.memcpyDeviceToHost
    )
    exr = OpenEXR.OutputFile( filename, OpenEXR.Header( w, h ) )
    # Pixels are interleaved RGBA, so every 4th value belongs to one channel.
    exr.writePixels( {
        'R' : data[0::4].tobytes(),
        'G' : data[1::4].tobytes(),
        'B' : data[2::4].tobytes()
    } )


def parse_args():
    """Parse the command line and return the argparse namespace."""

    import argparse
    parser = argparse.ArgumentParser(
        description = 'Apply OptiX denoiser to input images'
    )
    parser.add_argument(
        '-n', '--normal',
        metavar = 'normal.exr',
        type    = str,
        help    = 'Screen space normals input'
    )
    parser.add_argument(
        '-a', '--albedo',
        metavar = 'albedo.exr',
        type    = str,
        help    = 'Albedo input'
    )
    parser.add_argument(
        '-o', '--out',
        metavar = 'out.exr',
        type    = str,
        help    = "Output filename, default 'denoised.exr'" ,
        default = 'denoised.exr'
    )
    parser.add_argument(
        '-t', '--tilesize',
        metavar = 'INT',
        type    = int,
        nargs   = 2,
        help    = "Tile width and height; if either is <= 0 the whole image is one tile.",
        default = ( 0, 0 )
    )
    parser.add_argument(
        '-e', '--exposure',
        metavar = 'FLOAT',
        type    = float,
        help    = "Exposure to be applied to output",
        default = 1.0
    )
    parser.add_argument(
        'color',
        metavar = 'color.exr',
        type    = str,
        help    = "Noisy color image name."
    )
    return parser.parse_args()


def _load_guide_image( filename, kind, color_image ):
    """Load guide image `filename` and check that it matches `color_image` in size.

    `kind` is the lower-case guide name ("normal" or "albedo") used in the
    progress and error messages.  Exits with status 1 on a size mismatch.
    """
    print( "Loading {} file '{}'".format( kind, filename ) )
    guide = load_exr( filename )
    if guide.width != color_image.width or guide.height != color_image.height:
        print( "ERROR: {} image dims do not match color image dims".format( kind.capitalize() ) )
        sys.exit(1)
    print( " ... success" )
    return guide


def load_state( args, state ):
    """Populate `state` with the input images and settings described by `args`."""

    print( "Loading color file '{}'".format( args.color) )
    state.layer.input  = load_exr( args.color )
    state.layer.output = create_optix_image_2D( state.layer.input.width, state.layer.input.height, None )
    print( " ... success" )

    if args.normal:
        state.guide_layer.normal = _load_guide_image( args.normal, "normal", state.layer.input )

    if args.albedo:
        state.guide_layer.albedo = _load_guide_image( args.albedo, "albedo", state.layer.input )

    # A non-positive tile dimension means "denoise the whole image as one tile".
    if args.tilesize[0] <= 0 or args.tilesize[1] <= 0:
        state.tile_size = (
            state.layer.input.width,
            state.layer.input.height
        )
    else:
        state.tile_size = tuple( args.tilesize )
    state.exposure = args.exposure




#-------------------------------------------------------------------------------
#
# Denoising
#
#-------------------------------------------------------------------------------

def create_ctx():
    """Create and return an OptiX device context that logs through a Logger."""
    print( "Creating optix device context ..." )

    # Note that log callback data is no longer needed.  We can
    # instead send a callable class instance as the log-function
    # which stores any data needed
    global logger
    logger = Logger()

    # OptiX param struct fields can be set with optional
    # keyword constructor arguments.
    ctx_options = optix.DeviceContextOptions(
        logCallbackFunction = logger,
        logCallbackLevel    = 4
    )

    # They can also be set and queried as properties on the struct
    if optix_version_gte( (7,2) ):
        ctx_options.validationMode = optix.DEVICE_CONTEXT_VALIDATION_MODE_ALL

    cu_ctx = 0
    return optix.deviceContextCreate( cu_ctx, ctx_options )


def denoiser_init( ctx, state ):
    """Create the denoiser, allocate its device buffers in `state` and set it up.

    Fills in state.scratch_size, state.overlap, state.d_state,
    state.d_intensity and state.d_scratch.  Returns the denoiser.
    """
    options = optix.DenoiserOptions()
    # A guide image that was never loaded still has width 0.
    options.guideAlbedo = 0 if state.guide_layer.albedo.width == 0 else 1
    options.guideNormal = 0 if state.guide_layer.normal.width == 0 else 1
    denoiser = ctx.denoiserCreate( optix.DENOISER_MODEL_KIND_HDR, options )

    sizes = denoiser.computeMemoryResources(
        state.tile_size[0],
        state.tile_size[1]
    )

    # Overlap is only needed when the image is actually split into tiles,
    # i.e. when the tile does not cover the image in both dimensions.
    covers_whole_image = (
        state.tile_size[0] == state.layer.input.width and
        state.tile_size[1] == state.layer.input.height
    )
    if covers_whole_image:
        state.scratch_size = sizes.withoutOverlapScratchSizeInBytes
    else:
        state.scratch_size = sizes.withOverlapScratchSizeInBytes
        state.overlap      = sizes.overlapWindowSizeInPixels

    state.d_state     = cp.empty( ( sizes.stateSizeInBytes ), dtype='B' )
    state.d_intensity = cp.empty( ( 1 ), 'f4' )
    state.d_scratch   = cp.empty( ( state.scratch_size ), dtype='B' )

    denoiser.setup(
        0,
        state.tile_size[0] + 2*state.overlap,
        state.tile_size[1] + 2*state.overlap,
        state.d_state.data.ptr,
        state.d_state.nbytes,
        state.d_scratch.data.ptr,
        state.d_scratch.nbytes
    )

    return denoiser


def denoiser_exec( denoiser, state ):
    """Compute the HDR intensity of the input and run the tiled denoiser."""

    params = optix.DenoiserParams()
    params.denoiseAlpha    = 0
    # NOTE(review): every other device pointer in this file is passed as
    # `.data.ptr`; here the cupy array itself is assigned.  Confirm which
    # form the hdrIntensity property expects.
    params.hdrIntensity    = state.d_intensity
    params.hdrAverageColor = 0
    params.blendFactor     = 0.0


    denoiser.computeIntensity(
        0,
        state.layer.input,
        state.d_intensity.data.ptr,
        state.d_scratch.data.ptr,
        state.d_scratch.nbytes
    )

    denoiser.invokeTiled(
        0, # CUDA stream
        params,
        state.d_state.data.ptr,
        state.d_state.nbytes,
        state.guide_layer,
        [ state.layer ],
        state.d_scratch.data.ptr,
        state.d_scratch.nbytes,
        state.overlap,
        state.tile_size[0],
        state.tile_size[1]
    )


#-------------------------------------------------------------------------------
#
# Main
#
#-------------------------------------------------------------------------------

def main():
    """Load the inputs, denoise the color image and write the result."""
    args  = parse_args()
    state = State()
    load_state( args, state )
    print( "\n-------- State loaded --------" )
    print( state )
    print( "------------------------------\n" )

    ctx      = create_ctx()
    denoiser = denoiser_init( ctx, state )
    denoiser_exec( denoiser, state )
    write_exr( args.out, state.layer.output )




if __name__ == "__main__":
    main()


-------------------------------------------------------------------------------- /test/util/tutil.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 NVIDIA CORPORATION All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 4 | 5 | 6 | 7 | import optix 8 | import cupy as cp 9 | 10 | ptx_string_old = ''' 11 | // 12 | // Generated by NVIDIA NVVM Compiler 13 | // 14 | // Compiler Build ID: CL-29069683 15 | // Cuda compilation tools, release 11.1, V11.1.74 16 | // Based on LLVM 3.4svn 17 | // 18 | 19 | .version 7.1 20 | .target sm_52 21 | .address_size 64 22 | 23 | // .globl __raygen__hello 24 | .const .align 8 .b8 params[16]; 25 | 26 | .visible .entry __raygen__hello( 27 | 28 | ) 29 | { 30 | .reg .pred %p<4>; 31 | .reg .b16 %rs<5>; 32 | .reg .f32 %f<39>; 33 | .reg .b32 %r<13>; 34 | .reg .b64 %rd<6>; 35 | 36 | 37 | // inline asm 38 | call (%r1), _optix_get_launch_index_x, (); 39 | // inline asm 40 | // inline asm 41 | call (%r2), _optix_get_launch_index_y, (); 42 | // inline asm 43 | // inline asm 44 | call (%rd1), _optix_get_sbt_data_ptr_64, (); 45 | // inline asm 46 | ld.const.u64 %rd2, [params]; 47 | cvta.to.global.u64 %rd3, %rd2; 48 | ld.const.u32 %r4, [params+8]; 49 | mad.lo.s32 %r5, %r4, %r2, %r1; 50 | ld.f32 %f1, [%rd1]; 51 | ld.f32 %f2, [%rd1+4]; 52 | ld.f32 %f3, [%rd1+8]; 53 | mov.f32 %f4, 0f3F800000; 54 | min.ftz.f32 %f5, %f1, %f4; 55 | mov.f32 %f6, 0f00000000; 56 | max.ftz.f32 %f7, %f6, %f5; 57 | min.ftz.f32 %f8, %f2, %f4; 58 | max.ftz.f32 %f9, %f6, %f8; 59 | min.ftz.f32 %f10, %f3, %f4; 60 | max.ftz.f32 %f11, %f6, %f10; 61 | lg2.approx.ftz.f32 %f12, %f7; 62 | mul.ftz.f32 %f13, %f12, 0f3ED55555; 63 | ex2.approx.ftz.f32 %f14, %f13; 64 | lg2.approx.ftz.f32 %f15, %f9; 65 | mul.ftz.f32 %f16, %f15, 0f3ED55555; 66 | ex2.approx.ftz.f32 %f17, %f16; 67 | lg2.approx.ftz.f32 %f18, %f11; 68 | mul.ftz.f32 %f19, %f18, 0f3ED55555; 69 | 
ex2.approx.ftz.f32 %f20, %f19; 70 | setp.lt.ftz.f32 %p1, %f7, 0f3B4D2E1C; 71 | mul.ftz.f32 %f21, %f7, 0f414EB852; 72 | fma.rn.ftz.f32 %f22, %f14, 0f3F870A3D, 0fBD6147AE; 73 | selp.f32 %f23, %f21, %f22, %p1; 74 | setp.lt.ftz.f32 %p2, %f9, 0f3B4D2E1C; 75 | mul.ftz.f32 %f24, %f9, 0f414EB852; 76 | fma.rn.ftz.f32 %f25, %f17, 0f3F870A3D, 0fBD6147AE; 77 | selp.f32 %f26, %f24, %f25, %p2; 78 | setp.lt.ftz.f32 %p3, %f11, 0f3B4D2E1C; 79 | mul.ftz.f32 %f27, %f11, 0f414EB852; 80 | fma.rn.ftz.f32 %f28, %f20, 0f3F870A3D, 0fBD6147AE; 81 | selp.f32 %f29, %f27, %f28, %p3; 82 | min.ftz.f32 %f30, %f23, %f4; 83 | max.ftz.f32 %f31, %f6, %f30; 84 | mul.ftz.f32 %f32, %f31, 0f43800000; 85 | cvt.rzi.ftz.u32.f32 %r6, %f32; 86 | mov.u32 %r7, 255; 87 | min.u32 %r8, %r6, %r7; 88 | min.ftz.f32 %f33, %f26, %f4; 89 | max.ftz.f32 %f34, %f6, %f33; 90 | mul.ftz.f32 %f35, %f34, 0f43800000; 91 | cvt.rzi.ftz.u32.f32 %r9, %f35; 92 | min.u32 %r10, %r9, %r7; 93 | min.ftz.f32 %f36, %f29, %f4; 94 | max.ftz.f32 %f37, %f6, %f36; 95 | mul.ftz.f32 %f38, %f37, 0f43800000; 96 | cvt.rzi.ftz.u32.f32 %r11, %f38; 97 | min.u32 %r12, %r11, %r7; 98 | mul.wide.u32 %rd4, %r5, 4; 99 | add.s64 %rd5, %rd3, %rd4; 100 | cvt.u16.u32 %rs1, %r12; 101 | cvt.u16.u32 %rs2, %r10; 102 | cvt.u16.u32 %rs3, %r8; 103 | mov.u16 %rs4, 255; 104 | st.global.v4.u8 [%rd5], {%rs3, %rs2, %rs1, %rs4}; 105 | ret; 106 | } 107 | ''' 108 | 109 | ptx_string = ''' 110 | 111 | // 112 | // Generated by NVIDIA NVVM Compiler 113 | // 114 | // Compiler Build ID: CL-29373293 115 | // Cuda compilation tools, release 11.2, V11.2.67 116 | // Based on NVVM 7.0.1 117 | // 118 | 119 | .version 7.2 120 | .target sm_60 121 | .address_size 64 122 | 123 | // .globl __raygen__hello 124 | .visible .const .align 8 .b8 params[16]; 125 | 126 | .visible .entry __raygen__hello() 127 | { 128 | .reg .pred %p<4>; 129 | .reg .b16 %rs<5>; 130 | .reg .f32 %f<39>; 131 | .reg .b32 %r<12>; 132 | .reg .b64 %rd<6>; 133 | .loc 1 39 0 134 | Lfunc_begin0: 135 | .loc 1 39 0 136 | 137 | 138 | 
.loc 1 41 26 139 | .loc 2 5675 5, function_name Linfo_string0, inlined_at 1 41 26 140 | // begin inline asm 141 | call (%r1), _optix_get_launch_index_x, (); 142 | // end inline asm 143 | .loc 2 5676 5, function_name Linfo_string0, inlined_at 1 41 26 144 | // begin inline asm 145 | call (%r2), _optix_get_launch_index_y, (); 146 | // end inline asm 147 | Ltmp0: 148 | .loc 1 42 39 149 | .loc 2 5703 5, function_name Linfo_string1, inlined_at 1 42 39 150 | // begin inline asm 151 | call (%rd1), _optix_get_sbt_data_ptr_64, (); 152 | // end inline asm 153 | Ltmp1: 154 | .loc 1 43 5 155 | ld.const.u64 %rd2, [params]; 156 | cvta.to.global.u64 %rd3, %rd2; 157 | ld.const.u32 %r4, [params+8]; 158 | mad.lo.s32 %r5, %r4, %r2, %r1; 159 | ld.f32 %f1, [%rd1]; 160 | ld.f32 %f2, [%rd1+4]; 161 | ld.f32 %f3, [%rd1+8]; 162 | .loc 3 121 22 163 | mov.f32 %f4, 0f3F800000; 164 | min.ftz.f32 %f5, %f1, %f4; 165 | .loc 3 121 12 166 | mov.f32 %f6, 0f00000000; 167 | max.ftz.f32 %f7, %f6, %f5; 168 | .loc 3 121 22 169 | min.ftz.f32 %f8, %f2, %f4; 170 | .loc 3 121 12 171 | max.ftz.f32 %f9, %f6, %f8; 172 | .loc 3 121 22 173 | min.ftz.f32 %f10, %f3, %f4; 174 | .loc 3 121 12 175 | max.ftz.f32 %f11, %f6, %f10; 176 | .loc 4 38 33 177 | lg2.approx.ftz.f32 %f12, %f7; 178 | mul.ftz.f32 %f13, %f12, 0f3ED55555; 179 | ex2.approx.ftz.f32 %f14, %f13; 180 | .loc 4 38 56 181 | lg2.approx.ftz.f32 %f15, %f9; 182 | mul.ftz.f32 %f16, %f15, 0f3ED55555; 183 | ex2.approx.ftz.f32 %f17, %f16; 184 | .loc 4 38 79 185 | lg2.approx.ftz.f32 %f18, %f11; 186 | mul.ftz.f32 %f19, %f18, 0f3ED55555; 187 | ex2.approx.ftz.f32 %f20, %f19; 188 | setp.lt.ftz.f32 %p1, %f7, 0f3B4D2E1C; 189 | mul.ftz.f32 %f21, %f7, 0f414EB852; 190 | fma.rn.ftz.f32 %f22, %f14, 0f3F870A3D, 0fBD6147AE; 191 | selp.f32 %f23, %f21, %f22, %p1; 192 | setp.lt.ftz.f32 %p2, %f9, 0f3B4D2E1C; 193 | mul.ftz.f32 %f24, %f9, 0f414EB852; 194 | fma.rn.ftz.f32 %f25, %f17, 0f3F870A3D, 0fBD6147AE; 195 | selp.f32 %f26, %f24, %f25, %p2; 196 | setp.lt.ftz.f32 %p3, %f11, 0f3B4D2E1C; 
197 | mul.ftz.f32 %f27, %f11, 0f414EB852; 198 | fma.rn.ftz.f32 %f28, %f20, 0f3F870A3D, 0fBD6147AE; 199 | selp.f32 %f29, %f27, %f28, %p3; 200 | Ltmp2: 201 | .loc 4 61 25 202 | .loc 3 121 22, function_name Linfo_string2, inlined_at 4 61 25 203 | min.ftz.f32 %f30, %f23, %f4; 204 | .loc 3 121 12, function_name Linfo_string2, inlined_at 4 61 25 205 | max.ftz.f32 %f31, %f6, %f30; 206 | .loc 4 54 5, function_name Linfo_string2, inlined_at 4 61 25 207 | mul.ftz.f32 %f32, %f31, 0f43800000; 208 | cvt.rzi.ftz.u32.f32 %r6, %f32; 209 | .loc 5 870 10, function_name Linfo_string2, inlined_at 4 61 25 210 | min.u32 %r7, %r6, 255; 211 | Ltmp3: 212 | .loc 4 61 58 213 | .loc 3 121 22, function_name Linfo_string2, inlined_at 4 61 58 214 | min.ftz.f32 %f33, %f26, %f4; 215 | .loc 3 121 12, function_name Linfo_string2, inlined_at 4 61 58 216 | max.ftz.f32 %f34, %f6, %f33; 217 | .loc 4 54 5, function_name Linfo_string2, inlined_at 4 61 58 218 | mul.ftz.f32 %f35, %f34, 0f43800000; 219 | cvt.rzi.ftz.u32.f32 %r8, %f35; 220 | .loc 5 870 10, function_name Linfo_string2, inlined_at 4 61 58 221 | min.u32 %r9, %r8, 255; 222 | Ltmp4: 223 | .loc 4 61 91 224 | .loc 3 121 22, function_name Linfo_string2, inlined_at 4 61 91 225 | min.ftz.f32 %f36, %f29, %f4; 226 | .loc 3 121 12, function_name Linfo_string2, inlined_at 4 61 91 227 | max.ftz.f32 %f37, %f6, %f36; 228 | .loc 4 54 5, function_name Linfo_string2, inlined_at 4 61 91 229 | mul.ftz.f32 %f38, %f37, 0f43800000; 230 | cvt.rzi.ftz.u32.f32 %r10, %f38; 231 | .loc 5 870 10, function_name Linfo_string2, inlined_at 4 61 91 232 | min.u32 %r11, %r10, 255; 233 | Ltmp5: 234 | .loc 4 61 91 235 | mul.wide.u32 %rd4, %r5, 4; 236 | add.s64 %rd5, %rd3, %rd4; 237 | Ltmp6: 238 | .loc 5 870 10, function_name Linfo_string2, inlined_at 4 61 91 239 | cvt.u16.u32 %rs1, %r11; 240 | Ltmp7: 241 | .loc 5 870 10, function_name Linfo_string2, inlined_at 4 61 58 242 | cvt.u16.u32 %rs2, %r9; 243 | Ltmp8: 244 | .loc 5 870 10, function_name Linfo_string2, inlined_at 4 61 25 245 | 
cvt.u16.u32 %rs3, %r7; 246 | Ltmp9: 247 | .loc 4 61 91 248 | mov.u16 %rs4, 255; 249 | st.global.v4.u8 [%rd5], {%rs3, %rs2, %rs1, %rs4}; 250 | .loc 1 45 1 251 | ret; 252 | Ltmp10: 253 | Lfunc_end0: 254 | 255 | } 256 | // .globl __anyhit__noop 257 | .visible .entry __anyhit__noop() 258 | { 259 | 260 | .loc 1 48 0 261 | Lfunc_begin1: 262 | .loc 1 48 0 263 | 264 | 265 | .loc 1 48 48 266 | ret; 267 | Ltmp11: 268 | Lfunc_end1: 269 | 270 | } 271 | // .globl __closesthit__noop 272 | .visible .entry __closesthit__noop() 273 | { 274 | 275 | .loc 1 51 0 276 | Lfunc_begin2: 277 | .loc 1 51 0 278 | 279 | 280 | .loc 1 51 52 281 | ret; 282 | Ltmp12: 283 | Lfunc_end2: 284 | 285 | } 286 | // .globl __intersection__noop 287 | .visible .entry __intersection__noop() 288 | { 289 | 290 | .loc 1 54 0 291 | Lfunc_begin3: 292 | .loc 1 54 0 293 | 294 | 295 | .loc 1 54 55 296 | ret; 297 | Ltmp13: 298 | Lfunc_end3: 299 | 300 | } 301 | // .globl __intersect__noop 302 | .visible .entry __intersect__noop() 303 | { 304 | 305 | .loc 1 57 0 306 | Lfunc_begin4: 307 | .loc 1 57 0 308 | 309 | 310 | .loc 1 57 52 311 | ret; 312 | Ltmp14: 313 | Lfunc_end4: 314 | 315 | } 316 | // .globl __miss__noop 317 | .visible .entry __miss__noop() 318 | { 319 | 320 | .loc 1 60 0 321 | Lfunc_begin5: 322 | .loc 1 60 0 323 | 324 | 325 | .loc 1 60 47 326 | ret; 327 | Ltmp15: 328 | Lfunc_end5: 329 | 330 | } 331 | // .globl __direct_callable__noop 332 | .visible .entry __direct_callable__noop() 333 | { 334 | 335 | .loc 1 63 0 336 | Lfunc_begin6: 337 | .loc 1 63 0 338 | 339 | 340 | .loc 1 63 58 341 | ret; 342 | Ltmp16: 343 | Lfunc_end6: 344 | 345 | } 346 | // .globl __continuation_callable__noop 347 | .visible .entry __continuation_callable__noop() 348 | { 349 | 350 | .loc 1 66 0 351 | Lfunc_begin7: 352 | .loc 1 66 0 353 | 354 | 355 | .loc 1 66 64 356 | ret; 357 | Ltmp17: 358 | Lfunc_end7: 359 | 360 | } 361 | .file 1 "/home/kmorley/Code/optix_sdk/samples_exp/optixHello/draw_solid_color.cu" 362 | .file 2 
"/home/kmorley/Code/optix_sdk/include/internal/optix_7_device_impl.h" 363 | .file 3 "/home/kmorley/Code/optix_sdk/samples_exp/sutil/vec_math.h" 364 | .file 4 "/home/kmorley/Code/optix_sdk/samples_exp/cuda/helpers.h" 365 | .file 5 "/usr/local/cuda/include/crt/math_functions.hpp" 366 | .section .debug_str 367 | { 368 | Linfo_string0: 369 | .b8 95,90,78,55,51,95,73,78,84,69,82,78,65,76,95,53,49,95,116,109,112,120,102,116,95,48,48,49,48,102,48,57,54,95,48,48,48,48,48,48 370 | .b8 48,48,95,55,95,100,114,97,119,95,115,111,108,105,100,95,99,111,108,111,114,95,99,112,112,49,95,105,105,95,51,101,52,98,52,55,50,54,49,57 371 | .b8 111,112,116,105,120,71,101,116,76,97,117,110,99,104,73,110,100,101,120,69,118,0 372 | Linfo_string1: 373 | .b8 95,90,78,55,51,95,73,78,84,69,82,78,65,76,95,53,49,95,116,109,112,120,102,116,95,48,48,49,48,102,48,57,54,95,48,48,48,48,48,48 374 | .b8 48,48,95,55,95,100,114,97,119,95,115,111,108,105,100,95,99,111,108,111,114,95,99,112,112,49,95,105,105,95,51,101,52,98,52,55,50,54,50,50 375 | .b8 111,112,116,105,120,71,101,116,83,98,116,68,97,116,97,80,111,105,110,116,101,114,69,118,0 376 | Linfo_string2: 377 | .b8 95,90,50,49,113,117,97,110,116,105,122,101,85,110,115,105,103,110,101,100,56,66,105,116,115,102,0 378 | 379 | } 380 | ''' 381 | 382 | class Logger: 383 | def __init__( self ): 384 | self.num_mssgs = 0 385 | 386 | def __call__( self, level, tag, mssg ): 387 | print( "[{:>2}][{:>12}]: {}".format( level, tag, mssg ) ) 388 | self.num_mssgs += 1 389 | 390 | 391 | def log_callback( level, tag, mssg ): 392 | print( "[{:>2}][{:>12}]: {}".format( level, tag, mssg ) ) 393 | 394 | 395 | def create_default_ctx(): 396 | ctx_options = optix.DeviceContextOptions() 397 | 398 | cu_ctx = 0 399 | return optix.deviceContextCreate( cu_ctx, ctx_options ) 400 | 401 | 402 | def optix_version_gte( version ): 403 | if optix.version()[0] > version[0]: 404 | return True 405 | if optix.version()[0] == version[0] and optix.version()[1] >= version[1]: 406 | return True 407 
def create_default_module():
    """
    Create an OptiX module from the embedded ``ptx_string``.

    A default device context is created via ``create_default_ctx`` and the
    module is compiled with default-constructed module and pipeline compile
    options.

    Returns:
        A ``(ctx, module)`` tuple holding the context the module was created
        on and the module itself.  The creation log is discarded.
    """
    context = create_default_ctx()

    module_compile_options   = optix.ModuleCompileOptions()
    pipeline_compile_options = optix.PipelineCompileOptions()

    # moduleCreateFromPTX yields a ( module, log ) pair; only the module is used.
    module, _creation_log = context.moduleCreateFromPTX(
        module_compile_options,
        pipeline_compile_options,
        ptx_string,
        )

    return ( context, module )
def array_to_device_memory( numpy_array, stream=None ):
    """
    Upload the bytes of a host numpy array into freshly allocated device memory.

    Args:
        numpy_array: Host array to upload.  Its buffer is read starting at
            ``numpy_array.ctypes.data`` for ``numpy_array.nbytes`` bytes, so it
            is assumed to be contiguous -- TODO confirm for any new caller.
        stream: Optional cupy stream on which to enqueue the copy.  When a
            stream is passed, the copy is only enqueued and the caller is
            responsible for synchronizing that stream.  When omitted, a
            private stream is created for this call and synchronized before
            returning.

    Returns:
        The device allocation returned by ``cp.cuda.memory.alloc``; the caller
        must keep a reference to it for as long as the device memory is used.
    """
    # The default used to be ``stream=cp.cuda.Stream()``.  Default values are
    # evaluated once, when the function is defined, so that created a CUDA
    # stream as an import side effect and silently shared it between all calls.
    owns_stream = stream is None
    if owns_stream:
        stream = cp.cuda.Stream()

    byte_size = numpy_array.nbytes

    h_ptr = ctypes.c_void_p( numpy_array.ctypes.data )
    d_mem = cp.cuda.memory.alloc( byte_size )
    d_mem.copy_from_async( h_ptr, byte_size, stream )

    # Nobody else can wait on a stream we created ourselves, so make sure the
    # copy has finished before handing the memory back.
    if owns_stream:
        stream.synchronize()
    return d_mem
def create_ctx():
    """
    Create the OptiX device context used by this example.

    Side effects:
        Stores a new ``Logger`` instance in the module-level global ``logger``
        so that ``main`` can report how many log messages were received.

    Returns:
        The context returned by ``optix.deviceContextCreate``.
    """
    print( "Creating optix device context ..." )

    # Note that log callback data is no longer needed.  We can
    # instead send a callable class instance as the log-function
    # which stores any data needed
    global logger
    logger = Logger()

    # OptiX param struct fields can be set with optional
    # keyword constructor arguments.
    ctx_options = optix.DeviceContextOptions(
        logCallbackFunction = logger,
        logCallbackLevel    = 4
        )

    # They can also be set and queried as properties on the struct.
    # Compare the full (major, minor) version: testing only the minor number
    # (``optix.version()[1] >= 2``) wrongly skips validation on 8.0, 8.1, 9.0, ...
    if optix_version_gte( (7,2) ):
        ctx_options.validationMode = optix.DEVICE_CONTEXT_VALIDATION_MODE_ALL

    # NOTE(review): 0 presumably selects the current CUDA context -- confirm
    # against the optixDeviceContextCreate documentation.
    cu_ctx = 0
    return optix.deviceContextCreate( cu_ctx, ctx_options )
def create_program_groups( ctx, module ):
    """
    Create the raygen and miss program groups for the hello example.

    Args:
        ctx: OptiX device context on which the program groups are created.
        module: Module providing the ``__raygen__hello`` entry point.

    Returns:
        A ``( raygen_prog_group, miss_prog_group )`` tuple.
    """
    print( "Creating program groups ... " )

    raygen_prog_group_desc                         = optix.ProgramGroupDesc()
    raygen_prog_group_desc.raygenModule            = module
    raygen_prog_group_desc.raygenEntryFunctionName = "__raygen__hello"

    if optix_version_gte( (7,4) ):
        # ProgramGroupOptions introduced in OptiX 7.4
        program_group_options = optix.ProgramGroupOptions()
        raygen_prog_groups, log = ctx.programGroupCreate(
            [ raygen_prog_group_desc ],
            program_group_options,
            )
    else:
        raygen_prog_groups, log = ctx.programGroupCreate(
            [ raygen_prog_group_desc ]
            )
    print( "\tProgramGroup raygen create log: <<<{}>>>".format( log ) )

    # The miss group has an empty entry function name and no module.
    # Even in 7.4+, the OptixProgramGroupOptions param is optional, so no
    # options object is built here.  (One used to be constructed
    # unconditionally and never used, which fails on versions older than 7.4
    # where optix.ProgramGroupOptions does not exist.)
    miss_prog_group_desc = optix.ProgramGroupDesc( missEntryFunctionName = "")
    miss_prog_groups, log = ctx.programGroupCreate(
        [ miss_prog_group_desc ]
        )
    print( "\tProgramGroup miss create log: <<<{}>>>".format( log ) )

    return ( raygen_prog_groups[0], miss_prog_groups[0] )
" ) 251 | pipeline_link_options = optix.PipelineLinkOptions() 252 | pipeline_link_options.maxTraceDepth = 0 253 | 254 | log = "" 255 | pipeline = ctx.pipelineCreate( 256 | pipeline_compile_options, 257 | pipeline_link_options, 258 | [raygen_prog_group], 259 | log 260 | ) 261 | 262 | stack_sizes = optix.StackSizes() 263 | if optix_version_gte( (7,7) ): 264 | optix.util.accumulateStackSizes( raygen_prog_group, stack_sizes, pipeline ) 265 | else: 266 | optix.util.accumulateStackSizes( raygen_prog_group, stack_sizes ) 267 | 268 | (dc_stack_size_from_trav, dc_stack_size_from_state, cc_stack_size) = \ 269 | optix.util.computeStackSizes( 270 | stack_sizes, 271 | 0, # maxTraceDepth 272 | 0, # maxCCDepth 273 | 0 # maxDCDepth 274 | ) 275 | 276 | pipeline.setStackSize( 277 | dc_stack_size_from_trav, 278 | dc_stack_size_from_state, 279 | cc_stack_size, 280 | 2 # maxTraversableDepth 281 | ) 282 | 283 | return pipeline 284 | 285 | 286 | def create_sbt( raygen_prog_group, miss_prog_group ): 287 | print( "Creating sbt ... 
" ) 288 | 289 | global d_raygen_sbt 290 | global d_miss_sbt 291 | 292 | header_format = '{}B'.format( optix.SBT_RECORD_HEADER_SIZE ) 293 | 294 | # 295 | # raygen record 296 | # 297 | formats = [ header_format, 'f4', 'f4', 'f4' ] 298 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 299 | dtype = np.dtype( { 300 | 'names' : ['header', 'r', 'g', 'b' ], 301 | 'formats' : formats, 302 | 'itemsize': itemsize, 303 | 'align' : True 304 | } ) 305 | h_raygen_sbt = np.array( [ (0, 0.462, 0.725, 0.0 ) ], dtype=dtype ) 306 | optix.sbtRecordPackHeader( raygen_prog_group, h_raygen_sbt ) 307 | d_raygen_sbt = array_to_device_memory( h_raygen_sbt ) 308 | 309 | # 310 | # miss record 311 | # 312 | formats = [ header_format, 'i4'] 313 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 314 | dtype = np.dtype( { 315 | 'names' : ['header', 'x' ], 316 | 'formats' : formats, 317 | 'itemsize': itemsize, 318 | 'align' : True 319 | } ) 320 | h_miss_sbt = np.array( [ (0, 127 ) ], dtype=dtype ) 321 | optix.sbtRecordPackHeader( miss_prog_group, h_miss_sbt ) 322 | d_miss_sbt = array_to_device_memory( h_miss_sbt ) 323 | 324 | sbt = optix.ShaderBindingTable() 325 | sbt.raygenRecord = d_raygen_sbt.ptr 326 | sbt.missRecordBase = d_miss_sbt.ptr 327 | sbt.missRecordStrideInBytes = h_miss_sbt.dtype.itemsize 328 | sbt.missRecordCount = 1 329 | return sbt 330 | 331 | 332 | def launch( pipeline, sbt ): 333 | print( "Launching ... 
" ) 334 | 335 | pix_bytes = pix_width*pix_height*4 336 | 337 | h_pix = np.zeros( (pix_width,pix_height,4), 'B' ) 338 | h_pix[0:256, 0:256] = [255, 128, 0, 255] 339 | d_pix = cp.array( h_pix ) 340 | 341 | formats = ['u8', 'u4'] 342 | itemsize = get_aligned_itemsize( formats, 8 ) 343 | params_dtype = np.dtype( { 344 | 'names' : ['image', 'image_width' ], 345 | 'formats' : formats, 346 | 'itemsize': itemsize, 347 | 'align' : True 348 | } ) 349 | h_params = np.array( [ ( d_pix.data.ptr, pix_width ) ], dtype=params_dtype ) 350 | d_params = array_to_device_memory( h_params ) 351 | 352 | stream = cp.cuda.Stream() 353 | optix.launch( 354 | pipeline, 355 | stream.ptr, 356 | d_params.ptr, 357 | h_params.dtype.itemsize, 358 | sbt, 359 | pix_width, 360 | pix_height, 361 | 1 # depth 362 | ) 363 | 364 | stream.synchronize() 365 | 366 | h_pix = cp.asnumpy( d_pix ) 367 | return h_pix 368 | 369 | 370 | #------------------------------------------------------------------------------- 371 | # 372 | # main 373 | # 374 | #------------------------------------------------------------------------------- 375 | 376 | 377 | def main(): 378 | print( "PyOptiX: OptiX SDK {}".format(optix.version())) 379 | ctx = create_ctx() 380 | 381 | hello_cu = os.path.join(os.path.dirname(__file__), 'hello.cu') 382 | hello_ptx = compile_cuda(hello_cu) 383 | pipeline_options = set_pipeline_options() 384 | module = create_module( ctx, pipeline_options, hello_ptx ) 385 | 386 | raygen_prog_group, miss_prog_group = create_program_groups( ctx, module ) 387 | pipeline = create_pipeline( ctx, raygen_prog_group, pipeline_options ) 388 | sbt = create_sbt( raygen_prog_group, miss_prog_group ) 389 | pix = launch( pipeline, sbt ) 390 | 391 | print( "Total number of log messages: {}".format( logger.num_mssgs ) ) 392 | 393 | img = Image.fromarray( pix, 'RGBA' ) 394 | img.save( 'my.png' ) 395 | img.show() 396 | 397 | 398 | if __name__ == "__main__": 399 | main() 400 | 
def round_up( val, mult_of ):
    """
    Round ``val`` up to the nearest multiple of ``mult_of``.

    A value that is already a multiple of ``mult_of`` is returned unchanged.
    """
    remainder = val % mult_of
    if remainder == 0:
        return val
    return val + mult_of - remainder
def compile_cuda( cuda_file ):
    """
    Compile a CUDA source file to PTX with NVRTC.

    Args:
        cuda_file: Path of the ``.cu`` file to compile.

    Returns:
        The buffer filled in by ``nvrtc.nvrtcGetPTX`` (a ``bytes`` object of
        the size reported by ``nvrtc.nvrtcGetPTXSize``).

    Raises:
        RuntimeError: raised by ``checkNVRTC`` when an NVRTC call reports an
            error; the program log is printed first for compile failures.
    """
    compile_options = [
        b'-use_fast_math',
        b'-lineinfo',
        b'-default-device',
        b'-std=c++11',
        b'-rdc',
        b'true',
        f'-I{path_util.include_path}'.encode(),
        f'-I{path_util.cuda_tk_path}'.encode()
    ]
    # Optix 7.0 compiles need path to system stddef.h
    # the value of path_util.stddef_path is a compiled-in constant.  When
    # building the module, the value can be specified via an environment
    # variable, e.g.
    #   export PYOPTIX_STDDEF_DIR="/usr/include/linux"
    if not optix_version_gte( (7,1) ):
        # Every option handed to NVRTC here is a bytes object; this one used
        # to be appended as a str, unlike all the others.
        compile_options.append( f'-I{path_util.stddef_path}'.encode() )
    print("pynvrtc compile options = {}".format(compile_options))

    with open( cuda_file, 'rb' ) as f:
        src = f.read()

    # Create program
    prog = checkNVRTC(nvrtc.nvrtcCreateProgram(src, cuda_file.encode(), 0, [], []))

    # Compile program; passing prog lets checkNVRTC print the compile log on failure
    checkNVRTC(nvrtc.nvrtcCompileProgram(prog, len(compile_options), compile_options), prog)

    # Get PTX from compilation: allocate a buffer of the reported size and
    # let NVRTC fill it
    ptxSize = checkNVRTC(nvrtc.nvrtcGetPTXSize(prog))
    ptx = b" " * ptxSize
    checkNVRTC(nvrtc.nvrtcGetPTX(prog, ptx))
    return ptx
def set_pipeline_options():
    """
    Build the pipeline compile options for the triangle example.

    Returns:
        An ``optix.PipelineCompileOptions`` with three payload values, three
        attribute values, the launch-parameter variable name ``"params"`` and,
        where the OptiX version supports it, the primitive-type flags
        restricted to triangles.
    """
    options = dict(
        usesMotionBlur                   = False,
        traversableGraphFlags            = int( optix.TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS ),
        numPayloadValues                 = 3,
        numAttributeValues               = 3,
        exceptionFlags                   = int( optix.EXCEPTION_FLAG_NONE ),
        pipelineLaunchParamsVariableName = "params",
        )

    # Compare the full (major, minor) version: testing only the minor number
    # (``optix.version()[1] >= 2``) wrongly drops this field on 8.0, 8.1, 9.0, ...
    # NOTE(review): the 7.2 threshold is kept from the original check -- confirm
    # the first OptiX release that exposes usesPrimitiveTypeFlags.
    if optix_version_gte( (7,2) ):
        options[ 'usesPrimitiveTypeFlags' ] = optix.PRIMITIVE_TYPE_FLAGS_TRIANGLE

    return optix.PipelineCompileOptions( **options )
" ) 249 | 250 | raygen_prog_group_desc = optix.ProgramGroupDesc() 251 | raygen_prog_group_desc.raygenModule = module 252 | raygen_prog_group_desc.raygenEntryFunctionName = "__raygen__rg" 253 | raygen_prog_group, log = ctx.programGroupCreate( 254 | [ raygen_prog_group_desc ] 255 | ) 256 | print( "\tProgramGroup raygen create log: <<<{}>>>".format( log ) ) 257 | 258 | miss_prog_group_desc = optix.ProgramGroupDesc() 259 | miss_prog_group_desc.missModule = module 260 | miss_prog_group_desc.missEntryFunctionName = "__miss__ms" 261 | program_group_options = optix.ProgramGroupOptions() 262 | miss_prog_group, log = ctx.programGroupCreate( 263 | [ miss_prog_group_desc ] 264 | ) 265 | print( "\tProgramGroup miss create log: <<<{}>>>".format( log ) ) 266 | 267 | hitgroup_prog_group_desc = optix.ProgramGroupDesc() 268 | hitgroup_prog_group_desc.hitgroupModuleCH = module 269 | hitgroup_prog_group_desc.hitgroupEntryFunctionNameCH = "__closesthit__ch" 270 | hitgroup_prog_group, log = ctx.programGroupCreate( 271 | [ hitgroup_prog_group_desc ] 272 | ) 273 | print( "\tProgramGroup hitgroup create log: <<<{}>>>".format( log ) ) 274 | 275 | return [ raygen_prog_group[0], miss_prog_group[0], hitgroup_prog_group[0] ] 276 | 277 | 278 | def create_pipeline( ctx, program_groups, pipeline_compile_options ): 279 | print( "Creating pipeline ... 
" ) 280 | 281 | max_trace_depth = 1 282 | pipeline_link_options = optix.PipelineLinkOptions() 283 | pipeline_link_options.maxTraceDepth = max_trace_depth 284 | 285 | log = "" 286 | pipeline = ctx.pipelineCreate( 287 | pipeline_compile_options, 288 | pipeline_link_options, 289 | program_groups, 290 | log) 291 | 292 | stack_sizes = optix.StackSizes() 293 | for prog_group in program_groups: 294 | if optix_version_gte( (7,7) ): 295 | optix.util.accumulateStackSizes( prog_group, stack_sizes, pipeline ) 296 | else: 297 | optix.util.accumulateStackSizes( prog_group, stack_sizes ) 298 | 299 | (dc_stack_size_from_trav, dc_stack_size_from_state, cc_stack_size) = \ 300 | optix.util.computeStackSizes( 301 | stack_sizes, 302 | max_trace_depth, 303 | 0, # maxCCDepth 304 | 0 # maxDCDepth 305 | ) 306 | 307 | pipeline.setStackSize( 308 | dc_stack_size_from_trav, 309 | dc_stack_size_from_state, 310 | cc_stack_size, 311 | 1 # maxTraversableDepth 312 | ) 313 | 314 | return pipeline 315 | 316 | 317 | def create_sbt( prog_groups ): 318 | print( "Creating sbt ... 
" ) 319 | 320 | (raygen_prog_group, miss_prog_group, hitgroup_prog_group ) = prog_groups 321 | 322 | global d_raygen_sbt 323 | global d_miss_sbt 324 | 325 | header_format = '{}B'.format( optix.SBT_RECORD_HEADER_SIZE ) 326 | 327 | # 328 | # raygen record 329 | # 330 | formats = [ header_format ] 331 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 332 | dtype = np.dtype( { 333 | 'names' : ['header' ], 334 | 'formats' : formats, 335 | 'itemsize': itemsize, 336 | 'align' : True 337 | } ) 338 | h_raygen_sbt = np.array( [ 0 ], dtype=dtype ) 339 | optix.sbtRecordPackHeader( raygen_prog_group, h_raygen_sbt ) 340 | global d_raygen_sbt 341 | d_raygen_sbt = array_to_device_memory( h_raygen_sbt ) 342 | 343 | # 344 | # miss record 345 | # 346 | formats = [ header_format, 'f4', 'f4', 'f4'] 347 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 348 | dtype = np.dtype( { 349 | 'names' : ['header', 'r', 'g', 'b' ], 350 | 'formats' : formats, 351 | 'itemsize': itemsize, 352 | 'align' : True 353 | } ) 354 | h_miss_sbt = np.array( [ (0, 0.3, 0.1, 0.2) ], dtype=dtype ) 355 | optix.sbtRecordPackHeader( miss_prog_group, h_miss_sbt ) 356 | global d_miss_sbt 357 | d_miss_sbt = array_to_device_memory( h_miss_sbt ) 358 | 359 | # 360 | # hitgroup record 361 | # 362 | formats = [ header_format ] 363 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 364 | dtype = np.dtype( { 365 | 'names' : ['header' ], 366 | 'formats' : formats, 367 | 'itemsize': itemsize, 368 | 'align' : True 369 | } ) 370 | h_hitgroup_sbt = np.array( [ (0) ], dtype=dtype ) 371 | optix.sbtRecordPackHeader( hitgroup_prog_group, h_hitgroup_sbt ) 372 | global d_hitgroup_sbt 373 | d_hitgroup_sbt = array_to_device_memory( h_hitgroup_sbt ) 374 | 375 | return optix.ShaderBindingTable( 376 | raygenRecord = d_raygen_sbt.ptr, 377 | missRecordBase = d_miss_sbt.ptr, 378 | missRecordStrideInBytes = h_miss_sbt.dtype.itemsize, 379 | missRecordCount = 1, 380 | 
def launch( pipeline, sbt, trav_handle ):
    """
    Upload the launch parameters, run the pipeline and read back the image.

    Args:
        pipeline: OptiX pipeline to launch.
        sbt: Shader binding table passed to ``optix.launch``.
        trav_handle: Traversable handle stored in the ``trav_handle`` field
            of the launch parameters.

    Returns:
        A numpy ``uint8`` array of shape ``(pix_width, pix_height, 4)``
        copied back from the device after the launch has completed.
    """
    print( "Launching ... " )

    # Pre-fill the whole image so that untouched pixels are recognisable.
    # NOTE(review): the array is allocated as (width, height, 4); the caller
    # reshapes it to (height, width, 4) before handing it to PIL.
    h_pix = np.zeros( (pix_width,pix_height,4), 'B' )
    h_pix[:, :] = [255, 128, 0, 255]
    d_pix = cp.array( h_pix )

    # ( format, field name, value ) for every launch parameter, in struct order
    params = [
        ( 'u8', 'image',        d_pix.data.ptr ),
        ( 'u4', 'image_width',  pix_width      ),
        ( 'u4', 'image_height', pix_height     ),
        ( 'f4', 'cam_eye_x',    0              ),
        ( 'f4', 'cam_eye_y',    0              ),
        ( 'f4', 'cam_eye_z',    2.0            ),
        ( 'f4', 'cam_U_x',      1.10457        ),
        ( 'f4', 'cam_U_y',      0              ),
        ( 'f4', 'cam_U_z',      0              ),
        ( 'f4', 'cam_V_x',      0              ),
        ( 'f4', 'cam_V_y',      0.828427       ),
        ( 'f4', 'cam_V_z',      0              ),
        ( 'f4', 'cam_W_x',      0              ),
        ( 'f4', 'cam_W_y',      0              ),
        ( 'f4', 'cam_W_z',      -2.0           ),
        ( 'u8', 'trav_handle',  trav_handle    )
    ]

    formats  = [ x[0] for x in params ]
    names    = [ x[1] for x in params ]
    values   = [ x[2] for x in params ]
    itemsize = get_aligned_itemsize( formats, 8 )
    params_dtype = np.dtype( {
        'names'   : names,
        'formats' : formats,
        'itemsize': itemsize,
        'align'   : True
        } )
    h_params = np.array( [ tuple(values) ], dtype=params_dtype )

    # Enqueue the parameter upload on the same stream as the launch so the
    # two are ordered; previously the upload went to a different stream and
    # nothing guaranteed it had finished before the launch started.
    stream   = cp.cuda.Stream()
    d_params = array_to_device_memory( h_params, stream )

    optix.launch(
        pipeline,
        stream.ptr,
        d_params.ptr,
        h_params.dtype.itemsize,
        sbt,
        pix_width,
        pix_height,
        1 # depth
        )

    # Wait for the upload and the launch before reading the image back
    stream.synchronize()

    h_pix = cp.asnumpy( d_pix )
    return h_pix
def main():
    """Compile triangle.cu, build the OptiX state, render and save the image.

    The image returned by launch() is reshaped to ( pix_height, pix_width, 4 ),
    flipped vertically, shown, and written to 'my.png' in the current
    working directory.
    """
    source_dir = os.path.dirname( __file__ )
    ptx = compile_cuda( os.path.join( source_dir, 'triangle.cu' ) )

    context = create_ctx()
    # The second value returned by create_accel is kept bound for the whole
    # function -- presumably it is the device buffer backing the handle and
    # must stay alive until the launch has finished (confirm against
    # create_accel).
    trav_handle, accel_buffer = create_accel( context )
    compile_options = set_pipeline_options()
    module = create_module( context, compile_options, ptx )
    groups = create_program_groups( context, module )
    pipeline = create_pipeline( context, groups, compile_options )
    sbt = create_sbt( groups )
    pixels = launch( pipeline, sbt, trav_handle )

    print( "Total number of log messages: {}".format( logger.num_mssgs ) )

    # PIL indexes images as [ y, x ], so reinterpret the buffer rows-first.
    rows_first = pixels.reshape( ( pix_height, pix_width, 4 ) )
    image = Image.fromarray( rows_first, 'RGBA' )
    # Flip top <-> bottom before displaying and saving.
    image = ImageOps.flip( image )
    image.show()
    image.save( 'my.png' )
def checkNVRTC(result, prog = None):
    """Validate the tuple returned by an nvrtc call and unwrap its payload.

    Args:
        result: Sequence whose first element is a status object with a
                `.value` attribute; a non-zero value signals failure.
        prog:   Optional nvrtc program. When given and the call failed, its
                program log is fetched and printed before raising.

    Returns:
        None if `result` carries only the status, the single extra element
        if there is exactly one, otherwise `result[1:]`.

    Raises:
        RuntimeError: if the status value is non-zero.
    """
    status = result[0]

    if status.value:
        if prog:
            log_status, log_size = nvrtc.nvrtcGetProgramLogSize(prog)
            if not log_status.value:
                # nvrtc fills a caller-provided buffer of the reported size.
                log_buffer = b" " * log_size
                nvrtc.nvrtcGetProgramLog(prog, log_buffer)
                print(log_buffer.decode())
        error_name = nvrtc.nvrtcGetErrorString(status)[1]
        raise RuntimeError("NVRTC error code={}({})".format(status.value, error_name))

    payload = result[1:]
    if not payload:
        return None
    if len(payload) == 1:
        return payload[0]
    return payload
def array_to_device_memory( numpy_array, stream=None ):
    """Copy the bytes of a numpy array into newly allocated device memory.

    Args:
        numpy_array: Host array to upload; size * itemsize bytes are copied
                     starting at `numpy_array.ctypes.data`.
        stream:      Optional cupy stream on which the copy is enqueued.
                     When given, the copy is asynchronous and the caller is
                     responsible for ordering later work on that stream and
                     for keeping `numpy_array` alive until the copy is done.
                     When omitted, a private stream is used and synchronized
                     before returning, so the data is on the device when the
                     function returns.

    Returns:
        The object returned by cp.cuda.memory.alloc for the uploaded bytes.
    """
    byte_size = numpy_array.size*numpy_array.dtype.itemsize

    h_ptr = ctypes.c_void_p( numpy_array.ctypes.data )
    d_mem = cp.cuda.memory.alloc( byte_size )

    if stream is None:
        # A default of `cp.cuda.Stream()` in the signature would be created
        # once, at definition time, and silently shared by every call;
        # nothing would ever wait for copies issued on it.
        upload_stream = cp.cuda.Stream()
        d_mem.copy_from_async( h_ptr, byte_size, upload_stream )
        upload_stream.synchronize()
    else:
        d_mem.copy_from_async( h_ptr, byte_size, stream )

    return d_mem
def create_ctx():
    """Create the OptiX device context for this example.

    Side effects:
        Rebinds the module-level `logger` to a fresh Logger instance and
        installs it as the context's log callback (level 4), so the number
        of messages it received can be read later from `logger.num_mssgs`.

    Returns:
        The object returned by optix.deviceContextCreate.
    """
    print( "Creating optix device context ..." )

    # Note that log callback data is no longer needed.  We can
    # instead send a callable class instance as the log-function
    # which stores any data needed
    global logger
    logger = Logger()

    # OptiX param struct fields can be set with optional
    # keyword constructor arguments.
    ctx_options = optix.DeviceContextOptions(
            logCallbackFunction = logger,
            logCallbackLevel    = 4
            )

    # They can also be set and queried as properties on the struct.
    # The guard compares the full ( major, minor ) version: testing only the
    # minor number would wrongly skip validation on releases such as 8.0.
    if optix_version_gte( (7,2) ):
        ctx_options.validationMode = optix.DEVICE_CONTEXT_VALIDATION_MODE_ALL

    # NOTE(review): 0 presumably selects the current CUDA context -- confirm
    # against the OptiX documentation for optixDeviceContextCreate.
    cu_ctx = 0
    return optix.deviceContextCreate( cu_ctx, ctx_options )
def create_module( ctx, pipeline_options, sphere_ptx ):
    """Create an OptiX module from PTX and print the creation log.

    Args:
        ctx:              Device context providing `moduleCreate`.
        pipeline_options: Pipeline compile options forwarded to moduleCreate.
        sphere_ptx:       PTX input forwarded to moduleCreate.

    Returns:
        The module returned by ctx.moduleCreate (the accompanying log is
        printed, not returned).
    """
    print( "Creating OptiX module ..." )

    # All three settings use the defaults exposed by the optix module.
    compile_settings = {
        'maxRegisterCount' : optix.COMPILE_DEFAULT_MAX_REGISTER_COUNT,
        'optLevel'         : optix.COMPILE_OPTIMIZATION_DEFAULT,
        'debugLevel'       : optix.COMPILE_DEBUG_LEVEL_DEFAULT,
        }
    module_options = optix.ModuleCompileOptions( **compile_settings )

    created = ctx.moduleCreate( module_options, pipeline_options, sphere_ptx )
    module, log = created
    print( "\tModule create log: <<<{}>>>".format( log ) )
    return module
" ) 264 | 265 | raygen_program_desc = optix.ProgramGroupDesc() 266 | raygen_program_desc.raygenModule = module 267 | raygen_program_desc.raygenEntryFunctionName = "__raygen__rg" 268 | raygen_prog_groups, log = ctx.programGroupCreate( 269 | [ raygen_program_desc ] 270 | ) 271 | print( "\tProgramGroup raygen create log: <<<{}>>>".format( log ) ) 272 | 273 | miss_prog_group_desc = optix.ProgramGroupDesc() 274 | miss_prog_group_desc.missModule = module 275 | miss_prog_group_desc.missEntryFunctionName = "__miss__ms" 276 | miss_prog_groups, log = ctx.programGroupCreate( 277 | [ miss_prog_group_desc ] 278 | ) 279 | print( "\tProgramGroup mis create log: <<<{}>>>".format( log ) ) 280 | 281 | hitgroup_prog_group_desc = optix.ProgramGroupDesc() 282 | hitgroup_prog_group_desc.hitgroupModuleCH = module 283 | hitgroup_prog_group_desc.hitgroupEntryFunctionNameCH = "__closesthit__ch" 284 | hitgroup_prog_group_desc.hitgroupModuleIS = module 285 | hitgroup_prog_group_desc.hitgroupEntryFunctionNameIS = "__intersection__sphere" 286 | hitgroup_prog_groups, log = ctx.programGroupCreate( 287 | [ hitgroup_prog_group_desc ] 288 | ) 289 | print( "\tProgramGroup hitgroup create log: <<<{}>>>".format( log ) ) 290 | 291 | return [ raygen_prog_groups[0], miss_prog_groups[0], hitgroup_prog_groups[0] ] 292 | 293 | 294 | def create_pipeline( ctx, program_groups, pipeline_compile_options ): 295 | print( "Creating pipeline ... 
" ) 296 | 297 | max_trace_depth = 1 298 | pipeline_link_options = optix.PipelineLinkOptions() 299 | pipeline_link_options.maxTraceDepth = max_trace_depth 300 | 301 | log = "" 302 | pipeline = ctx.pipelineCreate( 303 | pipeline_compile_options, 304 | pipeline_link_options, 305 | program_groups, 306 | log) 307 | 308 | stack_sizes = optix.StackSizes() 309 | for prog_group in program_groups: 310 | if optix_version_gte( (7,7) ): 311 | optix.util.accumulateStackSizes( prog_group, stack_sizes, pipeline ) 312 | else: 313 | optix.util.accumulateStackSizes( prog_group, stack_sizes ) 314 | 315 | ( dc_stack_size_from_trav, dc_stack_size_from_state, cc_stack_size ) = \ 316 | optix.util.computeStackSizes( 317 | stack_sizes, 318 | max_trace_depth, 319 | 0, # maxCCDepth 320 | 0 # maxDCDepth 321 | ) 322 | 323 | pipeline.setStackSize( 324 | dc_stack_size_from_trav, 325 | dc_stack_size_from_state, 326 | cc_stack_size, 327 | 1 # maxTraversableDepth 328 | ) 329 | 330 | return pipeline 331 | 332 | 333 | def create_sbt( prog_groups ): 334 | print( "Creating sbt ... 
" ) 335 | 336 | (raygen_prog_group, miss_prog_group, hitgroup_prog_group ) = prog_groups 337 | 338 | header_format = '{}B'.format( optix.SBT_RECORD_HEADER_SIZE ) 339 | 340 | # 341 | # raygen record 342 | # 343 | formats = [ header_format, 'f4','f4','f4', 344 | 'f4','f4','f4', 345 | 'f4','f4','f4', 346 | 'f4','f4','f4' ] 347 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 348 | dtype = np.dtype( { 349 | 'names' : ['header', 'eye_x','eye_y','eye_z', 350 | 'u_x', 'u_y', 'u_z', 351 | 'v_x', 'v_y', 'v_z', 352 | 'w_x', 'w_y', 'w_z' 353 | ], 354 | 'formats' : formats, 355 | 'itemsize' : itemsize, 356 | 'align' : True 357 | }) 358 | h_raygen_sbt = np.array( [ ( 0, 0.0, 0.0, 3.0, 359 | 2.31, -0.0, 0.0, 360 | 0.0, 1.73, 0.0, 361 | 0.0, 0.0, -3.0 362 | ) ], dtype = dtype ) 363 | optix.sbtRecordPackHeader( raygen_prog_group, h_raygen_sbt ) 364 | global d_raygen_sbt 365 | d_raygen_sbt = array_to_device_memory( h_raygen_sbt ) 366 | 367 | # 368 | # miss record 369 | # 370 | formats = [ header_format, 'f4', 'f4', 'f4' ] 371 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 372 | dtype = np.dtype( { 373 | 'names' : ['header', 'r', 'g', 'b' ], 374 | 'formats' : formats, 375 | 'itemsize' : itemsize, 376 | 'align' : True 377 | }) 378 | h_miss_sbt = np.array( [ (0, 0.3, 0.1, 0.2) ], dtype = dtype ) 379 | optix.sbtRecordPackHeader( miss_prog_group, h_miss_sbt ) 380 | global d_miss_sbt 381 | d_miss_sbt = array_to_device_memory( h_miss_sbt ) 382 | 383 | # 384 | # hitgroup record 385 | # 386 | formats = [ header_format, 'f4', 'f4', 'f4', 'f4' ] 387 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 388 | dtype = np.dtype( { 389 | 'names' : ['header', 'center_x', 'center_y', 'center_z', 'radius' ], 390 | 'formats' : formats, 391 | 'itemsize' : itemsize, 392 | 'align' : True 393 | } ) 394 | h_hitgroup_sbt = np.array( [ ( 0, 0.0, 0.0, 0.0, 1.5) ], dtype=dtype ) 395 | optix.sbtRecordPackHeader( hitgroup_prog_group, 
def launch( pipeline, sbt, trav_handle ):
    """Run the pipeline once and return the rendered pixels as a host array.

    Args:
        pipeline:    Pipeline object passed to optix.launch.
        sbt:         Shader binding table passed to optix.launch.
        trav_handle: Handle stored in the 'trav_handle' launch parameter.

    Returns:
        A numpy uint8 array of shape ( pix_width, pix_height, 4 ), copied
        back from the device after the launch stream has been synchronized.
    """
    print( "Launching ... " )

    # Start from an image in which every pixel is [255, 128, 0, 255].
    h_pix = np.zeros( ( pix_width, pix_height, 4 ), 'B' )
    h_pix[:, :] = [ 255, 128, 0, 255 ]
    d_pix = cp.array( h_pix )

    # ( numpy format, field name, value ) for every launch parameter.
    # origin_x / origin_y are unsigned integer fields, so use floor
    # division instead of producing floats that numpy then has to cast.
    params = [
        ( 'u8', 'image',        d_pix.data.ptr  ),
        ( 'u4', 'image_width',  pix_width       ),
        ( 'u4', 'image_height', pix_height      ),
        ( 'u4', 'origin_x',     pix_width // 2  ),
        ( 'u4', 'origin_y',     pix_height // 2 ),
        ( 'u8', 'trav_handle',  trav_handle     )
    ]

    formats = [ fmt   for fmt, _, _   in params ]
    names   = [ name  for _, name, _  in params ]
    values  = [ value for _, _, value in params ]

    # Pack the parameters into one aligned record; get_aligned_itemsize
    # rounds the record size up to a multiple of 8 bytes.
    itemsize = get_aligned_itemsize( formats, 8 )
    params_dtype = np.dtype( {
        'names'   : names,
        'formats' : formats,
        'itemsize': itemsize,
        'align'   : True
        } )
    h_params = np.array( [ tuple( values ) ], dtype=params_dtype )

    # Upload the parameters on the same stream the launch is enqueued on:
    # work on a single stream executes in order, so the launch cannot start
    # before the asynchronous copy has finished.
    stream = cp.cuda.Stream()
    d_params = array_to_device_memory( h_params, stream )

    optix.launch(
        pipeline,
        stream.ptr,
        d_params.ptr,
        h_params.dtype.itemsize,
        sbt,
        pix_width,
        pix_height,
        1 # depth
        )

    # Wait for copy + launch before reading the image back; h_params and
    # d_params stay referenced until this point.
    stream.synchronize()

    h_pix = cp.asnumpy( d_pix )
    return h_pix
def main():
    """Compile sphere.cu, build the OptiX state, render and save the image.

    The image returned by launch() is reshaped to ( pix_height, pix_width, 4 ),
    flipped vertically, shown, and written to 'my.png' in the current
    working directory.
    """
    source_dir = os.path.dirname( __file__ )
    ptx = compile_cuda( os.path.join( source_dir, 'sphere.cu' ) )

    context = create_ctx()
    # create_accel also returns the output buffer it passed to accelBuild;
    # keep it bound here so it is not released before the launch has run.
    trav_handle, accel_buffer = create_accel( context )
    compile_options = set_pipeline_options()
    module = create_module( context, compile_options, ptx )
    groups = create_program_groups( context, module )
    pipeline = create_pipeline( context, groups, compile_options )
    sbt = create_sbt( groups )
    pixels = launch( pipeline, sbt, trav_handle )

    print( "Total number of log messages: {}".format( logger.num_mssgs ) )

    # PIL indexes images as [ y, x ], so reinterpret the buffer rows-first.
    rows_first = pixels.reshape( ( pix_height, pix_width, 4 ) )
    image = Image.fromarray( rows_first, 'RGBA' )
    # Flip top <-> bottom before displaying and saving.
    image = ImageOps.flip( image )
    image.show()
    image.save( 'my.png' )
14 | # * Neither the name of NVIDIA CORPORATION nor the names of its 15 | # contributors may be used to endorse or promote products derived 16 | # from this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 19 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 22 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 26 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
def round_up( val, mult_of ):
    """Return `val` unchanged if it is a multiple of `mult_of`, otherwise
    the next larger multiple of `mult_of`."""
    remainder = val % mult_of
    if remainder == 0:
        return val
    return val + mult_of - remainder


def get_aligned_itemsize( formats, alignment ):
    """Return the size of an aligned record with the given field formats,
    rounded up to a multiple of `alignment`.

    Args:
        formats:   Sequence of numpy format strings, one per field.
        alignment: Byte multiple the resulting size is rounded up to.

    Returns:
        The itemsize of a numpy structured dtype built from `formats` with
        align=True, rounded up with round_up().
    """
    # Field names are irrelevant for the size; generate placeholders x0, x1, ...
    placeholder_names = [ 'x'+str( index ) for index in range( len( formats ) ) ]

    layout = np.dtype( {
        'names'   : placeholder_names,
        'formats' : formats,
        'align'   : True
        } )
    return round_up( layout.itemsize, alignment )
def compile_cuda( cuda_file ):
    """Compile a CUDA source file to PTX with NVRTC.

    Args:
        cuda_file: Path of the CUDA source file (str). Its contents are the
                   program source and the encoded path is passed to NVRTC as
                   the program name.

    Returns:
        The PTX produced by NVRTC, as the bytes buffer filled by
        nvrtcGetPTX.

    Raises:
        RuntimeError: raised by checkNVRTC when any NVRTC call fails; for
                      the compile step the program log is printed first.
    """
    compile_options = [
        b'-use_fast_math',
        b'-lineinfo',
        b'-default-device',
        b'-std=c++11',
        b'-rdc',
        b'true',
        f'-I{path_util.include_path}'.encode(),
        f'-I{path_util.cuda_tk_path}'.encode()
    ]
    # Optix 7.0 compiles need path to system stddef.h
    # the value of path_util.stddef_path is compiled in constant. When building
    # the module, the value can be specified via an environment variable, e.g.
    #   export PYOPTIX_STDDEF_DIR="/usr/include/linux"
    if not optix_version_gte( (7,1) ):
        # Encode like every other entry: the option list handed to NVRTC
        # must consist of bytes objects only.
        compile_options.append( f'-I{path_util.stddef_path}'.encode() )
    print("pynvrtc compile options = {}".format(compile_options))

    with open( cuda_file, 'rb' ) as f:
        src = f.read()

    # Create program
    prog = checkNVRTC(nvrtc.nvrtcCreateProgram(src, cuda_file.encode(), 0, [], []))

    # Compile program; passing `prog` makes checkNVRTC print the program log
    # if compilation fails.
    checkNVRTC(nvrtc.nvrtcCompileProgram(prog, len(compile_options), compile_options), prog)

    # Get PTX from compilation: query the size, then let NVRTC fill a
    # pre-sized buffer.
    ptxSize = checkNVRTC(nvrtc.nvrtcGetPTXSize(prog))
    ptx = b" " * ptxSize
    checkNVRTC(nvrtc.nvrtcGetPTX(prog, ptx))
    return ptx
def create_accel():
    """Build the acceleration structure for one round cubic B-spline curve.

    The curve consists of four control points with one width per point and
    a single segment starting at vertex 0.

    Side effects:
        Binds the module-level names `vertices`, `widths` and
        `segment_indices` to the device arrays whose pointers are handed to
        the build input (presumably so the buffers outlive this call --
        confirm).

    Returns:
        ( gas_handle, d_gas_output_buffer ): the handle returned by
        accelBuild and the device allocation used as its output buffer.
    """
    accel_options = optix.AccelBuildOptions(
        buildFlags = int( optix.BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS),
        operation  = optix.BUILD_OPERATION_BUILD
        )

    radius = 0.4

    # Four xyz control points, stored flat.
    global vertices
    vertices = cp.array( [
        -1.5, -3.5, 0.0,
        -1.0,  0.5, 0.0,
         1.0,  0.5, 0.0,
         1.5, -3.5, 0.0
        ], dtype = 'f4' )

    # One width per control point.
    global widths
    widths = cp.array( [
        0.01, radius, radius, 0.01
        ], dtype = 'f4' )

    # OptiX reads curve segment indices as 32-bit unsigned ints. The former
    # dtype 'int' is 64-bit on most platforms and only worked because the
    # single index is 0.
    global segment_indices
    segment_indices = cp.array( [ 0 ], dtype = 'u4' )

    curve_input = optix.BuildInputCurveArray()

    curve_input.numPrimitives        = 1
    curve_input.numVertices          = len( vertices )//3
    curve_input.vertexBuffers        = [ vertices.data.ptr ]
    curve_input.widthBuffers         = [ widths.data.ptr ]
    curve_input.normalBuffers        = [ 0 ]
    curve_input.indexBuffer          = segment_indices.data.ptr
    curve_input.curveType            = optix.PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE
    curve_input.flag                 = optix.GEOMETRY_FLAG_NONE
    curve_input.primitiveIndexOffset = 0

    # Ask OptiX how much scratch and output memory the build needs.
    gas_buffer_sizes = device_context.accelComputeMemoryUsage( [accel_options], [curve_input] )

    d_temp_buffer_gas   = cp.cuda.alloc( gas_buffer_sizes.tempSizeInBytes )
    d_gas_output_buffer = cp.cuda.alloc( gas_buffer_sizes.outputSizeInBytes )

    gas_handle = device_context.accelBuild(
        0,  # CUDA stream
        [ accel_options ],
        [ curve_input ],
        d_temp_buffer_gas.ptr,
        gas_buffer_sizes.tempSizeInBytes,
        d_gas_output_buffer.ptr,
        gas_buffer_sizes.outputSizeInBytes,
        [] # emitted properties
        )

    # The output buffer is returned together with the handle so the caller
    # can keep it referenced.
    return ( gas_handle, d_gas_output_buffer )
def create_program_groups():
    """Create the raygen, miss and hitgroup program groups.

    Reads the module-level `device_context`, `shading_module` and
    `geometry_module`, and prints the creation log of each group.

    Returns:
        [ raygen_group, miss_group, hitgroup_group ] -- the first group
        returned by each programGroupCreate call.
    """
    print( "Creating program groups ... " )

    def build( log_format, fields ):
        # Fill one descriptor field by field, create its program group list
        # and print the accompanying log using `log_format`.
        desc = optix.ProgramGroupDesc()
        for field_name, field_value in fields:
            setattr( desc, field_name, field_value )
        groups, log = device_context.programGroupCreate( [ desc ] )
        print( log_format.format( log ) )
        return groups

    raygen_groups = build(
        "\tProgramGroup raygen create log: <<<{}>>>",
        [
            ( 'raygenModule',            shading_module ),
            ( 'raygenEntryFunctionName', "__raygen__rg" ),
        ] )

    miss_groups = build(
        "\tProgramGroup miss create log: <<<{}>>>",
        [
            ( 'missModule',            shading_module ),
            ( 'missEntryFunctionName', "__miss__ms"   ),
        ] )

    hitgroup_groups = build(
        "\tProgramGroup hitgroup create log: <<<{}>>>",
        [
            ( 'hitgroupModuleCH',            shading_module     ),
            ( 'hitgroupEntryFunctionNameCH', "__closesthit__ch" ),
            ( 'hitgroupModuleIS',            geometry_module    ),
            # The intersection entry point is supplied by the built-in module.
            ( 'hitgroupEntryFunctionNameIS', ""                 ),
        ] )

    return [ raygen_groups[0], miss_groups[0], hitgroup_groups[0] ]
" ) 334 | 335 | max_trace_depth = 1 336 | pipeline_link_options = optix.PipelineLinkOptions() 337 | pipeline_link_options.maxTraceDepth = max_trace_depth 338 | 339 | log = "" 340 | pipeline = device_context.pipelineCreate( 341 | pipeline_compile_options, 342 | pipeline_link_options, 343 | program_groups, 344 | log 345 | ) 346 | 347 | stack_sizes = optix.StackSizes() 348 | for prog_group in program_groups: 349 | if optix_version_gte( (7,7) ): 350 | optix.util.accumulateStackSizes( prog_group, stack_sizes, pipeline ) 351 | else: 352 | optix.util.accumulateStackSizes( prog_group, stack_sizes ) 353 | 354 | ( dc_stack_size_from_trav, dc_stack_size_from_state, cc_stack_size ) = \ 355 | optix.util.computeStackSizes( 356 | stack_sizes, 357 | max_trace_depth, 358 | 0, # maxCCDepth 359 | 0, # maxDCDepth 360 | ) 361 | 362 | pipeline.setStackSize( 363 | dc_stack_size_from_trav, 364 | dc_stack_size_from_state, 365 | cc_stack_size, 366 | 1 # maxTraversableDepth 367 | ) 368 | 369 | return pipeline 370 | pipeline = create_pipeline() 371 | 372 | 373 | def create_sbt(): 374 | print( "Creating sbt ... 
" ) 375 | 376 | ( raygen_prog_group, miss_prog_group, hitgroup_prog_group ) = program_groups 377 | 378 | global d_raygen_sbt 379 | global d_miss_sbt 380 | 381 | header_format = '{}B'.format( optix.SBT_RECORD_HEADER_SIZE ) 382 | 383 | # 384 | # raygen record 385 | # 386 | formats = [ header_format ] 387 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 388 | dtype = np.dtype( { 389 | 'names' : ['header' ], 390 | 'formats' : formats, 391 | 'itemsize': itemsize, 392 | 'align' : True 393 | } ) 394 | h_raygen_sbt = np.array( [ 0 ], dtype=dtype ) 395 | optix.sbtRecordPackHeader( raygen_prog_group, h_raygen_sbt ) 396 | global d_raygen_sbt 397 | d_raygen_sbt = array_to_device_memory( h_raygen_sbt ) 398 | 399 | # 400 | # miss record 401 | # 402 | formats = [ header_format, 'f4', 'f4', 'f4'] 403 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 404 | dtype = np.dtype( { 405 | 'names' : ['header', 'r', 'g', 'b' ], 406 | 'formats' : formats, 407 | 'itemsize': itemsize, 408 | 'align' : True 409 | } ) 410 | h_miss_sbt = np.array( [ (0, 0.0, 0.2, 0.6) ], dtype=dtype ) 411 | optix.sbtRecordPackHeader( miss_prog_group, h_miss_sbt ) 412 | global d_miss_sbt 413 | d_miss_sbt = array_to_device_memory( h_miss_sbt ) 414 | 415 | # 416 | # hitgroup record 417 | # 418 | formats = [ header_format ] 419 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 420 | dtype = np.dtype( { 421 | 'names' : ['header' ], 422 | 'formats' : formats, 423 | 'itemsize': itemsize, 424 | 'align' : True 425 | } ) 426 | h_hitgroup_sbt = np.array( [ (0) ], dtype=dtype ) 427 | optix.sbtRecordPackHeader( hitgroup_prog_group, h_hitgroup_sbt ) 428 | global d_hitgroup_sbt 429 | d_hitgroup_sbt = array_to_device_memory( h_hitgroup_sbt ) 430 | 431 | return optix.ShaderBindingTable( 432 | raygenRecord = d_raygen_sbt.ptr, 433 | missRecordBase = d_miss_sbt.ptr, 434 | missRecordStrideInBytes = d_miss_sbt.mem.size, 435 | missRecordCount = 1, 436 | 
hitgroupRecordBase = d_hitgroup_sbt.ptr, 437 | hitgroupRecordStrideInBytes = d_hitgroup_sbt.mem.size, 438 | hitgroupRecordCount = 1 439 | ) 440 | sbt = create_sbt() 441 | 442 | 443 | def launch(): 444 | print( "Launching ... " ) 445 | 446 | width = 1024 447 | height = 768 448 | 449 | pix_bytes = width * height 450 | 451 | h_pix = np.zeros( ( width, height,4), 'B' ) 452 | h_pix[0:width, 0:height] = [255, 128, 0, 255] 453 | d_pix = cp.array( h_pix ) 454 | 455 | params = [ 456 | ( 'u8', 'image', d_pix.data.ptr ), 457 | ( 'u4', 'image_width', width ), 458 | ( 'u4', 'image_height', height ), 459 | ( 'f4', 'cam_eye_x', 0 ), 460 | ( 'f4', 'cam_eye_y', 0 ), 461 | ( 'f4', 'cam_eye_z', 2.0 ), 462 | ( 'f4', 'cam_U_x', 1.10457 ), 463 | ( 'f4', 'cam_U_y', 0 ), 464 | ( 'f4', 'cam_U_z', 0 ), 465 | ( 'f4', 'cam_V_x', 0 ), 466 | ( 'f4', 'cam_V_y', 0.828427 ), 467 | ( 'f4', 'cam_V_z', 0 ), 468 | ( 'f4', 'cam_W_x', 0 ), 469 | ( 'f4', 'cam_W_y', 0 ), 470 | ( 'f4', 'cam_W_z', -2.0 ), 471 | ( 'u8', 'trav_handle', gas_handle ) 472 | ] 473 | 474 | formats = [ x[0] for x in params ] 475 | names = [ x[1] for x in params ] 476 | values = [ x[2] for x in params ] 477 | itemsize = get_aligned_itemsize( formats, 8 ) 478 | params_dtype = np.dtype( { 479 | 'names' : names, 480 | 'formats' : formats, 481 | 'itemsize': itemsize, 482 | 'align' : True 483 | } ) 484 | h_params = np.array( [ tuple(values) ], dtype=params_dtype ) 485 | d_params = array_to_device_memory( h_params ) 486 | 487 | stream = cp.cuda.Stream() 488 | optix.launch( 489 | pipeline, 490 | stream.ptr, 491 | d_params.ptr, 492 | h_params.dtype.itemsize, 493 | sbt, 494 | width, 495 | height, 496 | 1 # depth 497 | ) 498 | 499 | stream.synchronize() 500 | 501 | h_pix = cp.asnumpy( d_pix ) 502 | return h_pix 503 | pix = launch() 504 | 505 | 506 | print( "Total number of log messages: {}".format( logger.num_mssgs ) ) 507 | 508 | pix = pix.reshape( ( height, width, 4 ) ) # PIL expects [ y, x ] resolution 509 | img = ImageOps.flip( 
Image.fromarray( pix, 'RGBA' ) ) # PIL expects y = 0 at bottom 510 | img.show() 511 | img.save( 'my.png' ) 512 | -------------------------------------------------------------------------------- /test/sample_ptx.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 NVIDIA CORPORATION All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 4 | 5 | 6 | hello_ptx = ''' 7 | // 8 | // Generated by NVIDIA NVVM Compiler 9 | // 10 | // Compiler Build ID: CL-29373293 11 | // Cuda compilation tools, release 11.2, V11.2.67 12 | // Based on NVVM 7.0.1 13 | // 14 | 15 | .version 7.2 16 | .target sm_60 17 | .address_size 64 18 | 19 | // .globl __raygen__hello 20 | .visible .const .align 8 .b8 params[16]; 21 | 22 | .visible .entry __raygen__hello() 23 | { 24 | .reg .pred %p<4>; 25 | .reg .b16 %rs<5>; 26 | .reg .f32 %f<39>; 27 | .reg .b32 %r<12>; 28 | .reg .b64 %rd<6>; 29 | .loc 1 39 0 30 | Lfunc_begin0: 31 | .loc 1 39 0 32 | 33 | 34 | .loc 1 41 26 35 | .loc 2 5675 5, function_name Linfo_string0, inlined_at 1 41 26 36 | // begin inline asm 37 | call (%r1), _optix_get_launch_index_x, (); 38 | // end inline asm 39 | .loc 2 5676 5, function_name Linfo_string0, inlined_at 1 41 26 40 | // begin inline asm 41 | call (%r2), _optix_get_launch_index_y, (); 42 | // end inline asm 43 | Ltmp0: 44 | .loc 1 42 39 45 | .loc 2 5703 5, function_name Linfo_string1, inlined_at 1 42 39 46 | // begin inline asm 47 | call (%rd1), _optix_get_sbt_data_ptr_64, (); 48 | // end inline asm 49 | Ltmp1: 50 | .loc 1 43 5 51 | ld.const.u64 %rd2, [params]; 52 | cvta.to.global.u64 %rd3, %rd2; 53 | ld.const.u32 %r4, [params+8]; 54 | mad.lo.s32 %r5, %r4, %r2, %r1; 55 | ld.f32 %f1, [%rd1]; 56 | ld.f32 %f2, [%rd1+4]; 57 | ld.f32 %f3, [%rd1+8]; 58 | .loc 3 121 22 59 | mov.f32 %f4, 0f3F800000; 60 | min.ftz.f32 %f5, %f1, %f4; 61 | .loc 3 121 12 62 | mov.f32 %f6, 0f00000000; 63 | max.ftz.f32 
%f7, %f6, %f5; 64 | .loc 3 121 22 65 | min.ftz.f32 %f8, %f2, %f4; 66 | .loc 3 121 12 67 | max.ftz.f32 %f9, %f6, %f8; 68 | .loc 3 121 22 69 | min.ftz.f32 %f10, %f3, %f4; 70 | .loc 3 121 12 71 | max.ftz.f32 %f11, %f6, %f10; 72 | .loc 4 38 33 73 | lg2.approx.ftz.f32 %f12, %f7; 74 | mul.ftz.f32 %f13, %f12, 0f3ED55555; 75 | ex2.approx.ftz.f32 %f14, %f13; 76 | .loc 4 38 56 77 | lg2.approx.ftz.f32 %f15, %f9; 78 | mul.ftz.f32 %f16, %f15, 0f3ED55555; 79 | ex2.approx.ftz.f32 %f17, %f16; 80 | .loc 4 38 79 81 | lg2.approx.ftz.f32 %f18, %f11; 82 | mul.ftz.f32 %f19, %f18, 0f3ED55555; 83 | ex2.approx.ftz.f32 %f20, %f19; 84 | setp.lt.ftz.f32 %p1, %f7, 0f3B4D2E1C; 85 | mul.ftz.f32 %f21, %f7, 0f414EB852; 86 | fma.rn.ftz.f32 %f22, %f14, 0f3F870A3D, 0fBD6147AE; 87 | selp.f32 %f23, %f21, %f22, %p1; 88 | setp.lt.ftz.f32 %p2, %f9, 0f3B4D2E1C; 89 | mul.ftz.f32 %f24, %f9, 0f414EB852; 90 | fma.rn.ftz.f32 %f25, %f17, 0f3F870A3D, 0fBD6147AE; 91 | selp.f32 %f26, %f24, %f25, %p2; 92 | setp.lt.ftz.f32 %p3, %f11, 0f3B4D2E1C; 93 | mul.ftz.f32 %f27, %f11, 0f414EB852; 94 | fma.rn.ftz.f32 %f28, %f20, 0f3F870A3D, 0fBD6147AE; 95 | selp.f32 %f29, %f27, %f28, %p3; 96 | Ltmp2: 97 | .loc 4 61 25 98 | .loc 3 121 22, function_name Linfo_string2, inlined_at 4 61 25 99 | min.ftz.f32 %f30, %f23, %f4; 100 | .loc 3 121 12, function_name Linfo_string2, inlined_at 4 61 25 101 | max.ftz.f32 %f31, %f6, %f30; 102 | .loc 4 54 5, function_name Linfo_string2, inlined_at 4 61 25 103 | mul.ftz.f32 %f32, %f31, 0f43800000; 104 | cvt.rzi.ftz.u32.f32 %r6, %f32; 105 | .loc 5 870 10, function_name Linfo_string2, inlined_at 4 61 25 106 | min.u32 %r7, %r6, 255; 107 | Ltmp3: 108 | .loc 4 61 58 109 | .loc 3 121 22, function_name Linfo_string2, inlined_at 4 61 58 110 | min.ftz.f32 %f33, %f26, %f4; 111 | .loc 3 121 12, function_name Linfo_string2, inlined_at 4 61 58 112 | max.ftz.f32 %f34, %f6, %f33; 113 | .loc 4 54 5, function_name Linfo_string2, inlined_at 4 61 58 114 | mul.ftz.f32 %f35, %f34, 0f43800000; 115 | cvt.rzi.ftz.u32.f32 
%r8, %f35; 116 | .loc 5 870 10, function_name Linfo_string2, inlined_at 4 61 58 117 | min.u32 %r9, %r8, 255; 118 | Ltmp4: 119 | .loc 4 61 91 120 | .loc 3 121 22, function_name Linfo_string2, inlined_at 4 61 91 121 | min.ftz.f32 %f36, %f29, %f4; 122 | .loc 3 121 12, function_name Linfo_string2, inlined_at 4 61 91 123 | max.ftz.f32 %f37, %f6, %f36; 124 | .loc 4 54 5, function_name Linfo_string2, inlined_at 4 61 91 125 | mul.ftz.f32 %f38, %f37, 0f43800000; 126 | cvt.rzi.ftz.u32.f32 %r10, %f38; 127 | .loc 5 870 10, function_name Linfo_string2, inlined_at 4 61 91 128 | min.u32 %r11, %r10, 255; 129 | Ltmp5: 130 | .loc 4 61 91 131 | mul.wide.u32 %rd4, %r5, 4; 132 | add.s64 %rd5, %rd3, %rd4; 133 | Ltmp6: 134 | .loc 5 870 10, function_name Linfo_string2, inlined_at 4 61 91 135 | cvt.u16.u32 %rs1, %r11; 136 | Ltmp7: 137 | .loc 5 870 10, function_name Linfo_string2, inlined_at 4 61 58 138 | cvt.u16.u32 %rs2, %r9; 139 | Ltmp8: 140 | .loc 5 870 10, function_name Linfo_string2, inlined_at 4 61 25 141 | cvt.u16.u32 %rs3, %r7; 142 | Ltmp9: 143 | .loc 4 61 91 144 | mov.u16 %rs4, 255; 145 | st.global.v4.u8 [%rd5], {%rs3, %rs2, %rs1, %rs4}; 146 | .loc 1 45 1 147 | ret; 148 | Ltmp10: 149 | Lfunc_end0: 150 | 151 | } 152 | // .globl __anyhit__noop 153 | .visible .entry __anyhit__noop() 154 | { 155 | 156 | .loc 1 48 0 157 | Lfunc_begin1: 158 | .loc 1 48 0 159 | 160 | 161 | .loc 1 48 48 162 | ret; 163 | Ltmp11: 164 | Lfunc_end1: 165 | 166 | } 167 | // .globl __closesthit__noop 168 | .visible .entry __closesthit__noop() 169 | { 170 | 171 | .loc 1 51 0 172 | Lfunc_begin2: 173 | .loc 1 51 0 174 | 175 | 176 | .loc 1 51 52 177 | ret; 178 | Ltmp12: 179 | Lfunc_end2: 180 | 181 | } 182 | // .globl __intersection__noop 183 | .visible .entry __intersection__noop() 184 | { 185 | 186 | .loc 1 54 0 187 | Lfunc_begin3: 188 | .loc 1 54 0 189 | 190 | 191 | .loc 1 54 55 192 | ret; 193 | Ltmp13: 194 | Lfunc_end3: 195 | 196 | } 197 | // .globl __intersect__noop 198 | .visible .entry __intersect__noop() 199 
| { 200 | 201 | .loc 1 57 0 202 | Lfunc_begin4: 203 | .loc 1 57 0 204 | 205 | 206 | .loc 1 57 52 207 | ret; 208 | Ltmp14: 209 | Lfunc_end4: 210 | 211 | } 212 | // .globl __miss__noop 213 | .visible .entry __miss__noop() 214 | { 215 | 216 | .loc 1 60 0 217 | Lfunc_begin5: 218 | .loc 1 60 0 219 | 220 | 221 | .loc 1 60 47 222 | ret; 223 | Ltmp15: 224 | Lfunc_end5: 225 | 226 | } 227 | // .globl __direct_callable__noop 228 | .visible .entry __direct_callable__noop() 229 | { 230 | 231 | .loc 1 63 0 232 | Lfunc_begin6: 233 | .loc 1 63 0 234 | 235 | 236 | .loc 1 63 58 237 | ret; 238 | Ltmp16: 239 | Lfunc_end6: 240 | 241 | } 242 | // .globl __continuation_callable__noop 243 | .visible .entry __continuation_callable__noop() 244 | { 245 | 246 | .loc 1 66 0 247 | Lfunc_begin7: 248 | .loc 1 66 0 249 | 250 | 251 | .loc 1 66 64 252 | ret; 253 | Ltmp17: 254 | Lfunc_end7: 255 | 256 | } 257 | .file 1 "optixHello/draw_solid_color.cu" 258 | .file 2 "include/internal/optix_7_device_impl.h" 259 | .file 3 "sutil/vec_math.h" 260 | .file 4 "cuda/helpers.h" 261 | .file 5 "cuda/include/crt/math_functions.hpp" 262 | .section .debug_str 263 | { 264 | Linfo_string0: 265 | .b8 95,90,78,55,51,95,73,78,84,69,82,78,65,76,95,53,49,95,116,109,112,120,102,116,95,48,48,49,48,102,48,57,54,95,48,48,48,48,48,48 266 | .b8 48,48,95,55,95,100,114,97,119,95,115,111,108,105,100,95,99,111,108,111,114,95,99,112,112,49,95,105,105,95,51,101,52,98,52,55,50,54,49,57 267 | .b8 111,112,116,105,120,71,101,116,76,97,117,110,99,104,73,110,100,101,120,69,118,0 268 | Linfo_string1: 269 | .b8 95,90,78,55,51,95,73,78,84,69,82,78,65,76,95,53,49,95,116,109,112,120,102,116,95,48,48,49,48,102,48,57,54,95,48,48,48,48,48,48 270 | .b8 48,48,95,55,95,100,114,97,119,95,115,111,108,105,100,95,99,111,108,111,114,95,99,112,112,49,95,105,105,95,51,101,52,98,52,55,50,54,50,50 271 | .b8 111,112,116,105,120,71,101,116,83,98,116,68,97,116,97,80,111,105,110,116,101,114,69,118,0 272 | Linfo_string2: 273 | .b8 
95,90,50,49,113,117,97,110,116,105,122,101,85,110,115,105,103,110,101,100,56,66,105,116,115,102,0 274 | 275 | } 276 | ''' 277 | 278 | triangle_ptx = ''' 279 | 280 | // 281 | // Generated by NVIDIA NVVM Compiler 282 | // 283 | // Compiler Build ID: CL-29373293 284 | // Cuda compilation tools, release 11.2, V11.2.67 285 | // Based on NVVM 7.0.1 286 | // 287 | 288 | .version 7.2 289 | .target sm_60 290 | .address_size 64 291 | 292 | // .globl __raygen__rg 293 | .visible .const .align 8 .b8 params[72]; 294 | 295 | .visible .entry __raygen__rg() 296 | { 297 | .reg .pred %p<4>; 298 | .reg .b16 %rs<5>; 299 | .reg .f32 %f<89>; 300 | .reg .b32 %r<118>; 301 | .reg .b64 %rd<6>; 302 | .loc 1 64 0 303 | Lfunc_begin0: 304 | .loc 1 64 0 305 | 306 | 307 | .loc 1 67 23 308 | .loc 2 5711 5, function_name Linfo_string0, inlined_at 1 67 23 309 | // begin inline asm 310 | call (%r1), _optix_get_launch_index_x, (); 311 | // end inline asm 312 | .loc 2 5712 5, function_name Linfo_string0, inlined_at 1 67 23 313 | // begin inline asm 314 | call (%r2), _optix_get_launch_index_y, (); 315 | // end inline asm 316 | Ltmp0: 317 | .loc 1 68 23 318 | .loc 2 5725 5, function_name Linfo_string1, inlined_at 1 68 23 319 | // begin inline asm 320 | call (%r4), _optix_get_launch_dimension_x, (); 321 | // end inline asm 322 | .loc 2 5726 5, function_name Linfo_string1, inlined_at 1 68 23 323 | // begin inline asm 324 | call (%r5), _optix_get_launch_dimension_y, (); 325 | // end inline asm 326 | Ltmp1: 327 | .loc 1 73 5 328 | ld.const.v2.f32 {%f10, %f11}, [params+24]; 329 | mov.u32 %r44, 0; 330 | mov.u32 %r43, 1; 331 | ld.const.v2.f32 {%f13, %f14}, [params+32]; 332 | ld.const.v2.f32 {%f17, %f18}, [params+40]; 333 | ld.const.v2.f32 {%f21, %f22}, [params+48]; 334 | ld.const.v2.f32 {%f25, %f26}, [params+56]; 335 | cvt.rn.f32.u32 %f29, %r1; 336 | cvt.rn.f32.u32 %f30, %r4; 337 | div.approx.ftz.f32 %f31, %f29, %f30; 338 | cvt.rn.f32.u32 %f32, %r2; 339 | cvt.rn.f32.u32 %f33, %r5; 340 | div.approx.ftz.f32 %f34, 
%f32, %f33; 341 | fma.rn.ftz.f32 %f35, %f31, 0f40000000, 0fBF800000; 342 | mov.f32 %f36, 0f3F800000; 343 | fma.rn.ftz.f32 %f37, %f34, 0f40000000, 0fBF800000; 344 | ld.const.v2.f32 {%f38, %f39}, [params+16]; 345 | mul.ftz.f32 %f40, %f17, %f37; 346 | mul.ftz.f32 %f41, %f18, %f37; 347 | mul.ftz.f32 %f42, %f21, %f37; 348 | fma.rn.ftz.f32 %f43, %f11, %f35, %f40; 349 | fma.rn.ftz.f32 %f44, %f13, %f35, %f41; 350 | fma.rn.ftz.f32 %f45, %f14, %f35, %f42; 351 | add.ftz.f32 %f46, %f22, %f43; 352 | add.ftz.f32 %f47, %f25, %f44; 353 | add.ftz.f32 %f48, %f26, %f45; 354 | mul.ftz.f32 %f49, %f47, %f47; 355 | fma.rn.ftz.f32 %f50, %f46, %f46, %f49; 356 | fma.rn.ftz.f32 %f51, %f48, %f48, %f50; 357 | Ltmp2: 358 | .loc 3 547 25 359 | rsqrt.approx.ftz.f32 %f52, %f51; 360 | mul.ftz.f32 %f4, %f52, %f46; 361 | mul.ftz.f32 %f5, %f52, %f47; 362 | mul.ftz.f32 %f6, %f52, %f48; 363 | Ltmp3: 364 | .loc 1 77 5 365 | ld.const.u64 %rd1, [params+64]; 366 | Ltmp4: 367 | .loc 1 77 5 368 | .loc 2 198 5, function_name Linfo_string2, inlined_at 1 77 5 369 | mov.f32 %f8, 0f5A0E1BCA; 370 | mov.f32 %f9, 0f00000000; 371 | mov.u32 %r40, 255; 372 | mov.u32 %r45, 3; 373 | // begin inline asm 374 | call(%r7,%r8,%r9,%r10,%r11,%r12,%r13,%r14,%r15,%r16,%r17,%r18,%r19,%r20,%r21,%r22,%r23,%r24,%r25,%r26,%r27,%r28,%r29,%r30,%r31,%r32,%r33,%r34,%r35,%r36,%r37,%r38),_optix_trace_typed_32,(%r44,%rd1,%f38,%f39,%f10,%f4,%f5,%f6,%f9,%f8,%f9,%r40,%r44,%r44,%r43,%r44,%r45,%r78,%r79,%r80,%r81,%r82,%r83,%r84,%r85,%r86,%r87,%r88,%r89,%r90,%r91,%r92,%r93,%r94,%r95,%r96,%r97,%r98,%r99,%r100,%r101,%r102,%r103,%r104,%r105,%r106,%r107,%r108,%r109); 375 | // end inline asm 376 | Ltmp5: 377 | .loc 4 137 10 378 | mov.b32 %f53, %r7; 379 | mov.b32 %f54, %r8; 380 | mov.b32 %f55, %r9; 381 | .loc 1 96 5 382 | ld.const.u64 %rd2, [params]; 383 | cvta.to.global.u64 %rd3, %rd2; 384 | ld.const.u32 %r110, [params+8]; 385 | mad.lo.s32 %r111, %r110, %r2, %r1; 386 | .loc 3 121 22 387 | min.ftz.f32 %f56, %f53, %f36; 388 | .loc 3 121 12 389 | 
max.ftz.f32 %f57, %f9, %f56; 390 | .loc 3 121 22 391 | min.ftz.f32 %f58, %f54, %f36; 392 | .loc 3 121 12 393 | max.ftz.f32 %f59, %f9, %f58; 394 | .loc 3 121 22 395 | min.ftz.f32 %f60, %f55, %f36; 396 | .loc 3 121 12 397 | max.ftz.f32 %f61, %f9, %f60; 398 | .loc 5 38 33 399 | lg2.approx.ftz.f32 %f62, %f57; 400 | mul.ftz.f32 %f63, %f62, 0f3ED55555; 401 | ex2.approx.ftz.f32 %f64, %f63; 402 | .loc 5 38 56 403 | lg2.approx.ftz.f32 %f65, %f59; 404 | mul.ftz.f32 %f66, %f65, 0f3ED55555; 405 | ex2.approx.ftz.f32 %f67, %f66; 406 | .loc 5 38 79 407 | lg2.approx.ftz.f32 %f68, %f61; 408 | mul.ftz.f32 %f69, %f68, 0f3ED55555; 409 | ex2.approx.ftz.f32 %f70, %f69; 410 | setp.lt.ftz.f32 %p1, %f57, 0f3B4D2E1C; 411 | mul.ftz.f32 %f71, %f57, 0f414EB852; 412 | fma.rn.ftz.f32 %f72, %f64, 0f3F870A3D, 0fBD6147AE; 413 | selp.f32 %f73, %f71, %f72, %p1; 414 | setp.lt.ftz.f32 %p2, %f59, 0f3B4D2E1C; 415 | mul.ftz.f32 %f74, %f59, 0f414EB852; 416 | fma.rn.ftz.f32 %f75, %f67, 0f3F870A3D, 0fBD6147AE; 417 | selp.f32 %f76, %f74, %f75, %p2; 418 | setp.lt.ftz.f32 %p3, %f61, 0f3B4D2E1C; 419 | mul.ftz.f32 %f77, %f61, 0f414EB852; 420 | fma.rn.ftz.f32 %f78, %f70, 0f3F870A3D, 0fBD6147AE; 421 | selp.f32 %f79, %f77, %f78, %p3; 422 | Ltmp6: 423 | .loc 5 61 25 424 | .loc 3 121 22, function_name Linfo_string3, inlined_at 5 61 25 425 | min.ftz.f32 %f80, %f73, %f36; 426 | .loc 3 121 12, function_name Linfo_string3, inlined_at 5 61 25 427 | max.ftz.f32 %f81, %f9, %f80; 428 | .loc 5 54 5, function_name Linfo_string3, inlined_at 5 61 25 429 | mul.ftz.f32 %f82, %f81, 0f43800000; 430 | cvt.rzi.ftz.u32.f32 %r112, %f82; 431 | .loc 6 870 10, function_name Linfo_string3, inlined_at 5 61 25 432 | min.u32 %r113, %r112, 255; 433 | Ltmp7: 434 | .loc 5 61 58 435 | .loc 3 121 22, function_name Linfo_string3, inlined_at 5 61 58 436 | min.ftz.f32 %f83, %f76, %f36; 437 | .loc 3 121 12, function_name Linfo_string3, inlined_at 5 61 58 438 | max.ftz.f32 %f84, %f9, %f83; 439 | .loc 5 54 5, function_name Linfo_string3, inlined_at 5 61 
58 440 | mul.ftz.f32 %f85, %f84, 0f43800000; 441 | cvt.rzi.ftz.u32.f32 %r114, %f85; 442 | .loc 6 870 10, function_name Linfo_string3, inlined_at 5 61 58 443 | min.u32 %r115, %r114, 255; 444 | Ltmp8: 445 | .loc 5 61 91 446 | .loc 3 121 22, function_name Linfo_string3, inlined_at 5 61 91 447 | min.ftz.f32 %f86, %f79, %f36; 448 | .loc 3 121 12, function_name Linfo_string3, inlined_at 5 61 91 449 | max.ftz.f32 %f87, %f9, %f86; 450 | .loc 5 54 5, function_name Linfo_string3, inlined_at 5 61 91 451 | mul.ftz.f32 %f88, %f87, 0f43800000; 452 | cvt.rzi.ftz.u32.f32 %r116, %f88; 453 | .loc 6 870 10, function_name Linfo_string3, inlined_at 5 61 91 454 | min.u32 %r117, %r116, 255; 455 | Ltmp9: 456 | .loc 5 61 91 457 | mul.wide.u32 %rd4, %r111, 4; 458 | add.s64 %rd5, %rd3, %rd4; 459 | Ltmp10: 460 | .loc 6 870 10, function_name Linfo_string3, inlined_at 5 61 91 461 | cvt.u16.u32 %rs1, %r117; 462 | Ltmp11: 463 | .loc 6 870 10, function_name Linfo_string3, inlined_at 5 61 58 464 | cvt.u16.u32 %rs2, %r115; 465 | Ltmp12: 466 | .loc 6 870 10, function_name Linfo_string3, inlined_at 5 61 25 467 | cvt.u16.u32 %rs3, %r113; 468 | Ltmp13: 469 | .loc 5 61 91 470 | mov.u16 %rs4, 255; 471 | st.global.v4.u8 [%rd5], {%rs3, %rs2, %rs1, %rs4}; 472 | .loc 1 97 1 473 | ret; 474 | Ltmp14: 475 | Lfunc_end0: 476 | 477 | } 478 | // .globl __miss__ms 479 | .visible .entry __miss__ms() 480 | { 481 | .reg .b32 %r<7>; 482 | .reg .b64 %rd<2>; 483 | .loc 1 100 0 484 | Lfunc_begin1: 485 | .loc 1 100 0 486 | 487 | 488 | .loc 1 102 56 489 | .loc 2 5739 5, function_name Linfo_string4, inlined_at 1 102 56 490 | // begin inline asm 491 | call (%rd1), _optix_get_sbt_data_ptr_64, (); 492 | // end inline asm 493 | Ltmp15: 494 | .loc 1 103 5 495 | ld.u32 %r2, [%rd1]; 496 | ld.u32 %r4, [%rd1+4]; 497 | ld.u32 %r6, [%rd1+8]; 498 | Ltmp16: 499 | .loc 1 43 5 500 | .loc 2 3921 5, function_name Linfo_string5, inlined_at 1 43 5 501 | mov.u32 %r1, 0; 502 | // begin inline asm 503 | call _optix_set_payload, (%r1, %r2); 504 | // 
end inline asm 505 | Ltmp17: 506 | .loc 1 44 5 507 | .loc 2 3931 5, function_name Linfo_string6, inlined_at 1 44 5 508 | mov.u32 %r3, 1; 509 | // begin inline asm 510 | call _optix_set_payload, (%r3, %r4); 511 | // end inline asm 512 | Ltmp18: 513 | .loc 1 45 5 514 | .loc 2 3941 5, function_name Linfo_string7, inlined_at 1 45 5 515 | mov.u32 %r5, 2; 516 | // begin inline asm 517 | call _optix_set_payload, (%r5, %r6); 518 | // end inline asm 519 | Ltmp19: 520 | .loc 1 104 1 521 | ret; 522 | Ltmp20: 523 | Lfunc_end1: 524 | 525 | } 526 | // .globl __closesthit__ch 527 | .visible .entry __closesthit__ch() 528 | { 529 | .reg .f32 %f<3>; 530 | .reg .b32 %r<7>; 531 | .loc 1 107 0 532 | Lfunc_begin2: 533 | .loc 1 107 0 534 | 535 | 536 | .loc 1 111 33 537 | .loc 2 5699 5, function_name Linfo_string8, inlined_at 1 111 33 538 | // begin inline asm 539 | call (%f1, %f2), _optix_get_triangle_barycentrics, (); 540 | // end inline asm 541 | Ltmp21: 542 | .loc 4 132 10 543 | mov.b32 %r2, %f1; 544 | Ltmp22: 545 | .loc 1 43 5 546 | .loc 2 3921 5, function_name Linfo_string5, inlined_at 1 43 5 547 | mov.u32 %r1, 0; 548 | // begin inline asm 549 | call _optix_set_payload, (%r1, %r2); 550 | // end inline asm 551 | Ltmp23: 552 | .loc 4 132 10 553 | mov.b32 %r4, %f2; 554 | Ltmp24: 555 | .loc 1 44 5 556 | .loc 2 3931 5, function_name Linfo_string6, inlined_at 1 44 5 557 | mov.u32 %r3, 1; 558 | // begin inline asm 559 | call _optix_set_payload, (%r3, %r4); 560 | // end inline asm 561 | Ltmp25: 562 | .loc 1 45 5 563 | .loc 2 3941 5, function_name Linfo_string7, inlined_at 1 45 5 564 | mov.u32 %r5, 2; 565 | mov.u32 %r6, 1065353216; 566 | // begin inline asm 567 | call _optix_set_payload, (%r5, %r6); 568 | // end inline asm 569 | Ltmp26: 570 | .loc 1 114 1 571 | ret; 572 | Ltmp27: 573 | Lfunc_end2: 574 | 575 | } 576 | .file 1 "optixTriangle/optixTriangle.cu" 577 | .file 2 "include/internal/optix_7_device_impl.h" 578 | .file 3 "sutil/vec_math.h" 579 | .file 4 
"cuda/include/crt/device_functions.hpp" 580 | .file 5 "cuda/helpers.h" 581 | .file 6 "include/crt/math_functions.hpp" 582 | .section .debug_str 583 | { 584 | Linfo_string0: 585 | .b8 95,90,78,55,48,95,73,78,84,69,82,78,65,76,95,52,56,95,116,109,112,120,102,116,95,48,48,48,48,99,54,52,101,95,48,48,48,48,48,48 586 | .b8 48,48,95,55,95,111,112,116,105,120,84,114,105,97,110,103,108,101,95,99,112,112,49,95,105,105,95,100,101,98,99,100,99,53,49,49,57,111,112,116 587 | .b8 105,120,71,101,116,76,97,117,110,99,104,73,110,100,101,120,69,118,0 588 | Linfo_string1: 589 | .b8 95,90,78,55,48,95,73,78,84,69,82,78,65,76,95,52,56,95,116,109,112,120,102,116,95,48,48,48,48,99,54,52,101,95,48,48,48,48,48,48 590 | .b8 48,48,95,55,95,111,112,116,105,120,84,114,105,97,110,103,108,101,95,99,112,112,49,95,105,105,95,100,101,98,99,100,99,53,49,50,52,111,112,116 591 | .b8 105,120,71,101,116,76,97,117,110,99,104,68,105,109,101,110,115,105,111,110,115,69,118,0 592 | Linfo_string2: 593 | .b8 95,90,78,55,48,95,73,78,84,69,82,78,65,76,95,52,56,95,116,109,112,120,102,116,95,48,48,48,48,99,54,52,101,95,48,48,48,48,48,48 594 | .b8 48,48,95,55,95,111,112,116,105,120,84,114,105,97,110,103,108,101,95,99,112,112,49,95,105,105,95,100,101,98,99,100,99,53,49,49,48,111,112,116 595 | .b8 105,120,84,114,97,99,101,69,121,54,102,108,111,97,116,51,83,48,95,102,102,102,106,106,106,106,106,82,106,83,49,95,83,49,95,0 596 | Linfo_string3: 597 | .b8 95,90,50,49,113,117,97,110,116,105,122,101,85,110,115,105,103,110,101,100,56,66,105,116,115,102,0 598 | Linfo_string4: 599 | .b8 95,90,78,55,48,95,73,78,84,69,82,78,65,76,95,52,56,95,116,109,112,120,102,116,95,48,48,48,48,99,54,52,101,95,48,48,48,48,48,48 600 | .b8 48,48,95,55,95,111,112,116,105,120,84,114,105,97,110,103,108,101,95,99,112,112,49,95,105,105,95,100,101,98,99,100,99,53,49,50,50,111,112,116 601 | .b8 105,120,71,101,116,83,98,116,68,97,116,97,80,111,105,110,116,101,114,69,118,0 602 | Linfo_string5: 603 | .b8 
95,90,78,55,48,95,73,78,84,69,82,78,65,76,95,52,56,95,116,109,112,120,102,116,95,48,48,48,48,99,54,52,101,95,48,48,48,48,48,48 604 | .b8 48,48,95,55,95,111,112,116,105,120,84,114,105,97,110,103,108,101,95,99,112,112,49,95,105,105,95,100,101,98,99,100,99,53,49,49,55,111,112,116 605 | .b8 105,120,83,101,116,80,97,121,108,111,97,100,95,48,69,106,0 606 | Linfo_string6: 607 | .b8 95,90,78,55,48,95,73,78,84,69,82,78,65,76,95,52,56,95,116,109,112,120,102,116,95,48,48,48,48,99,54,52,101,95,48,48,48,48,48,48 608 | .b8 48,48,95,55,95,111,112,116,105,120,84,114,105,97,110,103,108,101,95,99,112,112,49,95,105,105,95,100,101,98,99,100,99,53,49,49,55,111,112,116 609 | .b8 105,120,83,101,116,80,97,121,108,111,97,100,95,49,69,106,0 610 | Linfo_string7: 611 | .b8 95,90,78,55,48,95,73,78,84,69,82,78,65,76,95,52,56,95,116,109,112,120,102,116,95,48,48,48,48,99,54,52,101,95,48,48,48,48,48,48 612 | .b8 48,48,95,55,95,111,112,116,105,120,84,114,105,97,110,103,108,101,95,99,112,112,49,95,105,105,95,100,101,98,99,100,99,53,49,49,55,111,112,116 613 | .b8 105,120,83,101,116,80,97,121,108,111,97,100,95,50,69,106,0 614 | Linfo_string8: 615 | .b8 95,90,78,55,48,95,73,78,84,69,82,78,65,76,95,52,56,95,116,109,112,120,102,116,95,48,48,48,48,99,54,52,101,95,48,48,48,48,48,48 616 | .b8 48,48,95,55,95,111,112,116,105,120,84,114,105,97,110,103,108,101,95,99,112,112,49,95,105,105,95,100,101,98,99,100,99,53,49,50,56,111,112,116 617 | .b8 105,120,71,101,116,84,114,105,97,110,103,108,101,66,97,114,121,99,101,110,116,114,105,99,115,69,118,0 618 | 619 | } 620 | ''' 621 | -------------------------------------------------------------------------------- /examples/simpleMotionBlur.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # 4 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 
5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions 8 | # are met: 9 | # * Redistributions of source code must retain the above copyright 10 | # notice, this list of conditions and the following disclaimer. 11 | # * Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # * Neither the name of NVIDIA CORPORATION nor the names of its 15 | # contributors may be used to endorse or promote products derived 16 | # from this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 19 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 22 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 26 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#


import optix
import os
import cupy as cp # CUDA bindings
import numpy as np # Packing of structures in C-compatible format

import array
import ctypes # C interop helpers
from PIL import Image, ImageOps # Image IO
from cuda.bindings import nvrtc

import path_util


class State:
    """Plain attribute bag holding the sample's OptiX objects and device buffers.

    Every field starts out as ``None`` (objects) or ``0`` (handles / device
    memory placeholders) and is filled in by the setup functions that receive
    the instance; only ``stream`` is created here.
    """

    def __init__( self ):
        self.context = None

        self.tri_gas_handle = 0
        self.d_tri_gas_output_buffer = 0 # Triangle AS memory

        self.sphere_gas_handle = 0 # Traversable handle for sphere
        self.d_sphere_gas_output_buffer = 0 # Sphere AS memory
        self.sphere_motion_transform_handle = 0
        self.d_sphere_motion_transform = 0

        self.ias_handle = 0 # Traversable handle for instance AS
        self.d_ias_output_buffer = 0 # Instance AS memory

        self.ptx_module = None
        self.pipeline_compile_options = None
        self.pipeline = None

        self.raygen_prog_group = None
        self.miss_group = None
        self.tri_hit_group = None
        self.sphere_hit_group = None

        # Constructing a State creates a CUDA stream, so a usable CUDA
        # device is required at this point.
        self.stream = cp.cuda.Stream()
        self.params = None
        self.d_params = 0

        self.sbt = None
        self.d_raygen_record = 0
        self.d_miss_records = 0
        self.d_hitgroup_records = 0


#-------------------------------------------------------------------------------
#
# Util
#
#-------------------------------------------------------------------------------

def checkNVRTC(result, prog = None):
    """Validate the tuple returned by an ``nvrtc`` call and unwrap its payload.

    Args:
        result: tuple whose first element is the status (an object with a
            ``.value``; non-zero means failure), followed by zero or more
            return values.
        prog: optional NVRTC program.  When given and the call failed, the
            program's compile log is fetched and printed before raising.

    Returns:
        ``None`` when ``result`` holds only the status, the single payload
        when it holds one, otherwise a tuple of all payload values.

    Raises:
        RuntimeError: if the status is non-zero.
    """
    status = result[0]
    if status.value:
        if prog:
            (res, logsize) = nvrtc.nvrtcGetProgramLogSize(prog)
            if not res.value:
                # nvrtcGetProgramLog fills the buffer it is handed.
                log = b" " * logsize
                nvrtc.nvrtcGetProgramLog(prog, log)
                # Drop the C string terminator so it is not printed.
                print(log.decode(errors="replace").rstrip("\x00"))
        error_name = nvrtc.nvrtcGetErrorString(status)[1]
        if isinstance(error_name, bytes):
            # Show NVRTC_ERROR_... rather than the b'...' repr.
            error_name = error_name.decode(errors="replace")
        raise RuntimeError("NVRTC error code={}({})".format(status.value, error_name))

    payload = result[1:]
    if not payload:
        return None
    if len(payload) == 1:
        return payload[0]
    return payload


class Logger:
    """Callable log sink that prints each message and counts how many it saw."""

    def __init__( self ):
        self.num_mssgs = 0

    def __call__( self, level, tag, mssg ):
        """Print one message as ``[level][tag]: mssg`` and bump the counter."""
        print( "[{:>2}][{:>12}]: {}".format( level, tag, mssg ) )
        self.num_mssgs += 1


def log_callback( level, tag, mssg ):
    """Stateless log callback: print one message as ``[level][tag]: mssg``."""
    print( "[{:>2}][{:>12}]: {}".format( level, tag, mssg ) )


def round_up( val, mult_of ):
    """Return the smallest multiple of ``mult_of`` that is >= ``val``."""
    remainder = val % mult_of
    return val if remainder == 0 else val + mult_of - remainder


def get_aligned_itemsize( formats, alignment ):
    """Return the byte size of a struct with the given fields, padded.

    Args:
        formats: sequence of numpy format strings, one per struct field.
        alignment: the result is rounded up to a multiple of this value.

    Returns:
        int: ``itemsize`` of an aligned numpy structured dtype built from
        ``formats``, rounded up to ``alignment``.
    """
    # Field names are irrelevant for the size; any unique names will do.
    names = [ 'x'+str(i) for i in range( len( formats ) ) ]

    temp_dtype = np.dtype( {
        'names'   : names,
        'formats' : formats,
        'aligned' : True
        } )
    return round_up( temp_dtype.itemsize, alignment )


def array_to_device_memory( numpy_array, stream=None ):
    """Allocate device memory and copy ``numpy_array`` into it.

    Args:
        numpy_array: host array to upload.
        stream: optional ``cp.cuda.Stream`` to enqueue the copy on.  When a
            stream is supplied the copy is left asynchronous and ordering is
            the caller's responsibility.  When omitted, a private stream is
            created for this call and synchronised before returning, so the
            data is on the device when the function returns.

    Returns:
        The cupy device memory pointer holding the copy.
    """
    # Create the stream lazily: a default argument would be evaluated once,
    # when the function is defined, creating a CUDA stream at import time.
    owns_stream = stream is None
    if owns_stream:
        stream = cp.cuda.Stream()

    byte_size = numpy_array.size*numpy_array.dtype.itemsize

    h_ptr = ctypes.c_void_p( numpy_array.ctypes.data )
    d_mem = cp.cuda.memory.alloc( byte_size )
    d_mem.copy_from_async( h_ptr, byte_size, stream )
    if owns_stream:
        # Nobody else can wait on the private stream, so finish the copy here.
        stream.synchronize()
    return d_mem


def optix_version_gte( version ):
    """Return True if the installed OptiX (major, minor) is >= ``version``.

    Args:
        version: ``(major, minor)`` pair to compare against.
    """
    installed = optix.version()
    return ( installed[0], installed[1] ) >= ( version[0], version[1] )


def compile_cuda( cuda_file ):
    """Compile a CUDA source file to PTX with NVRTC.

    Args:
        cuda_file: path of the ``.cu`` file to compile.

    Returns:
        bytes: the PTX produced by NVRTC.

    Raises:
        RuntimeError: raised by ``checkNVRTC`` when an NVRTC call fails.
    """
    compile_options = [
        b'-use_fast_math',
        b'-lineinfo',
        b'-default-device',
        b'-std=c++11',
        b'-rdc',
        b'true',
        f'-I{path_util.include_path}'.encode(),
        f'-I{path_util.cuda_tk_path}'.encode()
    ]
    # OptiX 7.0 compiles need the path to the system stddef.h.
    # path_util.stddef_path is a constant fixed when the module is built; at
    # build time it can be specified via an environment variable, e.g.
    #   export PYOPTIX_STDDEF_DIR="/usr/include/linux"
    if not optix_version_gte( (7,1) ):
        # Encoded like every other option: the list must be uniformly bytes.
        compile_options.append( f'-I{path_util.stddef_path}'.encode() )
    print("pynvrtc compile options = {}".format(compile_options))

    with open( cuda_file, 'rb' ) as f:
        src = f.read()

    # Create program
    prog = checkNVRTC(nvrtc.nvrtcCreateProgram(src, cuda_file.encode(), 0, [], []))

    # Compile program (passing prog lets checkNVRTC print the compile log on failure)
    checkNVRTC(nvrtc.nvrtcCompileProgram(prog, len(compile_options), compile_options), prog)

    # Get PTX from compilation
    ptxSize = checkNVRTC(nvrtc.nvrtcGetPTXSize(prog))
    ptx = b" " * ptxSize
    checkNVRTC(nvrtc.nvrtcGetPTX(prog, ptx))
    return ptx


#-------------------------------------------------------------------------------
#
# Optix setup
#
#-------------------------------------------------------------------------------

pix_width = 768
pix_height = 768


def create_context( state ):
    """Create the OptiX device context and store it in ``state.context``.

    A fresh ``Logger`` is installed as the context's log callback (level 4)
    and published as the module global ``logger``.  On OptiX >= 7.2 the
    context is created with validation mode ``ALL``.
    """
    print( "Creating optix device context ..." )

    # Note that log callback data is no longer needed. We can
    # instead send a callable class instance as the log-function
    # which stores any data needed
    global logger
    logger = Logger()

    # OptiX param struct fields can be set with optional
    # keyword constructor arguments.
    ctx_options = optix.DeviceContextOptions(
        logCallbackFunction = logger,
        logCallbackLevel = 4
    )

    # They can also be set and queried as properties on the struct
    if optix_version_gte( (7,2) ):
        ctx_options.validationMode = optix.DEVICE_CONTEXT_VALIDATION_MODE_ALL

    # NOTE(review): 0 presumably selects the current CUDA context -- confirm
    # against the OptiX deviceContextCreate documentation.
    cu_ctx = 0
    state.context = optix.deviceContextCreate( cu_ctx, ctx_options )


def build_triangle_gas( state ):
    """Build the triangle geometry acceleration structure with motion keys.

    A single triangle is supplied at three motion keys (one vertex buffer per
    key) over the time range [0.0, 1.0].  The resulting traversable handle is
    stored in ``state.tri_gas_handle`` and the device buffer backing it in
    ``state.d_tri_gas_output_buffer``.
    """

    NUM_KEYS = 3

    motion_options = optix.MotionOptions()
    motion_options.numKeys = NUM_KEYS
    motion_options.timeBegin = 0.0
    motion_options.timeEnd = 1.0
    motion_options.flags = optix.MOTION_FLAG_NONE

    accel_options = optix.AccelBuildOptions(
        buildFlags = optix.BUILD_FLAG_ALLOW_COMPACTION,
        operation = optix.BUILD_OPERATION_BUILD,
        motionOptions = motion_options
    )

    #
    # Copy triangle mesh data to device
    #
    # Each vertex occupies four floats; the vertex format below is FLOAT3
    # with a four-float stride, so the fourth value of every row is padding.
    NUM_VERTS = 3
    vertices_0 = cp.array( [
        0.0, 0.0, 0.0, 0.0,
        1.0, 0.0, 0.0, 0.0,
        0.5, 1.0, 0.0, 0.0,
    ], dtype = 'f4' )

    vertices_1 = cp.array( [
        0.5, 0.0, 0.0, 0.0,
        1.5, 0.0, 0.0, 0.0,
        1.0, 1.0, 0.0, 0.0,
    ],
    dtype = 'f4'
    )
    vertices_2 = cp.array( [
        0.5, -0.5, 0.0, 0.0,
        1.5, -0.5, 0.0, 0.0,
        1.0, 0.5, 0.0, 0.0
    ],
    dtype = 'f4'
    )


    triangle_input = optix.BuildInputTriangleArray()
    triangle_input.vertexFormat = optix.VERTEX_FORMAT_FLOAT3
    triangle_input.vertexStrideInBytes = np.dtype( 'f4' ).itemsize*4 # four floats per vert
    triangle_input.numVertices = NUM_VERTS
    # One device pointer per motion key.
    triangle_input.vertexBuffers = [ vertices_0.data.ptr, vertices_1.data.ptr, vertices_2.data.ptr ]
    triangle_input.flags = [ optix.GEOMETRY_FLAG_DISABLE_ANYHIT ]
    triangle_input.numSbtRecords = 1
    triangle_input.sbtIndexOffsetBuffer = 0

    gas_buffer_sizes = state.context.accelComputeMemoryUsage(
        [ accel_options ],
        [ triangle_input ]
    )

    d_temp_buffer = cp.cuda.alloc( gas_buffer_sizes.tempSizeInBytes )
    d_output_buffer = cp.cuda.alloc( gas_buffer_sizes.outputSizeInBytes )
    # Device-side slot that receives the emitted compacted size.
    d_result = cp.array( [ 0 ], dtype = 'u8' )

    emit_property = optix.AccelEmitDesc(
        type = optix.PROPERTY_TYPE_COMPACTED_SIZE,
        result = d_result.data.ptr
    )

    state.tri_gas_handle = state.context.accelBuild(
        0, # CUDA stream
        [ accel_options ],
        [ triangle_input ],
        d_temp_buffer.ptr,
        gas_buffer_sizes.tempSizeInBytes,
        d_output_buffer.ptr,
        gas_buffer_sizes.outputSizeInBytes,
        [ emit_property ]
    )

    compacted_gas_size = cp.asnumpy( d_result )[0]

    # NOTE: the trailing `and False` makes this condition always false, so
    # the compaction branch never runs and the uncompacted buffer is kept.
    # Left exactly as found.
    if compacted_gas_size < gas_buffer_sizes.outputSizeInBytes and False:

        state.d_tri_gas_output_buffer = cp.cuda.alloc( compacted_gas_size )
        state.tri_gas_handle = state.context.accelCompact(
            0, #CUDA stream
            state.tri_gas_handle,
            state.d_tri_gas_output_buffer.ptr,
            compacted_gas_size
        )
    else:
        state.d_tri_gas_output_buffer = d_output_buffer


def build_sphere_gas( state ):

    accel_options = optix.AccelBuildOptions(
        buildFlags = optix.BUILD_FLAG_ALLOW_COMPACTION,
        operation = optix.BUILD_OPERATION_BUILD
    )

    aabb = cp.array( [
        -1.5, -1.0, -0.5,
        -0.5, 0.0, 0.5
        #-1.0, -1.0, -1.0,
        # 1.0, 1.0, 1.0
        ], dtype = 'f4'
    )

    sphere_input = optix.BuildInputCustomPrimitiveArray(
        aabbBuffers = [ aabb.data.ptr ],
        numPrimitives = 1,
        #flags = [ optix.GEOMETRY_FLAG_DISABLE_ANYHIT ],
        flags = [ optix.GEOMETRY_FLAG_NONE],
        numSbtRecords = 1
    )

    gas_buffer_sizes = state.context.accelComputeMemoryUsage(
        [ accel_options ],
        [ sphere_input ]
    )

    d_temp_buffer = cp.cuda.alloc( gas_buffer_sizes.tempSizeInBytes )
| d_output_buffer = cp.cuda.alloc( gas_buffer_sizes.outputSizeInBytes ) 340 | d_result = cp.array( [ 0 ], dtype = 'u8' ) 341 | 342 | emit_property = optix.AccelEmitDesc( 343 | type = optix.PROPERTY_TYPE_COMPACTED_SIZE, 344 | result = d_result.data.ptr 345 | ) 346 | 347 | state.sphere_gas_handle = state.context.accelBuild( 348 | 0, # CUDA stream 349 | [ accel_options ], 350 | [ sphere_input ], 351 | d_temp_buffer.ptr, 352 | gas_buffer_sizes.tempSizeInBytes, 353 | d_output_buffer.ptr, 354 | gas_buffer_sizes.outputSizeInBytes, 355 | [ emit_property ] 356 | ) 357 | 358 | compacted_gas_size = cp.asnumpy( d_result )[0] 359 | 360 | if compacted_gas_size < gas_buffer_sizes.outputSizeInBytes and False: 361 | state.d_sphere_gas_output_buffer = cp.cuda.alloc( compacted_gas_size ) 362 | state.sphere_gas_handle = state.context.accelCompact( 363 | 0, #CUDA stream 364 | state.sphere_gas_handle, 365 | state.d_sphere_gas_output_buffer, 366 | compacted_gas_size 367 | ) 368 | else: 369 | state.d_sphere_gas_output_buffer = d_output_buffer 370 | 371 | 372 | def create_sphere_xform( state ): 373 | 374 | motion_keys = [ 375 | 1.0, 0.0, 0.0, 0.0, 376 | 0.0, 1.0, 0.0, 0.0, 377 | 0.0, 0.0, 1.0, 0.0, 378 | 379 | 1.0, 0.0, 0.0, 0.0, 380 | 0.0, 1.0, 0.0, 0.5, 381 | 0.0, 0.0, 1.0, 0.0 382 | ] 383 | 384 | motion_options = optix.MotionOptions() 385 | motion_options.numKeys = 2 386 | motion_options.timeBegin = 0.0 387 | motion_options.timeEnd = 1.0 388 | motion_options.flags = optix.MOTION_FLAG_NONE 389 | 390 | motion_transform = optix.MatrixMotionTransform( 391 | child = state.sphere_gas_handle, 392 | motionOptions = motion_options, 393 | transform = motion_keys 394 | ) 395 | 396 | xform_bytes = optix.getDeviceRepresentation( motion_transform ) 397 | state.d_sphere_motion_transform = cp.array( np.frombuffer( xform_bytes, dtype='B' ) ) 398 | 399 | state.sphere_motion_transform_handle = optix.convertPointerToTraversableHandle( 400 | state.context, 401 | state.d_sphere_motion_transform.data.ptr, 402 
| optix.TRAVERSABLE_TYPE_MATRIX_MOTION_TRANSFORM 403 | ) 404 | 405 | 406 | def build_ias( state ): 407 | 408 | instance_xform = [ 409 | 1.0, 0.0, 0.0, 0.0, 410 | 0.0, 1.0, 0.0, 0.0, 411 | 0.0, 0.0, 1.0, 0.0 412 | ] 413 | 414 | sphere_instance = optix.Instance( 415 | transform = instance_xform, 416 | flags = optix.INSTANCE_FLAG_NONE, 417 | instanceId = 0, 418 | sbtOffset = 0, 419 | visibilityMask = 1, 420 | traversableHandle = state.sphere_motion_transform_handle 421 | ) 422 | 423 | triangle_instance = optix.Instance( 424 | transform = instance_xform, 425 | flags = optix.INSTANCE_FLAG_NONE, 426 | instanceId = 1, 427 | sbtOffset = 1, 428 | visibilityMask = 1, 429 | traversableHandle = state.tri_gas_handle 430 | ) 431 | 432 | instances = [ sphere_instance, triangle_instance ] 433 | instances_bytes = optix.getDeviceRepresentation( instances ) 434 | d_instances = cp.array( np.frombuffer( instances_bytes, dtype='B' ) ) 435 | 436 | instance_input = optix.BuildInputInstanceArray( 437 | instances = d_instances.data.ptr, 438 | numInstances = len( instances ) 439 | ) 440 | 441 | motion_options = optix.MotionOptions() 442 | motion_options.numKeys = 2 443 | motion_options.timeBegin = 0.0 444 | motion_options.timeEnd = 1.0 445 | motion_options.flags = optix.MOTION_FLAG_NONE 446 | 447 | accel_options = optix.AccelBuildOptions( 448 | buildFlags = optix.BUILD_FLAG_NONE, 449 | operation = optix.BUILD_OPERATION_BUILD, 450 | motionOptions = motion_options 451 | ) 452 | 453 | ias_buffer_sizes = state.context.accelComputeMemoryUsage( 454 | [ accel_options ], 455 | [ instance_input ] 456 | ) 457 | d_temp_buffer = cp.cuda.alloc( ias_buffer_sizes.tempSizeInBytes ) 458 | state.d_ias_output_buffer = cp.cuda.alloc( ias_buffer_sizes.outputSizeInBytes ) 459 | 460 | state.ias_handle = state.context.accelBuild( 461 | 0, # CUDA stream 462 | [ accel_options ], 463 | [ instance_input ], 464 | d_temp_buffer.ptr, 465 | ias_buffer_sizes.tempSizeInBytes, 466 | state.d_ias_output_buffer.ptr, 467 | 
ias_buffer_sizes.outputSizeInBytes, 468 | [] # emitted properties 469 | ) 470 | 471 | 472 | def create_module( state ): 473 | 474 | module_compile_options = optix.ModuleCompileOptions() 475 | module_compile_options.maxRegisterCount = optix.COMPILE_DEFAULT_MAX_REGISTER_COUNT 476 | module_compile_options.optLevel = optix.COMPILE_OPTIMIZATION_DEFAULT 477 | module_compile_options.debugLevel = optix.COMPILE_DEBUG_LEVEL_DEFAULT 478 | 479 | state.pipeline_compile_options = optix.PipelineCompileOptions( 480 | traversableGraphFlags = optix.TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY, 481 | numPayloadValues = 3, 482 | numAttributeValues = 3, 483 | usesMotionBlur = True, 484 | exceptionFlags = optix.EXCEPTION_FLAG_NONE, 485 | pipelineLaunchParamsVariableName = "params" 486 | ) 487 | 488 | simple_motion_blur_cu = os.path.join(os.path.dirname(__file__), 'simpleMotionBlur.cu') 489 | simple_motion_blur_ptx = compile_cuda( simple_motion_blur_cu ) 490 | 491 | state.ptx_module, log = state.context.moduleCreate( 492 | module_compile_options, 493 | state.pipeline_compile_options, 494 | simple_motion_blur_ptx 495 | ) 496 | 497 | 498 | def create_program_groups( state ): 499 | 500 | 501 | raygen_program_group_desc = optix.ProgramGroupDesc( 502 | raygenModule = state.ptx_module, 503 | raygenEntryFunctionName = "__raygen__rg" 504 | ) 505 | 506 | raygen_prog_groups, log = state.context.programGroupCreate( 507 | [ raygen_program_group_desc ] 508 | ) 509 | state.raygen_prog_group = raygen_prog_groups[0] 510 | print( "\tProgramGroup raygen create log: <<<{}>>>".format( log ) ) 511 | 512 | miss_prog_group_desc = optix.ProgramGroupDesc( 513 | missModule = state.ptx_module, 514 | missEntryFunctionName = "__miss__camera" 515 | ) 516 | miss_groups, log = state.context.programGroupCreate( 517 | [ miss_prog_group_desc ] 518 | ) 519 | state.miss_group = miss_groups[0] 520 | print( "\tProgramGroup miss create log: <<<{}>>>".format( log ) ) 521 | 522 | hitgroup_prog_group_desc = optix.ProgramGroupDesc( 523 | 
hitgroupModuleCH = state.ptx_module, 524 | hitgroupEntryFunctionNameCH = "__closesthit__camera", 525 | ) 526 | tri_hit_groups, log = state.context.programGroupCreate( 527 | [ hitgroup_prog_group_desc ] 528 | ) 529 | state.tri_hit_group = tri_hit_groups[0] 530 | print( "\tProgramGroup triangle hit create log: <<<{}>>>".format( log ) ) 531 | 532 | hitgroup_prog_group_desc.hitgroupModuleIS = state.ptx_module 533 | hitgroup_prog_group_desc.hitgroupEntryFunctionNameIS = "__intersection__sphere" 534 | sphere_hit_groups, log = state.context.programGroupCreate( 535 | [ hitgroup_prog_group_desc ] 536 | ) 537 | state.sphere_hit_group = sphere_hit_groups[0] 538 | print( "\tProgramGroup sphere hit create log: <<<{}>>>".format( log ) ) 539 | 540 | 541 | def create_pipeline( state ): 542 | 543 | program_groups = [ 544 | state.raygen_prog_group, 545 | state.miss_group, 546 | state.sphere_hit_group, 547 | state.tri_hit_group 548 | ] 549 | 550 | pipeline_link_options = optix.PipelineLinkOptions( 551 | maxTraceDepth = 2, 552 | ) 553 | 554 | log = "" 555 | state.pipeline = state.context.pipelineCreate( 556 | state.pipeline_compile_options, 557 | pipeline_link_options, 558 | program_groups, 559 | log 560 | ) 561 | 562 | stack_sizes = optix.StackSizes() 563 | for prog_group in program_groups: 564 | if optix_version_gte( (7,7) ): 565 | optix.util.accumulateStackSizes( prog_group, stack_sizes, state.pipeline ) 566 | else: 567 | optix.util.accumulateStackSizes( prog_group, stack_sizes ) 568 | 569 | ( dc_stack_size_from_trav, dc_stack_size_from_state, cc_stack_size ) = \ 570 | optix.util.computeStackSizes( 571 | stack_sizes, 572 | 1, # maxTraceDepth 573 | 0, # maxCCDepth 574 | 0 # maxDCDepth 575 | ) 576 | 577 | state.pipeline.setStackSize( 578 | 1024, #dc_stack_size_from_trav, 579 | 1024, #dc_stack_size_from_state, 580 | 1024, #cc_stack_size, 581 | 3 # maxTraversableDepth ( 3 since largest depth is IAS->MT->GAS ) 582 | ) 583 | 584 | 585 | def create_sbt( state ): 586 | print( "Creating sbt 
... " ) 587 | 588 | header_format = '{}V'.format( optix.SBT_RECORD_HEADER_SIZE ) 589 | 590 | # 591 | # raygen record 592 | # 593 | formats = [ header_format ] 594 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 595 | dtype = np.dtype( { 596 | 'names' : ['header'], 597 | 'formats' : formats, 598 | 'itemsize' : itemsize, 599 | 'aligned' : True 600 | } ) 601 | h_raygen_record = np.array( 602 | [ optix.sbtRecordGetHeader( state.raygen_prog_group) ], 603 | dtype = dtype 604 | ) 605 | optix.sbtRecordPackHeader( state.raygen_prog_group, h_raygen_record ) 606 | state.d_raygen_record = array_to_device_memory( h_raygen_record ) 607 | 608 | # 609 | # miss records 610 | # 611 | formats = [ header_format, 'f4','f4','f4', 'u4' ] 612 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 613 | dtype = np.dtype( { 614 | 'names' : [ 'header', 'r', 'g', 'b', 'pad' ], 615 | 'formats' : formats, 616 | 'itemsize' : itemsize, 617 | 'aligned' : True 618 | } ) 619 | h_miss_record = np.array( [ ( 620 | optix.sbtRecordGetHeader( state.miss_group ), 621 | 0.1, 0.1, 0.1, 622 | 0 623 | ) ], 624 | dtype=dtype 625 | ) 626 | optix.sbtRecordPackHeader( state.miss_group, h_miss_record ) 627 | state.d_miss_records = array_to_device_memory( h_miss_record ) 628 | 629 | # 630 | # hit group records 631 | # 632 | formats = [ 633 | header_format, 634 | 'f4','f4','f4', 635 | 'f4','f4','f4', 636 | 'f4', 637 | 'u4' 638 | ] 639 | itemsize = get_aligned_itemsize( formats, optix.SBT_RECORD_ALIGNMENT ) 640 | hit_record_dtype = np.dtype( { 641 | 'names' : [ 642 | 'header', 643 | 'r','g','b', 644 | 'x','y','z', 645 | 'rad', 646 | 'pad' 647 | ], 648 | 'formats' : formats, 649 | 'itemsize' : itemsize, 650 | 'aligned' : True 651 | } ) 652 | 653 | sphere_record_header = optix.sbtRecordGetHeader( state.sphere_hit_group ) 654 | triangle_record_header = optix.sbtRecordGetHeader( state.tri_hit_group ) 655 | 656 | h_hitgroup_records = np.array( [ 657 | ( 658 | 
sphere_record_header, 659 | 0.9, 0.1, 0.1, 660 | -1.0, -0.5, 0.1, 661 | 0.5, 662 | 0.0 663 | ), 664 | ( 665 | triangle_record_header, 666 | 0.1, 0.1, 0.9, 667 | 0.0, 0.0, 0.0, # unused 668 | 0.0, # unused 669 | 0.0 670 | ) ], 671 | dtype=hit_record_dtype 672 | ) 673 | 674 | state.d_hitgroup_records = array_to_device_memory( h_hitgroup_records ) 675 | 676 | state.sbt = optix.ShaderBindingTable( 677 | raygenRecord = state.d_raygen_record.ptr, 678 | missRecordBase = state.d_miss_records.ptr, 679 | missRecordStrideInBytes = h_miss_record.dtype.itemsize, 680 | missRecordCount = 1, 681 | hitgroupRecordBase = state.d_hitgroup_records.ptr, 682 | hitgroupRecordStrideInBytes = h_hitgroup_records.dtype.itemsize, 683 | hitgroupRecordCount = 2 684 | ) 685 | 686 | 687 | def launch( state ): 688 | print( "Launching ... " ) 689 | 690 | pix_bytes = pix_width * pix_height * 4 691 | 692 | h_accum = np.zeros( (pix_width, pix_height, 4 ), 'f4' ) 693 | h_accum[0:pix_width, 0:pix_height] = [255, 128, 0, 255] 694 | d_accum = cp.array( h_accum ) 695 | 696 | h_frame = np.zeros( (pix_width, pix_height, 4 ), 'B' ) 697 | h_frame[0:pix_width, 0:pix_height] = [255, 128, 0, 255] 698 | d_frame = cp.array( h_frame ) 699 | 700 | params = [ 701 | ( 'u4', 'image_width', pix_width ), 702 | ( 'u4', 'image_height', pix_height ), 703 | ( 'u8', 'accum', d_accum.data.ptr ), 704 | ( 'u8', 'frame', d_frame.data.ptr ), 705 | ( 'u4', 'subframe index', 0 ), 706 | ( 'f4', 'cam_eye_x', 0 ), 707 | ( 'f4', 'cam_eye_y', 0 ), 708 | ( 'f4', 'cam_eye_z', 5.0 ), 709 | ( 'f4', 'cam_U_x', 1.10457 ), 710 | ( 'f4', 'cam_U_y', 0 ), 711 | ( 'f4', 'cam_U_z', 0 ), 712 | ( 'f4', 'cam_V_x', 0 ), 713 | ( 'f4', 'cam_V_y', 0.828427 ), 714 | ( 'f4', 'cam_V_z', 0 ), 715 | ( 'f4', 'cam_W_x', 0 ), 716 | ( 'f4', 'cam_W_y', 0 ), 717 | ( 'f4', 'cam_W_z', -2.0 ), 718 | ( 'u8', 'trav_handle', state.ias_handle ) 719 | #( 'u8', 'trav_handle', state.tri_gas_handle) 720 | ] 721 | 722 | formats = [ x[0] for x in params ] 723 | names = [ x[1] for x 
in params ] 724 | values = [ x[2] for x in params ] 725 | itemsize = get_aligned_itemsize( formats, 8 ) 726 | params_dtype = np.dtype( { 727 | 'names' : names, 728 | 'formats' : formats, 729 | 'itemsize': itemsize, 730 | 'aligned' : True 731 | } ) 732 | h_params = np.array( [ tuple(values) ], dtype=params_dtype ) 733 | d_params = array_to_device_memory( h_params ) 734 | 735 | stream = cp.cuda.Stream() 736 | optix.launch( 737 | state.pipeline, 738 | stream.ptr, 739 | d_params.ptr, 740 | h_params.dtype.itemsize, 741 | state.sbt, 742 | pix_width, 743 | pix_height, 744 | 1 # depth 745 | ) 746 | 747 | stream.synchronize() 748 | 749 | h_pix = cp.asnumpy( d_frame ) 750 | return h_pix 751 | 752 | 753 | 754 | #------------------------------------------------------------------------------- 755 | # 756 | # main 757 | # 758 | #------------------------------------------------------------------------------- 759 | 760 | 761 | def main(): 762 | state = State() 763 | create_context ( state ) 764 | build_triangle_gas ( state ) 765 | build_sphere_gas ( state ) 766 | create_sphere_xform ( state ) 767 | build_ias ( state ) 768 | create_module ( state ) 769 | create_program_groups( state ) 770 | create_pipeline ( state ) 771 | create_sbt ( state ) 772 | 773 | pix = launch( state ) 774 | 775 | print( "Total number of log messages: {}".format( logger.num_mssgs ) ) 776 | 777 | pix = pix.reshape( ( pix_height, pix_width, 4 ) ) # PIL expects [ y, x ] resolution 778 | img = ImageOps.flip( Image.fromarray( pix, 'RGBA' ) ) # PIL expects y = 0 at bottom 779 | img.show() 780 | img.save( 'my.png' ) 781 | 782 | 783 | if __name__ == "__main__": 784 | main() 785 | --------------------------------------------------------------------------------