├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── external.ci.yml
    │   ├── gfx-windows-public-bmg.env
    │   ├── gfx-windows-public.env
    │   └── internal.ci.yml
├── .gitignore
├── .gitmodules
├── CHANGELOG.md
├── CMakeLists.txt
├── LICENSE.txt
├── README.md
├── SECURITY.md
├── api
    ├── CMakeLists.txt
    └── api.cpp
├── apps
    ├── CMakeLists.txt
    ├── oidnBenchmark.cpp
    ├── oidnDenoise.cpp
    ├── oidnTest.cpp
    └── utils
    │   ├── CMakeLists.txt
    │   ├── arg_parser.cpp
    │   ├── arg_parser.h
    │   ├── device_info.h
    │   ├── dummy.c
    │   ├── image_buffer.cpp
    │   ├── image_buffer.h
    │   ├── image_io.cpp
    │   ├── image_io.h
    │   └── random.h
├── cmake
    ├── Config.cmake.in
    ├── FindLevelZero.cmake
    ├── FindOpenImageIO.cmake
    ├── FindTBB.cmake
    ├── oidn_bnns.cmake
    ├── oidn_common.cmake
    ├── oidn_common_external.cmake
    ├── oidn_dnnl.cmake
    ├── oidn_ispc.cmake
    ├── oidn_macros.cmake
    ├── oidn_metal.cmake
    ├── oidn_package.cmake
    ├── oidn_platform.cmake
    └── oidn_version.cmake
├── common
    ├── CMakeLists.txt
    ├── common.cpp
    ├── common.h
    ├── export.linux.map.in
    ├── export.macos.map.in
    ├── half.cpp
    ├── half.h
    ├── oidn.rc
    ├── oidn_utils.cpp
    ├── oidn_utils.h
    ├── platform.cpp
    ├── platform.h
    └── timer.h
├── core
    ├── CMakeLists.txt
    ├── arena.cpp
    ├── arena.h
    ├── arena_planner.cpp
    ├── arena_planner.h
    ├── autoexposure.h
    ├── buffer.cpp
    ├── buffer.h
    ├── color.cpp
    ├── color.h
    ├── concat_conv.cpp
    ├── concat_conv.h
    ├── concat_conv_chw.cpp
    ├── concat_conv_chw.h
    ├── concat_conv_hwc.cpp
    ├── concat_conv_hwc.h
    ├── context.cpp
    ├── context.h
    ├── conv.cpp
    ├── conv.h
    ├── data.h
    ├── device.cpp
    ├── device.h
    ├── device_factory.h
    ├── engine.cpp
    ├── engine.h
    ├── exception.cpp
    ├── exception.h
    ├── filter.cpp
    ├── filter.h
    ├── graph.cpp
    ├── graph.h
    ├── heap.cpp
    ├── heap.h
    ├── image.cpp
    ├── image.h
    ├── image_accessor.h
    ├── image_copy.h
    ├── input_process.cpp
    ├── input_process.h
    ├── kernel.h
    ├── math.h
    ├── module.cpp
    ├── module.h
    ├── op.cpp
    ├── op.h
    ├── output_process.cpp
    ├── output_process.h
    ├── pool.cpp
    ├── pool.h
    ├── progress.cpp
    ├── progress.h
    ├── record.h
    ├── ref.h
    ├── rt_filter.cpp
    ├── rt_filter.h
    ├── rtlightmap_filter.cpp
    ├── rtlightmap_filter.h
    ├── subdevice.cpp
    ├── subdevice.h
    ├── tensor.cpp
    ├── tensor.h
    ├── tensor_accessor.h
    ├── tensor_layout.h
    ├── tensor_reorder.cpp
    ├── tensor_reorder.h
    ├── thread.cpp
    ├── thread.h
    ├── tile.h
    ├── tza.cpp
    ├── tza.h
    ├── unet_filter.cpp
    ├── unet_filter.h
    ├── upsample.cpp
    ├── upsample.h
    ├── vec.h
    └── verbose.h
├── devices
    ├── CMakeLists.txt
    ├── cpu
    │   ├── CMakeLists.txt
    │   ├── bnns
    │   │   ├── bnns_common.cpp
    │   │   ├── bnns_common.h
    │   │   ├── bnns_conv.cpp
    │   │   ├── bnns_conv.h
    │   │   ├── bnns_engine.cpp
    │   │   ├── bnns_engine.h
    │   │   ├── bnns_pool.cpp
    │   │   └── bnns_pool.h
    │   ├── color.ispc
    │   ├── color.isph
    │   ├── cpu_autoexposure.cpp
    │   ├── cpu_autoexposure.h
    │   ├── cpu_autoexposure.ispc
    │   ├── cpu_common.cpp
    │   ├── cpu_common.h
    │   ├── cpu_conv.cpp
    │   ├── cpu_conv.h
    │   ├── cpu_conv.ispc
    │   ├── cpu_conv_compute.isph
    │   ├── cpu_conv_compute_block.isph
    │   ├── cpu_device.cpp
    │   ├── cpu_device.h
    │   ├── cpu_engine.cpp
    │   ├── cpu_engine.h
    │   ├── cpu_image_copy.cpp
    │   ├── cpu_image_copy.h
    │   ├── cpu_image_copy.ispc
    │   ├── cpu_input_process.cpp
    │   ├── cpu_input_process.h
    │   ├── cpu_input_process.ispc
    │   ├── cpu_module.cpp
    │   ├── cpu_output_process.cpp
    │   ├── cpu_output_process.h
    │   ├── cpu_output_process.ispc
    │   ├── cpu_pool.cpp
    │   ├── cpu_pool.h
    │   ├── cpu_pool.ispc
    │   ├── cpu_upsample.cpp
    │   ├── cpu_upsample.h
    │   ├── cpu_upsample.ispc
    │   ├── dnnl
    │   │   ├── dnnl_common.cpp
    │   │   ├── dnnl_common.h
    │   │   ├── dnnl_conv.cpp
    │   │   ├── dnnl_conv.h
    │   │   ├── dnnl_engine.cpp
    │   │   ├── dnnl_engine.h
    │   │   ├── dnnl_tensor.cpp
    │   │   └── dnnl_tensor.h
    │   ├── image_accessor.isph
    │   ├── math.isph
    │   ├── platform.ispc
    │   ├── platform.isph
    │   ├── tasking.cpp
    │   ├── tasking.h
    │   ├── tensor_accessor.isph
    │   ├── tile.isph
    │   └── vec.isph
    ├── cuda
    │   ├── CMakeLists.txt
    │   ├── cuda_conv.cu
    │   ├── cuda_conv.h
    │   ├── cuda_device.cpp
    │   ├── cuda_device.h
    │   ├── cuda_engine.cu
    │   ├── cuda_engine.h
    │   ├── cuda_external_buffer.cpp
    │   ├── cuda_external_buffer.h
    │   ├── cuda_module.cpp
    │   ├── curtn.cpp
    │   ├── curtn.h
    │   ├── cutlass_conv.h
    │   ├── cutlass_conv_sm70.cu
    │   ├── cutlass_conv_sm75.cu
    │   └── cutlass_conv_sm80.cu
    ├── gpu
    │   ├── gpu_autoexposure.h
    │   ├── gpu_image_copy.h
    │   ├── gpu_input_process.h
    │   ├── gpu_output_process.h
    │   ├── gpu_pool.h
    │   └── gpu_upsample.h
    ├── hip
    │   ├── CMakeLists.txt
    │   ├── ck_conv.h
    │   ├── ck_conv_dl.cpp
    │   ├── ck_conv_wmma.cpp
    │   ├── hip_conv.cpp
    │   ├── hip_conv.h
    │   ├── hip_device.cpp
    │   ├── hip_device.h
    │   ├── hip_engine.cpp
    │   ├── hip_engine.h
    │   ├── hip_external_buffer.cpp
    │   ├── hip_external_buffer.h
    │   └── hip_module.cpp
    ├── metal
    │   ├── CMakeLists.txt
    │   ├── metal_buffer.h
    │   ├── metal_buffer.mm
    │   ├── metal_common.h
    │   ├── metal_common.mm
    │   ├── metal_conv.h
    │   ├── metal_conv.mm
    │   ├── metal_device.h
    │   ├── metal_device.mm
    │   ├── metal_engine.h
    │   ├── metal_engine.mm
    │   ├── metal_heap.h
    │   ├── metal_heap.mm
    │   ├── metal_kernels.metal
    │   └── metal_module.mm
    └── sycl
    │   ├── CMakeLists.txt
    │   ├── sycl_common.h
    │   ├── sycl_conv.h
    │   ├── sycl_conv_xe.h
    │   ├── sycl_conv_xe2.cpp
    │   ├── sycl_conv_xehpc.cpp
    │   ├── sycl_conv_xehpg.cpp
    │   ├── sycl_conv_xelp.cpp
    │   ├── sycl_device.cpp
    │   ├── sycl_device.h
    │   ├── sycl_device_table.h
    │   ├── sycl_engine.cpp
    │   ├── sycl_engine.h
    │   ├── sycl_external_buffer.cpp
    │   ├── sycl_external_buffer.h
    │   └── sycl_module.cpp
├── doc
    ├── .gitignore
    ├── BINARY-LICENSE.txt
    ├── Makefile
    ├── api.md
    ├── compilation.md
    ├── documentation.md
    ├── downloads.md
    ├── examples.md
    ├── filter-latex.py
    ├── filter-sectionnumbers.py
    ├── gallery.md
    ├── images.md
    ├── legal.md
    ├── links.md
    ├── overview.md
    ├── preamble.tex
    ├── readme.tex
    ├── readme_head.md
    ├── related_projects.md
    ├── stylesheet.css
    ├── teaser.html
    ├── training.md
    └── webtemplate.html
├── external
    └── catch.hpp
├── include
    └── OpenImageDenoise
    │   ├── config.h.in
    │   ├── oidn.h
    │   └── oidn.hpp
├── readme.pdf
├── requirements.txt
├── scripts
    ├── blob_to_cpp.py
    ├── build.py
    ├── build_src.py
    ├── build_weights.py
    ├── common.py
    ├── csan.supp.xml
    ├── protex_scan.sh
    ├── store-files.sh
    ├── test.py
    └── valgrind.supp
├── third-party-programs-DPCPP.txt
├── third-party-programs-oneDNN.txt
├── third-party-programs-oneTBB.txt
├── third-party-programs.txt
└── training
    ├── .gitignore
    ├── color.py
    ├── compare_image.py
    ├── config.py
    ├── convert_image.py
    ├── dataset.py
    ├── export.py
    ├── find_lr.py
    ├── image.py
    ├── infer.py
    ├── loss.py
    ├── model.py
    ├── preprocess.py
    ├── result.py
    ├── split_exr.py
    ├── ssim.py
    ├── train.py
    ├── tza.py
    ├── util.py
    └── visualize.py


/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "pip" # See documentation for possible values
 9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "monthly"
12 | 


--------------------------------------------------------------------------------
/.github/workflows/gfx-windows-public-bmg.env:
--------------------------------------------------------------------------------
1 | GFX_DRIVER_VERSION=windows-101.6256


--------------------------------------------------------------------------------
/.github/workflows/gfx-windows-public.env:
--------------------------------------------------------------------------------
1 | GFX_DRIVER_VERSION=windows-101.6449


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .*
 2 | !.gitignore
 3 | !.github
 4 | venv/
 5 | 
 6 | *~
 7 | *.autosave
 8 | *.a
 9 | *.core
10 | *.moc
11 | *.o
12 | *.obj
13 | *.orig
14 | *.rej
15 | *.so
16 | *.so.*
17 | *_pch.h.cpp
18 | *.qm
19 | .#*
20 | *.*#
21 | core
22 | !core/
23 | tags
24 | .DS_Store
25 | .directory
26 | *.debug
27 | *.prl
28 | *.app
29 | moc_*.cpp
30 | ui_*.h
31 | qrc_*.cpp
32 | Thumbs.db
33 | *.res
34 | /.qmake.cache
35 | /.qmake.stash
36 | .bash_history
37 | .python_history
38 | 
39 | # Qt Creator generated files
40 | *.txt.user*
41 | *.pro.user*
42 | 
43 | # xemacs temporary files
44 | *.flc
45 | 
46 | # Vim temporary files
47 | .*.swp
48 | 
49 | # Visual Studio generated files
50 | *.ib_pdb_index
51 | *.idb
52 | *.ilk
53 | *.pdb
54 | *.sln
55 | *.suo
56 | *.vcproj
57 | *vcproj.*.*.user
58 | *.ncb
59 | *.sdf
60 | *.opensdf
61 | *.vcxproj
62 | *vcxproj.*
63 | *.log
64 | 
65 | # Visual Studio Code generated files
66 | .vscode
67 | 
68 | # MinGW generated files
69 | *.Debug
70 | *.Release
71 | 
72 | # Python byte code
73 | *.pyc
74 | 
75 | # Binaries
76 | *.dll
77 | *.exe
78 | 
79 | # Images
80 | /images*
81 | *.exr
82 | *.pfm
83 | *.phm
84 | *.png
85 | *.bmp
86 | *.ppm
87 | 
88 | # Build directories
89 | /build*
90 | 
91 | # Dependencies
92 | /deps
93 | 
94 | # Generated files
95 | include/OpenImageDenoise/config.h
96 | common/export.linux.map
97 | common/export.macos.map
98 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "mkl-dnn"]
 2 | 	path = external/mkl-dnn
 3 | 	url = ../mkl-dnn.git
 4 | [submodule "weights"]
 5 | 	path = weights
 6 | 	url = ../oidn-weights.git
 7 | [submodule "cutlass"]
 8 | 	path = external/cutlass
 9 | 	url = https://github.com/NVIDIA/cutlass.git
10 | [submodule "external/composable_kernel"]
11 | 	path = external/composable_kernel
12 | 	url = https://github.com/ROCmSoftwarePlatform/composable_kernel.git
13 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | # Security Policy
 2 | Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. 
 3 | 
 4 | ## Reporting a Vulnerability
 5 | Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html).
 6 | 
 7 | 
 8 | ## Security Considerations
 9 | When integrating this library into your application, you are responsible for ensuring overall application security. If you are building and using the **Open Image Denoise (OIDN) library** from source, be aware of potential security risks, including **DLL planting attacks** and other dynamic library loading vulnerabilities.
10 | 
11 | To help mitigate such risks, we provide the `OIDN_DEPENDENTLOADFLAG` CMake option. This allows you to specify the appropriate **Windows linker flag** based on your security requirements. By default, this option is not set, and you should configure it according to your deployment needs. See the official Microsoft documentation for more details: [DEPENDENTLOADFLAG Linker Option](https://learn.microsoft.com/en-us/cpp/build/reference/dependentloadflag?view=msvc-170).
12 | 
13 | To enable and set this flag, configure your build with:
14 | 
15 |     cmake -DOIDN_DEPENDENTLOADFLAG=<value> ..
16 | 
17 | For more information on securing dynamic library loading, refer to Microsoft's official documentation on:  
18 | 
19 | - [Safe DLL Search Mode](https://learn.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-search-order)  
20 | - [Dynamic-Link Library Security](https://learn.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-security)  
21 | 


--------------------------------------------------------------------------------
/api/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ## Copyright 2018 Intel Corporation
 2 | ## SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | set(API_SOURCES
 5 |   api.cpp
 6 | )
 7 | 
 8 | add_library(OpenImageDenoise ${OIDN_LIB_TYPE} ${API_SOURCES} ${OIDN_RESOURCE_FILE})
 9 | 
10 | set_target_properties(OpenImageDenoise PROPERTIES
11 |   OUTPUT_NAME ${OIDN_LIBRARY_NAME}
12 | )
13 | if(OIDN_LIBRARY_VERSIONED)
14 |   set_target_properties(OpenImageDenoise PROPERTIES
15 |     VERSION ${PROJECT_VERSION}
16 |     SOVERSION ${PROJECT_VERSION_MAJOR}
17 |   )
18 | endif()
19 | 
20 | target_include_directories(OpenImageDenoise
21 |   PUBLIC
22 |     $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
23 |     $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
24 | )
25 | 
26 | target_link_libraries(OpenImageDenoise PRIVATE OpenImageDenoise_core)
27 | 
28 | if(NOT OIDN_STATIC_LIB)
29 |   oidn_strip_symbols(OpenImageDenoise)
30 | endif()
31 | 
32 | install(TARGETS OpenImageDenoise
33 |   EXPORT
34 |     OpenImageDenoise_Exports
35 |   ARCHIVE
36 |     DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel
37 |   LIBRARY
38 |     DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel
39 |   RUNTIME
40 |     DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT lib
41 | )


--------------------------------------------------------------------------------
/apps/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ## Copyright 2020 Intel Corporation
 2 | ## SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | add_subdirectory(utils)
 5 | 
 6 | macro(oidn_add_app APP_NAME) # oidn_add_app(<name> <sources>...): defines, links and installs one app
 7 |   add_executable(${APP_NAME} ${ARGN} ${OIDN_RESOURCE_FILE}) # ARGN holds the sources passed after the name
 8 |   target_link_libraries(${APP_NAME} PRIVATE OpenImageDenoise_common OpenImageDenoise_utils OpenImageDenoise)
 9 |   install(TARGETS ${APP_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT apps) # installed with the "apps" component
10 | endmacro()
11 | 
12 | oidn_add_app(oidnDenoise oidnDenoise.cpp)
13 | oidn_add_app(oidnBenchmark oidnBenchmark.cpp)
14 | oidn_add_app(oidnTest oidnTest.cpp "${PROJECT_SOURCE_DIR}/external/catch.hpp")


--------------------------------------------------------------------------------
/apps/utils/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ## Copyright 2020 Intel Corporation
 2 | ## SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | include(CMakeDependentOption)
 5 | 
 6 | set(OIDN_UTILS_SOURCES
 7 |   arg_parser.h
 8 |   arg_parser.cpp
 9 |   device_info.h
10 |   image_buffer.h
11 |   image_buffer.cpp
12 |   image_io.h
13 |   image_io.cpp
14 |   random.h
15 | )
16 | 
17 | if(NOT OIDN_API_NAMESPACE)
18 |   list(APPEND OIDN_UTILS_SOURCES dummy.c)
19 | endif()
20 | 
21 | add_library(OpenImageDenoise_utils STATIC ${OIDN_UTILS_SOURCES})
22 | set_target_properties(OpenImageDenoise_utils PROPERTIES OUTPUT_NAME ${OIDN_LIBRARY_NAME}_utils)
23 | target_link_libraries(OpenImageDenoise_utils PUBLIC OpenImageDenoise_common OpenImageDenoise)
24 | 
25 | # OpenImageIO
26 | option(OIDN_APPS_OPENIMAGEIO "Enable OpenImageIO support in example and test apps." OFF)
27 | if(OIDN_APPS_OPENIMAGEIO)
28 |   find_package(OpenImageIO REQUIRED)
29 |   if(OPENIMAGEIO_VERSION VERSION_GREATER_EQUAL "3")
30 |     # OpenImageIO 3.x requires C++17
31 |     set_target_properties(OpenImageDenoise_utils PROPERTIES CXX_STANDARD 17)
32 |   elseif(OPENIMAGEIO_VERSION VERSION_GREATER_EQUAL "2.3")
33 |     # OpenImageIO 2.3+ requires C++14
34 |     set_target_properties(OpenImageDenoise_utils PROPERTIES CXX_STANDARD 14)
35 |   endif()
36 |   target_compile_definitions(OpenImageDenoise_utils PRIVATE OIDN_USE_OPENIMAGEIO)
37 |   target_link_libraries(OpenImageDenoise_utils PRIVATE OpenImageIO::OpenImageIO)
38 | endif()
39 | 


--------------------------------------------------------------------------------
/apps/utils/arg_parser.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "arg_parser.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   // Parsing starts at argv[1]; argv[0] is never returned
 9 |   ArgParser::ArgParser(int argc, char* argv[])
10 |     : argc(argc), argv(argv), pos(1) {}
11 | 
12 |   // Returns true while unread arguments remain
13 |   bool ArgParser::hasNext() const { return pos < argc; }
14 | 
15 |   // Returns the next raw argument and advances; throws if none is left
16 |   std::string ArgParser::getNext()
17 |   {
18 |     if (pos >= argc)
19 |       throw std::invalid_argument("argument expected");
20 |     return argv[pos++];
21 |   }
22 | 
23 |   // Returns the next argument with its leading dashes removed; throws if it
24 |   // has no leading dash or consists only of dashes
25 |   std::string ArgParser::getNextOpt()
26 |   {
27 |     const std::string arg = getNext();
28 |     const size_t nameStart = arg.find_first_not_of("-");
29 |     if (nameStart == 0 || nameStart == std::string::npos)
30 |       throw std::invalid_argument("option expected");
31 |     return arg.substr(nameStart);
32 |   }
33 | 
34 |   // Returns the next argument as a value; throws if it starts with a dash
35 |   template<>
36 |   std::string ArgParser::getNextValue()
37 |   {
38 |     std::string arg = getNext();
39 |     const bool looksLikeOption = !arg.empty() && arg[0] == '-';
40 |     if (looksLikeOption)
41 |       throw std::invalid_argument("value expected");
42 |     return arg;
43 |   }
44 | 
45 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/apps/utils/arg_parser.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "common/platform.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Command-line argument parser
11 |   class ArgParser
12 |   {
13 |   public:
14 |     ArgParser(int argc, char* argv[]); // parsing starts at argv[1]
15 | 
16 |     bool hasNext() const;     // true while unread arguments remain
17 |     std::string getNext();    // next raw argument; throws std::invalid_argument if none is left
18 |     std::string getNextOpt(); // next argument without its leading dashes; throws std::invalid_argument if it is not an option
19 | 
20 |     template<typename T = std::string> // T: type the value is converted to
21 |     T getNextValue()
22 |     {
23 |       return fromString<T>(getNextValue()); // reads the value via the std::string specialization, then converts it
24 |     }
25 | 
26 |   private:
27 |     int argc;    // number of entries in argv
28 |     char** argv; // arguments as passed to the constructor
29 |     int pos;     // index of the next unread argument
30 |   };
31 | 
32 |   template<>
33 |   std::string ArgParser::getNextValue();
34 | 
35 | OIDN_NAMESPACE_END
36 | 
37 | 


--------------------------------------------------------------------------------
/apps/utils/device_info.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "common/common.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Prints the properties of every physical device to std::cout.
11 |   // Returns 0 on success and 1 if no physical device was found.
12 |   inline int printPhysicalDevices()
13 |   {
14 |     const int numDevices = getNumPhysicalDevices();
15 |     if (numDevices == 0)
16 |     {
17 |       std::cout << "No supported devices found" << std::endl;
18 |       return 1;
19 |     }
20 | 
21 |     for (int i = 0; i < numDevices; ++i)
22 |     {
23 |       PhysicalDeviceRef physicalDevice(i);
24 |       std::cout << "Device " << i << std::endl;
25 |       std::cout << "  Name: " << physicalDevice.get<std::string>("name") << std::endl;
26 |       std::cout << "  Type: " << physicalDevice.get<DeviceType>("type") << std::endl;
27 |       if (physicalDevice.get<bool>("uuidSupported"))
28 |         std::cout << "  UUID: " << physicalDevice.get<OIDN_NAMESPACE::UUID>("uuid") << std::endl;
29 |       if (physicalDevice.get<bool>("luidSupported"))
30 |       {
31 |         std::cout << "  LUID: " << physicalDevice.get<OIDN_NAMESPACE::LUID>("luid") << std::endl;
32 |         std::cout << "  Node: " << physicalDevice.get<uint32_t>("nodeMask") << std::endl;
33 |       }
34 |       if (physicalDevice.get<bool>("pciAddressSupported"))
35 |       {
36 |         // The address is printed as zero-padded hex. The fill character is not
37 |         // part of the format flags, so both are saved here and restored below
38 |         // to leave std::cout as it was found.
39 |         const auto flags = std::cout.flags();
40 |         const char fill  = std::cout.fill();
41 |         std::cout << "  PCI : "
42 |                   << std::hex << std::setfill('0')
43 |                   << std::setw(4) << physicalDevice.get<int>("pciDomain") << ":"
44 |                   << std::setw(2) << physicalDevice.get<int>("pciBus")    << ":"
45 |                   << std::setw(2) << physicalDevice.get<int>("pciDevice") << "."
46 |                   << std::setw(1) << physicalDevice.get<int>("pciFunction")
47 |                   << std::endl;
48 |         std::cout.flags(flags);
49 |         std::cout.fill(fill);
50 |       }
51 |       if (i < numDevices-1)
52 |         std::cout << std::endl; // blank line between devices
53 |     }
54 | 
55 |     return 0;
56 |   }
57 | 
58 | OIDN_NAMESPACE_END
59 | 


--------------------------------------------------------------------------------
/apps/utils/dummy.c:
--------------------------------------------------------------------------------
1 | // Copyright 2023 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 | 
4 | // This dummy file is used for checking whether the headers are valid C code
5 | #include "include/OpenImageDenoise/oidn.h"


--------------------------------------------------------------------------------
/apps/utils/image_io.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "image_buffer.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Loads an image with optionally specified data type and storage
11 |   std::shared_ptr<ImageBuffer> loadImage(const DeviceRef& device,
12 |                                          const std::string& filename,
13 |                                          DataType dataType = DataType::Void,
14 |                                          Storage storage = Storage::Undefined);
15 | 
16 |   // Loads an image with/without sRGB to linear conversion
17 |   std::shared_ptr<ImageBuffer> loadImage(const DeviceRef& device,
18 |                                          const std::string& filename,
19 |                                          bool srgb,
20 |                                          DataType dataType = DataType::Void,
21 |                                          Storage storage = Storage::Undefined);
22 | 
23 |   // Saves an image
24 |   void saveImage(const std::string& filename, const ImageBuffer& image);
25 | 
26 |   // Saves an image with/without linear to sRGB conversion
27 |   void saveImage(const std::string& filename, const ImageBuffer& image, bool srgb);
28 | 
29 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/apps/utils/random.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "common/platform.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Simple and very fast LCG random number generator.
11 |   // Every get*() call first advances the state by one LCG step, so each call
12 |   // returns a new value.
13 |   class Random
14 |   {
15 |   private:
16 |     uint32_t state; // current LCG state, which is also the last generated value
17 | 
18 |   public:
19 |     // Starts the sequence with the seed used directly as the initial state.
20 |     // Note that this differs from reset(), which scrambles the seed first, so
21 |     // Random(s) and reset(s) begin different sequences.
22 |     oidn_inline Random(uint32_t seed = 1) : state(seed) {}
23 | 
24 |     // Restarts the sequence from a scrambled version of the seed
25 |     oidn_inline void reset(uint32_t seed = 1)
26 |     {
27 |       state = (seed * 8191) ^ 140167;
28 |     }
29 | 
30 |     // Advances the state: state = multiplier * state + increment (mod 2^32)
31 |     oidn_inline void next()
32 |     {
33 |       const uint32_t multiplier = 1664525;
34 |       const uint32_t increment  = 1013904223;
35 |       state = multiplier * state + increment;
36 |     }
37 | 
38 |     // Returns the next value as an unsigned 32-bit integer
39 |     oidn_inline uint32_t getUInt()
40 |     {
41 |       next();
42 |       return state;
43 |     }
44 | 
45 |     // Returns the next value converted to int
46 |     oidn_inline int getInt()
47 |     {
48 |       next();
49 |       return state;
50 |     }
51 | 
52 |     // Returns the next value as a float in [0, 1)
53 |     oidn_inline float getFloat()
54 |     {
55 |       next();
56 |       const float x = float(state) * 2.3283064365386962890625e-10f; // x / 2^32
57 |       // float(state) rounds up to 2^32 for the largest states, which would yield
58 |       // exactly 1; clamp to the largest float below 1 to keep the range half-open
59 |       const float maxBelowOne = 0.99999994f; // 1 - 2^-24
60 |       return (x < 1.0f) ? x : maxBelowOne;
61 |     }
62 |   };
63 | 
64 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/cmake/Config.cmake.in:
--------------------------------------------------------------------------------
 1 | ## Copyright 2023 Intel Corporation
 2 | ## SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | @PACKAGE_INIT@
 5 | 
 6 | set(OIDN_DEVICE_CPU  @OIDN_DEVICE_CPU@)
 7 | set(OIDN_DEVICE_SYCL @OIDN_DEVICE_SYCL@)
 8 | set(OIDN_DEVICE_CUDA @OIDN_DEVICE_CUDA@)
 9 | set(OIDN_DEVICE_HIP  @OIDN_DEVICE_HIP@)
10 | 
11 | set(OIDN_FILTER_RT @OIDN_FILTER_RT@)
12 | set(OIDN_FILTER_RTLIGHTMAP @OIDN_FILTER_RTLIGHTMAP@)
13 | 
14 | set(OIDN_STATIC_LIB @OIDN_STATIC_LIB@)
15 | 
16 | if(OIDN_STATIC_LIB AND OIDN_DEVICE_CPU)
17 |   include(CMakeFindDependencyMacro)
18 |   find_dependency(TBB)
19 | endif()
20 | 
21 | include("${CMAKE_CURRENT_LIST_DIR}/OpenImageDenoiseTargets.cmake")
22 | 
23 | check_required_components(OpenImageDenoise)


--------------------------------------------------------------------------------
/cmake/oidn_bnns.cmake:
--------------------------------------------------------------------------------
 1 | ## Copyright 2021 Intel Corporation
 2 | ## SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | if(NOT IOS) # version bounds for non-iOS builds
 5 |   set(OIDN_APPLE_SDK_VERSION_MIN 11.0) # not read in this file; NOTE(review): presumably the deployment target - confirm
 6 |   set(OIDN_APPLE_SDK_VERSION_MAX 11.0)
 7 | else() # version bounds for iOS builds
 8 |   set(OIDN_APPLE_SDK_VERSION_MIN 14.0)
 9 |   set(OIDN_APPLE_SDK_VERSION_MAX 14.0)
10 | endif()
11 | 
12 | if(OIDN_APPLE_SDK_VERSION VERSION_LESS OIDN_APPLE_SDK_VERSION_MAX) # OIDN_APPLE_SDK_VERSION must be set before this file is included
13 |   message(FATAL_ERROR "Building with BNNS support requires Apple SDK version ${OIDN_APPLE_SDK_VERSION_MAX} or newer")
14 | endif()


--------------------------------------------------------------------------------
/cmake/oidn_common.cmake:
--------------------------------------------------------------------------------
 1 | ## Copyright 2018 Intel Corporation
 2 | ## SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | # Policy: find_package() uses <PackageName>_ROOT variables
 5 | if(POLICY CMP0074)
 6 |   cmake_policy(SET CMP0074 NEW)
 7 | endif()
 8 | 
 9 | # Set build output directories
10 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OIDN_ROOT_BINARY_DIR})
11 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OIDN_ROOT_BINARY_DIR})
12 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OIDN_ROOT_BINARY_DIR})
13 | 
14 | # CMake macros
15 | include(oidn_macros)
16 | 
17 | # Configuration types
18 | set(CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo")
19 | if(WIN32)
20 |   if(NOT OIDN_DEFAULT_CMAKE_CONFIGURATION_TYPES_SET)
21 |     set(CMAKE_CONFIGURATION_TYPES "${CONFIGURATION_TYPES}"
22 |         CACHE STRING "List of generated configurations." FORCE)
23 |     set(OIDN_DEFAULT_CMAKE_CONFIGURATION_TYPES_SET ON
24 |         CACHE INTERNAL "Default CMake configuration types set.")
25 |   endif()
26 | else()
27 |   if(NOT CMAKE_BUILD_TYPE)
28 |     set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the build type." FORCE)
29 |   endif()
30 |   set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS ${CONFIGURATION_TYPES})
31 | endif()
32 | 
33 | # Build as shared or static library
34 | option(OIDN_STATIC_LIB "Build Open Image Denoise as a static or hybrid static/shared library.")
35 | mark_as_advanced(CLEAR OIDN_STATIC_LIB)
36 | if(OIDN_STATIC_LIB)
37 |   set(OIDN_LIB_TYPE STATIC)
38 | else()
39 |   set(OIDN_LIB_TYPE SHARED)
40 | endif()
41 | 
42 | # Library name
43 | set(OIDN_LIBRARY_NAME "OpenImageDenoise" CACHE STRING "Base name of the Open Image Denoise library files.")
44 | option(OIDN_LIBRARY_VERSIONED "Build versioned Open Image Denoise library files." ON)
45 | mark_as_advanced(OIDN_LIBRARY_VERSIONED)
46 | 
47 | # API namespace
48 | set(OIDN_API_NAMESPACE "" CACHE STRING "C++ namespace to put API symbols into.")
49 | if(OIDN_API_NAMESPACE)
50 |   set(OIDN_NAMESPACE ${OIDN_API_NAMESPACE}::oidn)
51 | else()
52 |   set(OIDN_NAMESPACE oidn)
53 | endif()
54 | 
55 | # File containing targets exported from the build tree for external projects
56 | set(OIDN_BUILD_TREE_EXPORT_FILE ${OIDN_ROOT_BINARY_DIR}/cmake/oidn_targets.cmake)
57 | 
58 | # Common resource file
59 | set(OIDN_RESOURCE_FILE ${OIDN_ROOT_SOURCE_DIR}/common/oidn.rc)
60 | 
61 | # Platform-specific settings
62 | include(oidn_platform)
63 | 
64 | # Packaging
65 | include(oidn_package)


--------------------------------------------------------------------------------
/cmake/oidn_common_external.cmake:
--------------------------------------------------------------------------------
 1 | ## Copyright 2023 Intel Corporation
 2 | ## SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | # Define cache variables for common paths which will be set by the main library build
 5 | set(OIDN_ROOT_BINARY_DIR "" CACHE PATH "Location of the main library build directory.")
 6 | set(OIDN_INSTALL_RPATH_PREFIX "" CACHE PATH "Prefix for the RPATH of installed binaries.")
 7 | 
 8 | if(NOT OIDN_ROOT_BINARY_DIR)
 9 |   message(FATAL_ERROR "OIDN_ROOT_BINARY_DIR is not set. The cache may have been deleted, please try building again.")
10 | endif()
11 | 
12 | # Common
13 | include(oidn_common)
14 | 
15 | # Import targets from the main library build directory
16 | include(${OIDN_BUILD_TREE_EXPORT_FILE})


--------------------------------------------------------------------------------
/cmake/oidn_version.cmake:
--------------------------------------------------------------------------------
 1 | ## Copyright 2018 Intel Corporation
 2 | ## SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | set(OIDN_VERSION_MAJOR 2)
 5 | set(OIDN_VERSION_MINOR 3)
 6 | set(OIDN_VERSION_PATCH 3)
 7 | set(OIDN_VERSION_NOTE "")
 8 | 
 9 | set(OIDN_VERSION ${OIDN_VERSION_MAJOR}.${OIDN_VERSION_MINOR}.${OIDN_VERSION_PATCH}) # "major.minor.patch", here 2.3.3
10 | math(EXPR OIDN_VERSION_NUMBER "10000*${OIDN_VERSION_MAJOR} + 100*${OIDN_VERSION_MINOR} + ${OIDN_VERSION_PATCH}") # single integer, here 20303


--------------------------------------------------------------------------------
/common/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ## Copyright 2018 Intel Corporation
 2 | ## SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | # Generate config.h
 5 | configure_file(
 6 |   "${PROJECT_SOURCE_DIR}/include/OpenImageDenoise/config.h.in"
 7 |   "${PROJECT_SOURCE_DIR}/include/OpenImageDenoise/config.h"
 8 | )
 9 | 
10 | # Generate list of symbols to export
11 | configure_file(
12 |   "${PROJECT_SOURCE_DIR}/common/export.linux.map.in"
13 |   "${PROJECT_SOURCE_DIR}/common/export.linux.map"
14 | )
15 | configure_file(
16 |   "${PROJECT_SOURCE_DIR}/common/export.macos.map.in"
17 |   "${PROJECT_SOURCE_DIR}/common/export.macos.map"
18 | )
19 | 
20 | add_library(OpenImageDenoise_common OBJECT
21 |   ${PROJECT_SOURCE_DIR}/include/OpenImageDenoise/config.h
22 |   ${PROJECT_SOURCE_DIR}/include/OpenImageDenoise/oidn.h
23 |   ${PROJECT_SOURCE_DIR}/include/OpenImageDenoise/oidn.hpp
24 |   common.h
25 |   common.cpp
26 |   half.h
27 |   half.cpp
28 |   oidn_utils.h
29 |   oidn_utils.cpp
30 |   platform.h
31 |   platform.cpp
32 |   timer.h
33 | ) # OBJECT library: the common sources are compiled once into object files
34 | 
35 | set_target_properties(OpenImageDenoise_common PROPERTIES OUTPUT_NAME ${OIDN_LIBRARY_NAME}_common)
36 | 
37 | target_include_directories(OpenImageDenoise_common
38 |   PUBLIC
39 |     $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
40 |     $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/external>
41 |     $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>
42 | ) # all three include paths are BUILD_INTERFACE only, i.e. they are not exported to installed consumers
43 | 
44 | install(TARGETS OpenImageDenoise_common EXPORT OpenImageDenoise_Exports)


--------------------------------------------------------------------------------
/common/common.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "common.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   size_t getDataTypeSize(DataType dataType) // size in bytes of one element of the given data type
 9 |   {
10 |     switch (dataType)
11 |     {
12 |     case DataType::UInt8:   return 1;
13 |     case DataType::Float16: return sizeof(int16_t); // same size as the 16-bit storage used by the half class
14 |     case DataType::Float32: return sizeof(float);
15 |     default: // every other value (e.g. DataType::Void) has no element size
16 |       throw std::invalid_argument("invalid data type");
17 |     }
18 |   }
19 | 
20 |   DataType getFormatDataType(Format format) // element data type of a format, independent of its channel count
21 |   {
22 |     switch (format)
23 |     {
24 |     case Format::Undefined: // the undefined format maps to Void instead of throwing
25 |       return DataType::Void;
26 |     case Format::Float:
27 |     case Format::Float2:
28 |     case Format::Float3:
29 |     case Format::Float4:
30 |       return DataType::Float32;
31 |     case Format::Half:
32 |     case Format::Half2:
33 |     case Format::Half3:
34 |     case Format::Half4:
35 |       return DataType::Float16;
36 |     default: // not one of the formats listed above
37 |       throw std::invalid_argument("invalid format");
38 |     }
39 |   }
40 | 
41 |   Format makeFormat(DataType dataType, int numChannels) // builds the format with the given element type and channel count
42 |   {
43 |     if (dataType == DataType::Void) // Void yields Undefined; numChannels is not checked in this case
44 |       return Format::Undefined;
45 | 
46 |     Format baseFormat; // single-channel format of the requested data type
47 |     switch (dataType)
48 |     {
49 |     case DataType::Float16:
50 |       baseFormat = Format::Half;
51 |       break;
52 |     case DataType::Float32:
53 |       baseFormat = Format::Float;
54 |       break;
55 |     default: // e.g. UInt8, which has no corresponding format here
56 |       throw std::invalid_argument("unsupported format data type");
57 |     }
58 | 
59 |     if (numChannels < 1 || numChannels > 4)
60 |       throw std::invalid_argument("invalid number of channels");
61 | 
62 |     return Format(int(baseFormat) + numChannels - 1); // assumes the 1..4-channel formats of a type are consecutive enum values
63 |   }
64 | 
65 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/common/common.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "oidn_utils.h" // must be included before platform.h
 7 | #include "platform.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   // Synchronization mode for operations
12 |   enum class SyncMode
13 |   {
14 |     Blocking, // NOTE(review): presumably the call waits for completion - confirm at the use sites
15 |     Async     // NOTE(review): presumably the call may return before completion - confirm at the use sites
16 |   };
17 | 
18 |   template<typename T> // compile-time map from a C++ element type to its DataType enumerator
19 |   struct DataTypeOf;   // primary template is only declared, so using an unlisted type fails to compile
20 | 
21 |   template<> struct DataTypeOf<void>    { static constexpr DataType value = DataType::Void;    };
22 |   template<> struct DataTypeOf<uint8_t> { static constexpr DataType value = DataType::UInt8;   };
23 |   template<> struct DataTypeOf<half>    { static constexpr DataType value = DataType::Float16; };
24 |   template<> struct DataTypeOf<float>   { static constexpr DataType value = DataType::Float32; };
25 | 
26 |   // Returns the size of a data type in bytes
27 |   size_t getDataTypeSize(DataType dataType);
28 | 
29 |   // Returns the data type of a format
30 |   DataType getFormatDataType(Format format);
31 | 
32 |   // Makes a format from a data type and number of channels
33 |   Format makeFormat(DataType dataType, int numChannels);
34 | 
35 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/common/export.linux.map.in:
--------------------------------------------------------------------------------
 1 | {
 2 | global:
 3 |   oidn[A-Z]*;
 4 |   oidn_*;
 5 |   @OIDN_API_NAMESPACE@_oidn_*;
 6 |   _ZN[0-9]@OIDN_API_NAMESPACE@[0-9]oidn[A-Z]*;
 7 |   _ZN[0-9]@OIDN_API_NAMESPACE@[0-9][0-9]oidn[A-Z]*;
 8 |   _ZN[0-9][0-9]@OIDN_API_NAMESPACE@[0-9]oidn[A-Z]*;
 9 |   _ZN[0-9][0-9]@OIDN_API_NAMESPACE@[0-9][0-9]oidn[A-Z]*;
10 | local:
11 |   *;
12 | };


--------------------------------------------------------------------------------
/common/export.macos.map.in:
--------------------------------------------------------------------------------
1 | _oidn[A-Z]*
2 | _oidn_*
3 | _@OIDN_API_NAMESPACE@_oidn_*
4 | __ZN[0-9]@OIDN_API_NAMESPACE@[0-9]oidn[A-Z]*
5 | __ZN[0-9]@OIDN_API_NAMESPACE@[0-9][0-9]oidn[A-Z]*
6 | __ZN[0-9][0-9]@OIDN_API_NAMESPACE@[0-9]oidn[A-Z]*
7 | __ZN[0-9][0-9]@OIDN_API_NAMESPACE@[0-9][0-9]oidn[A-Z]*


--------------------------------------------------------------------------------
/common/half.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "include/OpenImageDenoise/config.h"
 7 | #include <cstdint>
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   float half_to_float(int16_t x);
12 |   int16_t float_to_half(float x);
13 | 
14 |   // Minimal half data type: stores the 16-bit encoding and converts to/from float on demand
15 |   class half
16 |   {
17 |   public:
18 |     half() = default; // default-initialization leaves the value indeterminate
19 |     half(const half& h) = default; // defaulted (member-wise) copy keeps the type trivially copyable
20 |     half(float f) : x(float_to_half(f)) {} // implicit conversion from float
21 | 
22 |     half& operator =(const half& h) = default; // defaulted for the same reason as the copy constructor
23 |     half& operator =(float f) { x = float_to_half(f); return *this; }
24 | 
25 |     operator float() const { return half_to_float(x); } // implicit conversion to float
26 | 
27 |   private:
28 |     int16_t x; // raw 16-bit representation produced by float_to_half()
29 |   };
30 | 
31 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/common/oidn.rc:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "include/OpenImageDenoise/config.h"
 5 | 
 6 | 1 VERSIONINFO
 7 | FILEVERSION OIDN_VERSION_MAJOR,OIDN_VERSION_MINOR,OIDN_VERSION_PATCH,0
 8 | PRODUCTVERSION OIDN_VERSION_MAJOR,OIDN_VERSION_MINOR,OIDN_VERSION_PATCH,0
 9 | FILEFLAGSMASK 0x3fL
10 | #ifdef _DEBUG
11 | FILEFLAGS 0x1L
12 | #else
13 | FILEFLAGS 0x0L
14 | #endif
15 | FILEOS 0x40004L
16 | FILETYPE 0x2L
17 | FILESUBTYPE 0x0L
18 | {
19 |   BLOCK "StringFileInfo"
20 |   {
21 |     BLOCK "040904b0"
22 |     {
23 |       VALUE "CompanyName", "Intel"
24 |       VALUE "FileDescription", "Intel Open Image Denoise Library"
25 |       VALUE "FileVersion", OIDN_VERSION_STRING
26 |       VALUE "ProductVersion", OIDN_VERSION_STRING
27 |       VALUE "LegalCopyright", "Copyright 2009-2025 Intel Corporation"
28 |       VALUE "InternalName", "Intel Open Image Denoise"
29 |       VALUE "ProductName", "Intel(R) Open Image Denoise"
30 |     }
31 |   }
32 |   BLOCK "VarFileInfo"
33 |   {
34 |     VALUE "Translation", 0x409, 1200
35 |   }
36 | }
37 | 


--------------------------------------------------------------------------------
/common/oidn_utils.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "include/OpenImageDenoise/oidn.hpp"
 7 | #include <iostream>
 8 | #include <iomanip>
 9 | 
10 | OIDN_NAMESPACE_BEGIN
11 | 
12 |   // Returns the size of a format in bytes
13 |   size_t getFormatSize(Format format);
14 | 
15 |   std::ostream& operator <<(std::ostream& sm, Format format);
16 | 
17 |   std::ostream& operator <<(std::ostream& sm, DeviceType deviceType);
18 |   std::istream& operator >>(std::istream& sm, DeviceType& deviceType);
19 | 
20 |   std::ostream& operator <<(std::ostream& sm, Quality quality);
21 | 
22 |   std::ostream& operator <<(std::ostream& sm, const UUID& uuid);
23 |   std::ostream& operator <<(std::ostream& sm, const LUID& luid);
24 | 
25 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/common/timer.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "common/platform.h"
 7 | #include <chrono>
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   // Stopwatch based on std::chrono::steady_clock
12 |   class Timer
13 |   {
14 |   public:
15 |     // Starts measuring at construction
16 |     Timer() : start(clock::now()) {}
17 | 
18 |     // Restarts the measurement from the current time
19 |     void reset() { start = clock::now(); }
20 | 
21 |     // Returns the time elapsed since construction or the last reset(),
22 |     // in seconds
23 |     double query() const
24 |     {
25 |       const std::chrono::duration<double> elapsed = clock::now() - start;
26 |       return elapsed.count();
27 |     }
28 | 
29 |   private:
30 |     using clock = std::chrono::steady_clock;
31 | 
32 |     // Time of construction or of the last reset()
33 |     std::chrono::time_point<clock> start;
34 |   };
35 | 
36 | OIDN_NAMESPACE_END
37 | 


--------------------------------------------------------------------------------
/core/autoexposure.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #if !defined(OIDN_COMPILE_METAL_DEVICE)
 7 |   #include "op.h"
 8 |   #include "image.h"
 9 |   #include "record.h"
10 | #endif
11 | 
12 | OIDN_NAMESPACE_BEGIN
13 | 
14 |   struct AutoexposureParams // autoexposure constants; declared outside the #if guards, so also visible to Metal device code
15 |   {
16 |     static constexpr oidn_constant int maxBinSize = 16; // divisor used by Autoexposure to derive the number of bins per image dimension
17 |     static constexpr oidn_constant float key = 0.18f;   // NOTE(review): presumably the target "key" luminance; not used in this header
18 |     static constexpr oidn_constant float eps = 1e-8f;   // NOTE(review): presumably a small epsilon for numerical safety; not used in this header
19 |   };
20 | 
21 | #if !defined(OIDN_COMPILE_METAL_DEVICE)
22 | 
23 |   class Autoexposure : public BaseOp, public AutoexposureParams // base op holding the autoexposure source image and float result record
24 |   {
25 |   public:
26 |     explicit Autoexposure(const ImageDesc& srcDesc)
27 |       : srcDesc(srcDesc)
28 |     {
29 |       numBinsH = ceil_div(srcDesc.getH(), maxBinSize); // bin count along the image height
30 |       numBinsW = ceil_div(srcDesc.getW(), maxBinSize); // bin count along the image width
31 |       numBins = numBinsH * numBinsW;
32 |     }
33 | 
34 |     void setSrc(const Ref<Image>& src) // throws std::invalid_argument unless src is non-null and matches srcDesc in width and height
35 |     {
36 |       if (!src || src->getW() != srcDesc.getW() || src->getH() != srcDesc.getH())
37 |         throw std::invalid_argument("invalid autoexposure source");
38 |       this->src = src;
39 |     }
40 | 
41 |     void setDst(const Ref<Record<float>>& dst) { this->dst = dst; } // stored without validation
42 |     float* getDstPtr() const { return dst->getPtr(); } // dereferences dst, so a non-null destination must have been set first
43 | 
44 |   protected:
45 |     ImageDesc srcDesc; // descriptor the source image is checked against in setSrc()
46 |     Ref<Image> src;
47 |     Ref<Record<float>> dst;
48 | 
49 |     int numBinsH; // ceil_div(srcDesc.getH(), maxBinSize)
50 |     int numBinsW; // ceil_div(srcDesc.getW(), maxBinSize)
51 |     int numBins;  // numBinsH * numBinsW
52 |   };
53 | 
54 | #endif // !defined(OIDN_COMPILE_METAL_DEVICE)
55 | 
56 | OIDN_NAMESPACE_END
57 | 


--------------------------------------------------------------------------------
/core/color.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "color.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   constexpr float TransferFunction::yMax; // out-of-class definition of the static constexpr data member
 9 | 
10 |   TransferFunction::TransferFunction(Type type) // stores the type and precomputes the normalization scales
11 |     : type(type)
12 |   {
13 |     const float xMax = math::reduce_max(forward(yMax)); // largest component of forward(yMax)
14 |     normScale    = 1./xMax; // scale that maps xMax to 1
15 |     rcpNormScale = xMax;    // reciprocal of normScale
16 |   }
17 | 
18 | OIDN_NAMESPACE_END
19 | 


--------------------------------------------------------------------------------
/core/concat_conv.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "concat_conv.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   ConcatConv::ConcatConv(const ConcatConvDesc& desc) // validates the descriptors and derives the destination descriptor
 9 |     : ConcatConvDesc(desc)
10 |   {
11 |     if (src1Desc.getRank() != 3 || // both sources must be rank 3 and agree in H, W, layout and data type
12 |         src2Desc.getRank() != 3 ||
13 |         src1Desc.getH() != src2Desc.getH() ||
14 |         src1Desc.getW() != src2Desc.getW() ||
15 |         src1Desc.layout != src2Desc.layout ||
16 |         src1Desc.dataType != src2Desc.dataType)
17 |       throw std::invalid_argument("invalid concat+conv source descriptor");
18 |     if (weightDesc.getRank() != 4 || weightDesc.getI() != (src1Desc.getC() + src2Desc.getC()) || // weight input channels = sum of the source channels
19 |         weightDesc.getPaddedI() != (src1Desc.getPaddedC() + src2Desc.getPaddedC())) // the same must hold for the padded channel counts
20 |       throw std::invalid_argument("invalid concat+conv weight shape");
21 |     // Destination: output channels come from the weights, H/W/layout/data type from the first source
22 |     TensorDims dstDims{weightDesc.getO(), src1Desc.getH(), src1Desc.getW()};
23 |     TensorDims dstPaddedDims{weightDesc.getPaddedO(), src1Desc.getH(), src1Desc.getW()};
24 |     dstDesc = {dstDims, dstPaddedDims, src1Desc.layout, src1Desc.dataType};
25 |   }
26 | 
27 |   void ConcatConv::setSrc(const Ref<Tensor>& src1, const Ref<Tensor>& src2) // sets both sources; throws std::invalid_argument on mismatch
28 |   {
29 |     if (!src1 || src1->getDesc() != src1Desc || !src2 || src2->getDesc() != src2Desc) // each tensor must be non-null and match its descriptor
30 |       throw std::invalid_argument("invalid concat+conv source");
31 | 
32 |     this->src1 = src1;
33 |     this->src2 = src2;
34 |     updateSrc(); // virtual hook letting the subclass react to the new sources
35 |   }
36 | 
37 |   void ConcatConv::setBias(const Ref<Tensor>& bias) // sets the bias; throws std::invalid_argument on mismatch
38 |   {
39 |     if (!bias || bias->getDesc() != biasDesc) // must be non-null and match biasDesc
40 |       throw std::invalid_argument("invalid concat+conv bias");
41 | 
42 |     this->bias = bias;
43 |     updateBias(); // virtual hook letting the subclass react to the new bias
44 |   }
45 | 
46 |   void ConcatConv::setDst(const Ref<Tensor>& dst) // sets the destination; throws std::invalid_argument on mismatch
47 |   {
48 |     if (!dst || dst->getDesc() != dstDesc) // must be non-null and match the descriptor derived in the constructor
49 |       throw std::invalid_argument("invalid concat+conv destination");
50 | 
51 |     this->dst = dst;
52 |     updateDst(); // virtual hook letting the subclass react to the new destination
53 |   }
54 | 
55 | OIDN_NAMESPACE_END
56 | 


--------------------------------------------------------------------------------
/core/concat_conv.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "conv.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Concatenation + convolution descriptor
11 |   struct ConcatConvDesc
12 |   {
13 |     TensorDesc src1Desc;   // first source of the concatenation
14 |     TensorDesc src2Desc;   // second source of the concatenation
15 |     TensorDesc weightDesc; // weights; input channel count must equal the sum of both source channel counts
16 |     TensorDesc biasDesc;
17 |     Activation activation;
18 |     bool fastMath; // prefer performance over accuracy
19 |   };
20 | 
21 |   class ConcatConv : public BaseOp, protected ConcatConvDesc // base class: validates descriptors and tensors, subclasses do the work
22 |   {
23 |   public:
24 |     ConcatConv(const ConcatConvDesc& desc); // throws std::invalid_argument for inconsistent descriptors
25 | 
26 |     TensorDesc getDstDesc() const { return dstDesc; }
27 |     Ref<Tensor> getDst() const { return dst; }
28 | 
29 |     void setSrc(const Ref<Tensor>& src1, const Ref<Tensor>& src2); // each setter validates, stores, then calls the matching update hook
30 |     void setBias(const Ref<Tensor>& bias);
31 |     void setDst(const Ref<Tensor>& dst);
32 | 
33 |   protected:
34 |     virtual void updateSrc() {}  // hooks called by the setters after the new tensors were stored; no-ops by default
35 |     virtual void updateBias() {}
36 |     virtual void updateDst() {}
37 | 
38 |     TensorDesc dstDesc; // derived from the weight and first source descriptors in the constructor
39 | 
40 |     Ref<Tensor> src1;
41 |     Ref<Tensor> src2;
42 |     Ref<Tensor> bias;
43 |     Ref<Tensor> dst;
44 |   };
45 | 
46 | OIDN_NAMESPACE_END
47 | 


--------------------------------------------------------------------------------
/core/concat_conv_chw.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "concat_conv_chw.h"
 5 | #include "engine.h"
 6 | 
 7 | OIDN_NAMESPACE_BEGIN
 8 | 
 9 |   ConcatConvCHW::ConcatConvCHW(Engine* engine, const ConcatConvDesc& desc) // creates one convolution over the pre-concatenated source
10 |     : ConcatConv(desc)
11 |   {
12 |     if (src1Desc.layout == TensorLayout::hwc) // every layout other than hwc is accepted here
13 |       throw std::invalid_argument("unsupported concat+conv source layout");
14 |     // Descriptor of the combined source: channel counts are summed, H/W/layout/data type come from src1
15 |     TensorDims srcDims{src1Desc.getC() + src2Desc.getC(), src1Desc.getH(), src1Desc.getW()};
16 |     TensorDims srcPaddedDims{src1Desc.getPaddedC() + src2Desc.getPaddedC(), src1Desc.getH(), src1Desc.getW()};
17 |     srcDesc = {srcDims, srcPaddedDims, src1Desc.layout, src1Desc.dataType};
18 | 
19 |     conv = engine->newConv({srcDesc, weightDesc, biasDesc, activation, PostOp::None, fastMath}); // no post-op is requested
20 |   }
21 | 
22 |   void ConcatConvCHW::updateSrc() // checks that the sources are adjacent in memory and passes them to the convolution as one tensor
23 |   {
24 |     if (!src1->getBuffer() || !src2->getBuffer())
25 |       throw std::invalid_argument("concat+conv sources must be backed by buffers");
26 |     if (src1->getBuffer() != src2->getBuffer() || // same buffer, and src2 must start exactly where src1 ends
27 |         (static_cast<char*>(src1->getPtr()) + src1->getByteSize()) != static_cast<char*>(src2->getPtr()))
28 |       throw std::invalid_argument("concat+conv sources are not pre-concatenated in memory");
29 | 
30 |     auto src = src1->getBuffer()->newTensor(srcDesc, src1->getByteOffset()); // tensor described by srcDesc, created at src1's byte offset
31 |     conv->setSrc(src);
32 |   }
33 | 
34 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/concat_conv_chw.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "concat_conv.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Concatenation + convolution for CHW tensors (including blocked) stored consecutively in memory
11 |   // Since the tensors are pre-concatenated in memory, only the convolution needs to be executed
12 |   class ConcatConvCHW final : public ConcatConv
13 |   {
14 |   public:
15 |     ConcatConvCHW(Engine* engine, const ConcatConvDesc& desc);
16 | 
17 |     Engine* getEngine() const override { return conv->getEngine(); }
18 |     bool isSupported() const override { return conv->isSupported(); } // report the wrapped convolution's support, as ConcatConvHWC does
19 |     size_t getScratchByteSize() override { return conv->getScratchByteSize(); }
20 |     void setScratch(const Ref<Buffer>& scratch) override { conv->setScratch(scratch); }
21 | 
22 |     void setWeight(const Ref<Tensor>& weight) { conv->setWeight(weight); } // a single weight tensor covering both sources
23 | 
24 |     void finalize() override { conv->finalize(); }
25 |     void submitKernels(const Ref<CancellationToken>& ct) override { conv->submitKernels(ct); }
26 | 
27 |   private:
28 |     void updateSrc() override;
29 |     void updateBias() override { conv->setBias(bias); }
30 |     void updateDst() override { conv->setDst(dst); }
31 | 
32 |     TensorDesc srcDesc;         // pre-concatenated source
33 |     Ref<Conv> conv;             // the convolution that performs all the work
34 |   };
35 | 
36 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/concat_conv_hwc.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "concat_conv_hwc.h"
 5 | #include "engine.h"
 6 | 
 7 | OIDN_NAMESPACE_BEGIN
 8 | 
 9 |   ConcatConvHWC::ConcatConvHWC(Engine* engine, const ConcatConvDesc& desc) // builds the two convolutions that replace the concat+conv
10 |     : ConcatConv(desc)
11 |   {
12 |     if (src1Desc.layout != TensorLayout::hwc)
13 |       throw std::invalid_argument("unsupported concat+conv source layout"); // same exception type as the other descriptor checks
14 | 
15 |     // Split the convolution into two smaller convolutions
16 |     weight1Desc = {{dstDesc.getC(),       src1Desc.getC(),       weightDesc.getH(), weightDesc.getW()},
17 |                    {dstDesc.getPaddedC(), src1Desc.getPaddedC(), weightDesc.getH(), weightDesc.getW()},
18 |                    weightDesc.layout,
19 |                    weightDesc.dataType};
20 | 
21 |     weight2Desc = {{dstDesc.getC(),       src2Desc.getC(),       weightDesc.getH(), weightDesc.getW()},
22 |                    {dstDesc.getPaddedC(), src2Desc.getPaddedC(), weightDesc.getH(), weightDesc.getW()},
23 |                    weightDesc.layout,
24 |                    weightDesc.dataType};
25 | 
26 |     // Convolution 1: dst = conv(src1, weight1) + bias
27 |     conv1 = engine->newConv({src1Desc, weight1Desc, biasDesc, Activation::None, PostOp::None, fastMath});
28 | 
29 |     // Convolution 2: dst = activation(conv(src2, weight2) + dst)
30 |     // We use dst as bias
31 |     conv2 = engine->newConv({src2Desc, weight2Desc, dstDesc, activation, PostOp::None, fastMath});
32 |   }
33 | 
34 |   bool ConcatConvHWC::isSupported() const // supported only if both partial convolutions are supported
35 |   {
36 |     return conv1->isSupported() && conv2->isSupported();
37 |   }
38 | 
39 |   size_t ConcatConvHWC::getScratchByteSize() // the larger of the two requirements, since setScratch() gives both convolutions the same buffer
40 |   {
41 |     return max(conv1->getScratchByteSize(), conv2->getScratchByteSize());
42 |   }
43 | 
44 |   void ConcatConvHWC::setScratch(const Ref<Buffer>& scratch) // both convolutions receive the same scratch buffer
45 |   {
46 |     conv1->setScratch(scratch);
47 |     conv2->setScratch(scratch);
48 |   }
49 | 
50 |   void ConcatConvHWC::setWeight(const Ref<Tensor>& weight1, const Ref<Tensor>& weight2) // weight1 goes to the src1 convolution, weight2 to the src2 one
51 |   {
52 |     conv1->setWeight(weight1);
53 |     conv2->setWeight(weight2);
54 |   }
55 | 
56 |   void ConcatConvHWC::updateSrc() // forwards each source to its own convolution
57 |   {
58 |     conv1->setSrc(src1);
59 |     conv2->setSrc(src2);
60 |   }
61 | 
62 |   void ConcatConvHWC::updateDst() // wires dst as the output of both convolutions and as the bias of the second
63 |   {
64 |     conv1->setDst(dst);
65 |     // The second convolution reads dst as its bias and writes its result to dst as well
66 |     conv2->setBias(dst);
67 |     conv2->setDst(dst);
68 |   }
69 | 
70 |   void ConcatConvHWC::finalize() // finalizes both partial convolutions
71 |   {
72 |     conv1->finalize();
73 |     conv2->finalize();
74 |   }
75 | 
76 |   void ConcatConvHWC::submitKernels(const Ref<CancellationToken>& ct) // conv1 is submitted first because conv2 uses its output (dst) as bias
77 |   {
78 |     conv1->submitKernels(ct);
79 |     conv2->submitKernels(ct);
80 |   }
81 | 
82 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/concat_conv_hwc.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "concat_conv.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Concatenation + convolution for HWC tensors
11 |   // The convolution is split into two smaller convolutions, one for each input tensor
12 |   // The weights for each convolution must be set separately
13 |   class ConcatConvHWC final : public ConcatConv
14 |   {
15 |   public:
16 |     ConcatConvHWC(Engine* engine, const ConcatConvDesc& desc);
17 | 
18 |     Engine* getEngine() const override { return conv1->getEngine(); } // taken from the first convolution
19 |     bool isSupported() const override;
20 | 
21 |     size_t getScratchByteSize() override;
22 |     void setScratch(const Ref<Buffer>& scratch) override;
23 | 
24 |     TensorDesc getWeight1Desc() const { return weight1Desc; } // descriptor of the weights applied to the first source
25 |     TensorDesc getWeight2Desc() const { return weight2Desc; } // descriptor of the weights applied to the second source
26 |     void setWeight(const Ref<Tensor>& weight1, const Ref<Tensor>& weight2);
27 | 
28 |     void finalize() override;
29 |     void submitKernels(const Ref<CancellationToken>& ct) override;
30 | 
31 |   private:
32 |     void updateSrc() override;
33 |     void updateBias() override { conv1->setBias(bias); } // only the first convolution uses the actual bias
34 |     void updateDst() override;
35 | 
36 |     TensorDesc weight1Desc;
37 |     TensorDesc weight2Desc;
38 | 
39 |     Ref<Conv> conv1; // dst = conv(src1, weight1) + bias
40 |     Ref<Conv> conv2; // dst = activation(conv(src2, weight2) + dst)
41 |   };
42 | 
43 | OIDN_NAMESPACE_END
44 | 


--------------------------------------------------------------------------------
/core/context.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "context.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   Context& Context::get() // returns the single Context instance
 9 |   {
10 |     static Context instance; // function-local static: constructed on the first call
11 |     return instance;
12 |   }
13 | 
14 |   bool Context::isDeviceSupported(DeviceType type) const // true if a device factory is registered for the type
15 |   {
16 |     return deviceFactories.find(type) != deviceFactories.end();
17 |   }
18 | 
19 |   DeviceFactory* Context::getDeviceFactory(DeviceType type) const // looks up the factory registered for the type
20 |   {
21 |     auto it = deviceFactories.find(type);
22 |     if (it == deviceFactories.end()) // no factory registered for this type
23 |       throw Exception(Error::UnsupportedHardware, "unsupported device type: " + toString(type));
24 |     return it->second.get(); // raw pointer; the stored entry in deviceFactories keeps the factory
25 |   }
26 | 
27 |   const Ref<PhysicalDevice>& Context::getPhysicalDevice(int id) const // id is an index into physicalDevices
28 |   {
29 |     if (id < 0 || static_cast<size_t>(id) >= physicalDevices.size()) // negative or past-the-end IDs are rejected
30 |       throw Exception(Error::InvalidArgument, "invalid physical device ID: " + toString(id));
31 |     return physicalDevices[id];
32 |   }
33 | 
34 |   Ref<Device> Context::newDevice(int physicalDeviceID) // creates a device for the physical device with the given ID
35 |   {
36 |     const auto& physicalDevice = getPhysicalDevice(physicalDeviceID); // throws InvalidArgument for an out-of-range ID
37 |     const DeviceType type = physicalDevice->type;
38 |     return getDeviceFactory(type)->newDevice(physicalDevice); // the factory is selected by the physical device's own type
39 |   }
40 | 
41 |   Ref<Device> Context::newDevice(DeviceType type) // creates a device of the requested type
42 |   {
43 |     if (type == DeviceType::Default) // Default selects physical device 0
44 |       return newDevice(0);           // with no physical devices this throws InvalidArgument from getPhysicalDevice()
45 | 
46 |     // Find the first physical device of the specified type
47 |     for (const auto& physicalDevice : physicalDevices)
48 |     {
49 |       if (physicalDevice->type == type)
50 |         return getDeviceFactory(type)->newDevice(physicalDevice);
51 |     }
52 |     // No physical device of the requested type exists
53 |     throw Exception(Error::UnsupportedHardware, "unsupported device type: " + toString(type));
54 |   }
55 | 
56 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/conv.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "op.h"
 7 | #include "tensor.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   // Activation function
12 |   enum class Activation
13 |   {
14 |     None, // identity
15 |     ReLU  // rectified linear unit
16 |   };
17 | 
18 |   enum class PostOp // NOTE(review): presumably an operation fused after the convolution - confirm in the Conv implementations
19 |   {
20 |     None,
21 |     Pool,
22 |     Upsample
23 |   };
24 | 
25 |   // Convolution descriptor
26 |   struct ConvDesc
27 |   {
28 |     TensorDesc srcDesc;    // source tensor
29 |     TensorDesc weightDesc; // weight tensor
30 |     TensorDesc biasDesc;   // bias tensor
31 |     Activation activation;
32 |     PostOp postOp;
33 |     bool fastMath; // prefer performance over accuracy
34 |   };
35 | 
36 |   // Convolution (base class; the descriptor fields are inherited as protected members)
37 |   class Conv : public BaseOp, protected ConvDesc
38 |   {
39 |   public:
40 |     Conv(const ConvDesc& desc);
41 | 
42 |     TensorDesc getDstDesc() const { return dstDesc; }
43 |     Ref<Tensor> getDst() const { return dst; }
44 | 
45 |     void setSrc(const Ref<Tensor>& src); // setters are defined out of line (not visible in this header)
46 |     void setWeight(const Ref<Tensor>& weight);
47 |     void setBias(const Ref<Tensor>& bias);
48 |     void setDst(const Ref<Tensor>& dst);
49 | 
50 |   protected:
51 |     virtual void updateSrc() {} // overridable hooks, one per setter; no-ops by default
52 |     virtual void updateWeight() {}
53 |     virtual void updateBias() {}
54 |     virtual void updateDst() {}
55 | 
56 |     TensorDesc dstDesc; // descriptor of the destination tensor
57 |     Ref<Tensor> src;
58 |     Ref<Tensor> weight;
59 |     Ref<Tensor> bias;
60 |     Ref<Tensor> dst;
61 |   };
62 | 
63 | OIDN_NAMESPACE_END
64 | 


--------------------------------------------------------------------------------
/core/data.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "common/platform.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Opaque read-only data: a pointer plus a size; the struct never frees or copies the pointed-to memory
11 |   struct Data
12 |   {
13 |     const void* ptr;
14 |     size_t size; // set to sizeof(array), i.e. bytes, when built from an array
15 | 
16 |     Data() : ptr(nullptr), size(0) {}
17 |     Data(std::nullptr_t) : ptr(nullptr), size(0) {} // allows constructing empty data from nullptr
18 | 
19 |     template<typename T>
20 |     Data(T* ptr, size_t size)
21 |       : ptr(ptr),
22 |         size(size)
23 |     {
24 |       if (ptr == nullptr && size > 0) // a null pointer is accepted only together with a zero size
25 |         throw Exception(Error::InvalidArgument, "data pointer is null");
26 |     }
27 | 
28 |     template<typename T, size_t N>
29 |     Data(T (&array)[N]) : ptr(array), size(sizeof(array)) {} // refers to the whole array; size is deduced at compile time
30 | 
31 |     template<typename T, size_t N>
32 |     Data& operator =(T (&array)[N])
33 |     {
34 |       ptr = array;
35 |       size = sizeof(array);
36 |       return *this;
37 |     }
38 | 
39 |     oidn_inline operator bool() const // true if a pointer is set, regardless of size
40 |     {
41 |       return ptr != nullptr;
42 |     }
43 |   };
44 | 
45 | OIDN_NAMESPACE_END
46 | 


--------------------------------------------------------------------------------
/core/device_factory.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "device.h"
 7 | #include <map>
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class DeviceFactory : public RefCount // abstract factory: creates a Device for a given physical device
12 |   {
13 |   public:
14 |     virtual Ref<Device> newDevice(const Ref<PhysicalDevice>& physicalDevice) = 0;
15 |   };
16 | 
17 |   class SYCLDeviceFactoryBase : public DeviceFactory // adds a SYCL device support query and creation from SYCL queues
18 |   {
19 |   public:
20 |     using DeviceFactory::newDevice; // keeps the base overload visible next to the overload declared below
21 | 
22 |     virtual bool isDeviceSupported(const sycl::device* device) = 0;
23 |     virtual Ref<Device> newDevice(const sycl::queue* queues, int numQueues) = 0; // queues: array of numQueues entries
24 |   };
25 | 
26 |   class CUDADeviceFactoryBase : public DeviceFactory // adds a CUDA device support query and creation from device ID/stream pairs
27 |   {
28 |   public:
29 |     using DeviceFactory::newDevice; // keeps the base overload visible next to the overload declared below
30 | 
31 |     virtual bool isDeviceSupported(int deviceID) = 0;
32 |     virtual Ref<Device> newDevice(const int* deviceIDs, const cudaStream_t* streams, int numPairs) = 0; // numPairs: number of (device ID, stream) pairs
33 |   };
34 | 
35 |   class HIPDeviceFactoryBase : public DeviceFactory // adds a HIP device support query and creation from device ID/stream pairs
36 |   {
37 |   public:
38 |     using DeviceFactory::newDevice; // keeps the base overload visible next to the overload declared below
39 | 
40 |     virtual bool isDeviceSupported(int deviceID) = 0;
41 |     virtual Ref<Device> newDevice(const int* deviceIDs, const hipStream_t* streams, int numPairs) = 0; // numPairs: number of (device ID, stream) pairs
42 |   };
43 | 
44 |   class MetalDeviceFactoryBase : public DeviceFactory // adds a Metal device support query and creation from command queues
45 |   {
46 |   public:
47 |     using DeviceFactory::newDevice; // keeps the base overload visible next to the overload declared below
48 | 
49 |     virtual bool isDeviceSupported(MTLDevice_id device) = 0;
50 |     virtual Ref<Device> newDevice(const MTLCommandQueue_id* commandQueues, int numQueues) = 0; // commandQueues: array of numQueues entries
51 |   };
52 | 
53 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/exception.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "exception.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   // We *must* define this function here because Exception must have a key function, which is the
 9 |   // first non-pure out-of-line virtual function of a type. Otherwise, the type_info would be
10 |   // emitted as a weak symbol and its address may be different in dynamically loaded modules,
11 |   // which would cause exception handling and dynamic_cast to fail.
12 |   const char* Exception::what() const noexcept
13 |   {
14 |     return message->c_str(); // the pointer stays valid while any copy of this exception keeps the shared string alive
15 |   }
16 | 
17 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/exception.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "common/common.h"
 7 | #include <exception>
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class Exception : public std::exception // exception carrying an Error code in addition to a message
12 |   {
13 |   public:
14 |     Exception(Error error, const char* message)
15 |       : error(error),
16 |         message(std::make_shared<std::string>(message)) {} // the text is copied into a shared string
17 | 
18 |     Exception(Error error, const std::string& message)
19 |       : error(error),
20 |         message(std::make_shared<std::string>(message)) {} // the text is copied into a shared string
21 | 
22 |     Error code() const noexcept // error code passed at construction
23 |     {
24 |       return error;
25 |     }
26 | 
27 |     const char* what() const noexcept override; // defined out of line
28 | 
29 |   private:
30 |     Error error;
31 | 
32 |     // Exceptions must have noexcept copy constructors, so we cannot use std::string directly
33 |     std::shared_ptr<std::string> message;
34 |   };
35 | 
36 | OIDN_NAMESPACE_END
37 | 


--------------------------------------------------------------------------------
/core/filter.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "filter.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
  // Creates a filter that holds a reference to the given device
  Filter::Filter(const Ref<Device>& device)
    : device(device) {}
10 | 
11 |   Filter::~Filter()
12 |   {
13 |     // We trim the scratch heaps only here to make filter resolution changes more efficient
14 |     device->trimScratch();
15 |   }
16 | 
17 |   void Filter::setProgressMonitorFunction(ProgressMonitorFunction func, void* userPtr)
18 |   {
19 |     progressFunc = func;
20 |     progressUserPtr = userPtr;
21 |   }
22 | 
23 |   void Filter::setParam(int& dst, int src)
24 |   {
25 |     dirtyParam |= dst != src;
26 |     dst = src;
27 |   }
28 | 
29 |   void Filter::setParam(bool& dst, int src)
30 |   {
31 |     dirtyParam |= dst != bool(src);
32 |     dst = src;
33 |   }
34 | 
35 |   void Filter::setParam(Quality& dst, Quality src)
36 |   {
37 |     dirtyParam |= dst != src;
38 |     dst = src;
39 |   }
40 | 
41 |   void Filter::setParam(Ref<Image>& dst, const Ref<Image>& src)
42 |   {
43 |     // Check whether the image is accessible by the device
44 |     if (src && *src && !device->isSystemMemorySupported())
45 |     {
46 |       const Storage storage = src->getBuffer() ? src->getBuffer()->getStorage()
47 |                                                : device->getPtrStorage(src->getPtr());
48 |       if (storage == Storage::Undefined)
49 |         throw Exception(Error::InvalidArgument, "image data not accessible by the device, please use OIDNBuffer or device allocator for storage");
50 |     }
51 | 
52 |     // The image parameter is *not* dirty if only the pointer and/or strides change (except to/from nullptr)
53 |     dirtyParam |= (!dst && src && *src) || (dst && (!src || !(*src))) ||
54 |                   (dst && src && *src &&
55 |                    ((dst->getW() != src->getW()) || (dst->getH() != src->getH()) ||
56 |                     (dst->getFormat() != src->getFormat())));
57 | 
58 |     if (src && *src)
59 |       dst = src;
60 |     else
61 |       dst = nullptr;
62 |   }
63 | 
64 |   void Filter::removeParam(Ref<Image>& dst)
65 |   {
66 |     dirtyParam |= bool(dst);
67 |     dst = nullptr;
68 |   }
69 | 
70 |   void Filter::setParam(Data& dst, const Data& src)
71 |   {
72 |     // Check whether the data is accessible to the host
73 |     if (src && device->getPtrStorage(src.ptr) == Storage::Device)
74 |       throw Exception(Error::InvalidArgument, "the specified data is not accessible to the host, please use host malloc");
75 | 
76 |     dirtyParam = dst || src;
77 |     dst = src;
78 |   }
79 | 
80 |   void Filter::removeParam(Data& dst)
81 |   {
82 |     dirtyParam |= dst;
83 |     dst = Data();
84 |   }
85 | 
86 | OIDN_NAMESPACE_END
87 | 


--------------------------------------------------------------------------------
/core/filter.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "device.h"
 7 | #include "image.h"
 8 | #include "data.h"
 9 | 
10 | OIDN_NAMESPACE_BEGIN
11 | 
  // Abstract base class of filters. Holds a reference to the device, the progress monitor
  // callback, and helpers for assigning parameters while tracking whether they changed.
  class Filter : public RefCount
  {
  public:
    explicit Filter(const Ref<Device>& device);
    ~Filter();

    // Returns the device as a raw (non-owning) pointer
    Device* getDevice() const { return device.get(); }

    // Named parameter accessors, implemented by the concrete filters
    virtual void setImage(const std::string& name, const Ref<Image>& image) = 0;
    virtual void unsetImage(const std::string& name) = 0;
    virtual void setData(const std::string& name, const Data& data) = 0;
    virtual void updateData(const std::string& name) = 0;
    virtual void unsetData(const std::string& name) = 0;
    virtual void setInt(const std::string& name, int value) = 0;
    virtual int getInt(const std::string& name) = 0;
    virtual void setFloat(const std::string& name, float value) = 0;
    virtual float getFloat(const std::string& name) = 0;

    // Stores the progress monitor callback and its user pointer
    void setProgressMonitorFunction(ProgressMonitorFunction func, void* userPtr);

    virtual void commit() = 0;
    virtual void execute(SyncMode sync = SyncMode::Blocking) = 0;

  protected:
    // Parameter assignment helpers which also update dirtyParam
    void setParam(int& dst, int src);
    void setParam(bool& dst, int src);
    void setParam(Quality& dst, Quality src);
    void setParam(Ref<Image>& dst, const Ref<Image>& src);
    void removeParam(Ref<Image>& dst);
    void setParam(Data& dst, const Data& src);
    void removeParam(Data& dst);

    Ref<Device> device;

    ProgressMonitorFunction progressFunc = nullptr;
    void* progressUserPtr = nullptr;

    bool dirty = true;      // NOTE(review): presumably cleared by commit() in subclasses - confirm
    bool dirtyParam = true; // set by setParam/removeParam when a parameter changes
  };
52 | 
53 | OIDN_NAMESPACE_END
54 | 


--------------------------------------------------------------------------------
/core/heap.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "heap.h"
 5 | #include "buffer.h"
 6 | #include "engine.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // -----------------------------------------------------------------------------------------------
11 |   // Heap
12 |   // -----------------------------------------------------------------------------------------------
13 | 
14 |   void Heap::attach(Buffer* buffer)
15 |   {
16 |     buffers.insert(buffer);
17 |   }
18 | 
19 |   void Heap::detach(Buffer* buffer)
20 |   {
21 |     buffers.erase(buffer);
22 |   }
23 | 
24 |   void Heap::preRealloc()
25 |   {
26 |     for (auto buffer : buffers)
27 |       buffer->preRealloc();
28 |   }
29 | 
30 |   void Heap::postRealloc()
31 |   {
32 |     for (auto buffer : buffers)
33 |       buffer->postRealloc();
34 |   }
35 | 
36 |   // -----------------------------------------------------------------------------------------------
37 |   // USMHeap
38 |   // -----------------------------------------------------------------------------------------------
39 | 
40 |   USMHeap::USMHeap(Engine* engine, size_t byteSize, Storage storage)
41 |     : engine(engine),
42 |       ptr(nullptr),
43 |       byteSize(byteSize),
44 |       storage(storage)
45 |   {
46 |     if (storage == Storage::Undefined)
47 |       this->storage = Storage::Device;
48 | 
49 |     ptr = static_cast<char*>(engine->usmAlloc(byteSize, this->storage));
50 |   }
51 | 
  // Frees the heap memory
  USMHeap::~USMHeap()
  {
    try
    {
      engine->usmFree(ptr, storage);
    }
    catch (...) {} // any exception thrown while freeing is deliberately swallowed
  }
60 | 
61 |   void USMHeap::realloc(size_t newByteSize)
62 |   {
63 |     if (newByteSize == byteSize)
64 |       return;
65 | 
66 |     preRealloc();
67 | 
68 |     engine->usmFree(ptr, storage);
69 |     ptr = static_cast<char*>(engine->usmAlloc(newByteSize, storage));
70 |     byteSize = newByteSize;
71 | 
72 |     postRealloc();
73 |   }
74 | 
75 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/heap.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "common/common.h"
 7 | #include "ref.h"
 8 | #include <unordered_set>
 9 | 
10 | OIDN_NAMESPACE_BEGIN
11 | 
12 |   class Engine;
13 |   class Buffer;
14 | 
15 |   // -----------------------------------------------------------------------------------------------
16 |   // Heap
17 |   // -----------------------------------------------------------------------------------------------
18 | 
  // Abstract heap which keeps track of the buffers attached to it and notifies them before
  // and after it gets reallocated
  class Heap : public RefCount
  {
    friend class Buffer;

  public:
    virtual Engine* getEngine() const = 0;
    virtual size_t getByteSize() const = 0;
    virtual Storage getStorage() const = 0;

    virtual void realloc(size_t newByteSize) = 0;

  protected:
    // Call preRealloc()/postRealloc() on all attached buffers
    void preRealloc();
    void postRealloc();

  private:
    // Add/remove a buffer to/from the set of attached buffers (accessible to Buffer as friend)
    void attach(Buffer* buffer);
    void detach(Buffer* buffer);

    std::unordered_set<Buffer*> buffers; // attached buffers (non-owning pointers)
  };
40 | 
41 |   // -----------------------------------------------------------------------------------------------
42 |   // USMHeap
43 |   // -----------------------------------------------------------------------------------------------
44 | 
45 |   class USMBuffer;
46 | 
  // Unified shared memory (USM) based heap
  // The memory is allocated and freed through the engine (usmAlloc/usmFree)
  class USMHeap : public Heap
  {
    friend class USMBuffer;

  public:
    USMHeap(Engine* engine, size_t byteSize, Storage storage);
    ~USMHeap();

    Engine* getEngine() const override { return engine; }
    size_t getByteSize() const override { return byteSize; }
    Storage getStorage() const override { return storage; }

    void realloc(size_t newByteSize) override;

  private:
    Engine* engine;   // engine used for allocating and freeing the memory
    char* ptr;        // start of the allocation
    size_t byteSize;  // size of the allocation in bytes
    Storage storage;  // storage kind of the allocation
  };
68 | 
69 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/image_copy.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "op.h"
 7 | #include "image.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class ImageCopy : public BaseOp
12 |   {
13 |   public:
14 |     void setSrc(const Ref<Image>& src) { this->src = src; }
15 |     void setDst(const Ref<Image>& dst) { this->dst = dst; }
16 | 
17 |   protected:
18 |     void check()
19 |     {
20 |       if (!src || !dst)
21 |         throw std::logic_error("image copy source/destination not set");
22 |       if (dst->getH() < src->getH() || dst->getW() < src->getW())
23 |         throw std::out_of_range("image copy destination smaller than the source");
24 |     }
25 | 
26 |     Ref<Image> src;
27 |     Ref<Image> dst;
28 |   };
29 | 
30 | OIDN_NAMESPACE_END
31 | 


--------------------------------------------------------------------------------
/core/input_process.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "input_process.h"
 5 | #include "engine.h"
 6 | 
 7 | OIDN_NAMESPACE_BEGIN
 8 | 
 9 |   InputProcess::InputProcess(Engine* engine, const InputProcessDesc& desc)
10 |     : InputProcessDesc(desc)
11 |   {
12 |     if (srcDims.size() != 3)
13 |       throw std::invalid_argument("invalid input processing source shape");
14 | 
15 |     TensorDims dstDims = srcDims;
16 | 
17 |     TensorDims dstPaddedDims {
18 |       round_up(srcDims[0], engine->getDevice()->getTensorBlockC()), // round up C
19 |       dstDims[1],
20 |       dstDims[2]
21 |     };
22 | 
23 |     dstDesc = {dstDims, dstPaddedDims, engine->getDevice()->getTensorLayout(), engine->getDevice()->getTensorDataType()};
24 | 
25 |     setTile(0, 0, 0, 0, 0, 0);
26 |   }
27 | 
28 |   void InputProcess::setSrc(const Ref<Image>& color,
29 |                             const Ref<Image>& albedo,
30 |                             const Ref<Image>& normal)
31 |   {
32 |     int C = 0;
33 |     if (color)  C += 3; // always broadcast to 3 channels
34 |     if (albedo) C += 3;
35 |     if (normal) C += 3;
36 |     if (C != srcDims[0])
37 |       throw std::invalid_argument("invalid input processing source");
38 | 
39 |     this->color  = color;
40 |     this->albedo = albedo;
41 |     this->normal = normal;
42 |     updateSrc();
43 |   }
44 | 
45 |   void InputProcess::setDst(const Ref<Tensor>& dst)
46 |   {
47 |     if (!dst || dst->getDesc() != dstDesc)
48 |       throw std::invalid_argument("invalid input processing destination");
49 | 
50 |     this->dst = dst;
51 |   }
52 | 
53 |   void InputProcess::setTile(int hSrc, int wSrc, int hDst, int wDst, int H, int W)
54 |   {
55 |     tile.hSrcBegin = hSrc;
56 |     tile.wSrcBegin = wSrc;
57 |     tile.hDstBegin = hDst;
58 |     tile.wDstBegin = wDst;
59 |     tile.H = H;
60 |     tile.W = W;
61 |   }
62 | 
63 |   void InputProcess::check()
64 |   {
65 |     if (!getMainSrc() || !dst)
66 |       throw std::logic_error("input processing source/destination not set");
67 |     if (tile.hSrcBegin + tile.H > getMainSrc()->getH() ||
68 |         tile.wSrcBegin + tile.W > getMainSrc()->getW() ||
69 |         tile.hDstBegin + tile.H > dst->getH() ||
70 |         tile.wDstBegin + tile.W > dst->getW())
71 |       throw std::out_of_range("input processing source/destination out of bounds");
72 |   }
73 | 
74 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/input_process.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "op.h"
 7 | #include "image.h"
 8 | #include "tensor.h"
 9 | #include "color.h"
10 | #include "tile.h"
11 | 
12 | OIDN_NAMESPACE_BEGIN
13 | 
  // Input processing descriptor
  struct InputProcessDesc
  {
    TensorDims srcDims; // source shape; must have 3 dimensions, the first being the channel count
    std::shared_ptr<TransferFunction> transferFunc;
    bool hdr;
    bool snorm;
  };
21 | 
22 |   class InputProcess : public BaseOp, protected InputProcessDesc
23 |   {
24 |   public:
25 |     InputProcess(Engine* engine, const InputProcessDesc& desc);
26 | 
27 |     TensorDesc getDstDesc() const { return dstDesc; }
28 |     Ref<Tensor> getDst() const { return dst; }
29 | 
30 |     void setSrc(const Ref<Image>& color,
31 |                 const Ref<Image>& albedo,
32 |                 const Ref<Image>& normal);
33 |     void setDst(const Ref<Tensor>& dst);
34 |     void setTile(int hSrc, int wSrc, int hDst, int wDst, int H, int W);
35 | 
36 |   protected:
37 |     virtual void updateSrc() {}
38 |     void check();
39 | 
40 |     Image* getMainSrc()
41 |     {
42 |       return color ? color.get() : (albedo ? albedo.get() : normal.get());
43 |     }
44 | 
45 |     TensorDesc dstDesc;
46 |     Ref<Image> color;
47 |     Ref<Image> albedo;
48 |     Ref<Image> normal;
49 |     Ref<Tensor> dst;
50 |     Tile tile;
51 |   };
52 | 
53 | OIDN_NAMESPACE_END
54 | 


--------------------------------------------------------------------------------
/core/math.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "common/platform.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | namespace math {
10 | 
11 | #if defined(OIDN_COMPILE_SYCL_DEVICE)
12 |   // Use the SYCL math functions
13 |   using sycl::min;
14 |   using sycl::max;
15 |   using sycl::isfinite;
16 |   using sycl::isnan;
17 |   using sycl::pow;
18 |   using sycl::log;
19 |   using sycl::log2;
20 |   using sycl::exp;
21 |   using sycl::exp2;
22 | #elif defined(OIDN_COMPILE_CUDA_DEVICE) || defined(OIDN_COMPILE_HIP_DEVICE)
23 |   // Use the CUDA/HIP math functions
24 |   template<typename T> oidn_host_device_inline T min(T a, T b) { return ::min(a, b); }
25 |   template<typename T> oidn_host_device_inline T max(T a, T b) { return ::max(a, b); }
26 |   using ::isfinite;
27 |   using ::isnan;
28 |   using ::pow;
29 |   using ::log;
30 |   using ::log2;
31 |   using ::exp;
32 |   using ::exp2;
33 | #elif defined(OIDN_COMPILE_METAL_DEVICE)
34 |   // Use the Metal math functions
35 |   using metal::min;
36 |   using metal::max;
37 |   using metal::isfinite;
38 |   using metal::isnan;
39 |   using metal::pow;
40 |   using metal::log;
41 |   using metal::log2;
42 |   using metal::exp;
43 |   using metal::exp2;
44 | #else
45 |   using OIDN_NAMESPACE::min;
46 |   using OIDN_NAMESPACE::max;
47 |   using std::isfinite;
48 |   using std::isnan;
49 |   using std::pow;
50 |   using std::log;
51 |   using std::log2;
52 |   using std::exp;
53 |   using std::exp2;
54 | #endif
55 | 
  // CUDA and HIP do not provide min/max overloads for half
#if defined(OIDN_COMPILE_CUDA_DEVICE) && (__CUDA_ARCH__ >= 800)
  // CUDA architecture >= 800: use the __hmin/__hmax intrinsics
  oidn_device_inline half min(half a, half b) { return __hmin(a, b); }
  oidn_device_inline half max(half a, half b) { return __hmax(a, b); }
#elif (defined(OIDN_COMPILE_CUDA_DEVICE) && (__CUDA_ARCH__ >= 530)) || defined(OIDN_COMPILE_HIP_DEVICE)
  // CUDA architecture >= 530 or HIP: comparison-based implementation
  oidn_device_inline half min(half a, half b) { return (b < a) ? b : a; }
  oidn_device_inline half max(half a, half b) { return (a < b) ? b : a; }
#endif
64 | 
65 |   template<typename T>
66 |   oidn_host_device_inline T clamp(T x, T minVal, T maxVal)
67 |   {
68 |     return min(max(x, minVal), maxVal);
69 |   }
70 | 
71 |   oidn_host_device_inline float to_float_unorm(uint32_t x)
72 |   {
73 |     return float(x) * 2.3283064365386962890625e-10f; // x / 2^32
74 |   }
75 | 
76 |   // Maps nan to zero
77 |   oidn_host_device_inline float nan_to_zero(float x)
78 |   {
79 |     return isnan(x) ? 0.f : x;
80 |   }
81 | 
82 | } // namespace math
83 | OIDN_NAMESPACE_END
84 | 


--------------------------------------------------------------------------------
/core/module.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "common/common.h"
 7 | #include <unordered_set>
 8 | 
 9 | #if defined(_WIN32)
10 |   #define OIDN_MODULE_EXPORT extern "C" __declspec(dllexport)
11 | #else
12 |   #define OIDN_MODULE_EXPORT extern "C" __attribute__ ((visibility ("default")))
13 | #endif
14 | 
15 | #define OIDN_DECLARE_INIT_MODULE(name) \
16 |   OIDN_MODULE_EXPORT void OIDN_CONCAT(OIDN_NAMESPACE_C, OIDN_CONCAT(_init_module_##name##_v, OIDN_VERSION))()
17 | 
18 | #if defined(OIDN_STATIC_LIB)
19 |   #define OIDN_DECLARE_INIT_STATIC_MODULE(name) void init_##name()
20 | #else
21 |   #define OIDN_DECLARE_INIT_STATIC_MODULE(name) OIDN_DECLARE_INIT_MODULE(name)
22 | #endif
23 | 
24 | OIDN_NAMESPACE_BEGIN
25 | 
  // Loads modules by name and keeps track of the names of the loaded modules
  class ModuleLoader
  {
  public:
    ModuleLoader();

    // NOTE(review): presumably returns whether the module could be loaded - confirm in the
    // implementation
    bool load(const std::string& name);

  private:
    // Paths use wide strings on Windows and narrow strings elsewhere
  #if defined(_WIN32)
    using Path = std::wstring;
    static constexpr const wchar_t* pathSeps = L"/\\";
  #else
    using Path = std::string;
    static constexpr const char* pathSeps = "/\\";
  #endif

    static void* getSymbolAddress(void* module, const std::string& name);
    static void closeModule(void* module);

    // Returns the absolute path of the module that contains the given address
    // If address is nullptr, returns the path of this module
    static Path getModulePath(void* address = nullptr);

    Path modulePathPrefix; // absolute path of the module directory with trailing path separator
    std::unordered_set<std::string> modules; // loaded module names
  };
52 | 
53 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/op.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "op.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   void BaseOp::submit(const Ref<Progress>& progress)
 9 |   {
10 |     Engine* engine = nullptr;
11 | 
12 |     if (progress)
13 |     {
14 |       engine = getEngine();
15 |       Progress::submitUpdate(engine, progress);
16 |     }
17 | 
18 |     submitKernels(progress);
19 | 
20 |     if (progress)
21 |       Progress::submitUpdate(engine, progress, getWorkAmount());
22 |   }
23 | 
24 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/op.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "engine.h"
 7 | #include "buffer.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
  // Abstract operation class
  // Provides default implementations for everything except getEngine() and submit()
  class Op : public RefCount
  {
  public:
    virtual ~Op() = default;

    virtual Engine* getEngine() const = 0;

    // Support must be checked before getting the scratch size or submission
    // Supported by default
    virtual bool isSupported() const { return true; }

    // Scratch memory
    // By default no scratch memory is needed and the scratch buffer is ignored
    virtual size_t getScratchByteSize() { return 0; }
    virtual void setScratch(const Ref<Buffer>& scratch) {}

    // Finalization is required before submission
    // Does nothing by default
    virtual void finalize() {}

    // Enqueues the operation to the engine, optionally updating the progress as well
    virtual void submit(const Ref<Progress>& progress = nullptr) = 0;

    // Returns the estimated amount of work for progress monitoring
    // One unit by default
    virtual size_t getWorkAmount() const { return 1; }

    // Name for debugging purposes
    std::string getName() const { return name; }
    void setName(const std::string& name) { this->name = name; }

  private:
    std::string name;
  };
42 | 
  // Base class for most operations (except compound operations, e.g. Graph)
  // Implements submit() on top of submitKernels(), which derived classes must provide
  class BaseOp : public Op
  {
  public:
    // Final: derived classes customize submission through submitKernels() instead
    void submit(const Ref<Progress>& progress) final;

    // Enqueues the kernel(s) of the operation to the engine, which may be cancelled if supported
    virtual void submitKernels(const Ref<CancellationToken>& ct = nullptr) = 0;
  };
52 | 
53 | OIDN_NAMESPACE_END
54 | 


--------------------------------------------------------------------------------
/core/output_process.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "output_process.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   OutputProcess::OutputProcess(const OutputProcessDesc& desc)
 9 |     : OutputProcessDesc(desc)
10 |   {
11 |     if (srcDesc.getRank() != 3)
12 |       throw std::invalid_argument("invalid output processing source shape");
13 | 
14 |     setTile(0, 0, 0, 0, 0, 0);
15 |   }
16 | 
17 |   void OutputProcess::setSrc(const Ref<Tensor>& src)
18 |   {
19 |     if (!src || src->getDesc() != srcDesc)
20 |       throw std::invalid_argument("invalid output processing source");
21 | 
22 |     this->src = src;
23 |   }
24 | 
25 |   void OutputProcess::setDst(const Ref<Image>& dst)
26 |   {
27 |     if (!dst || dst->getC() > srcDesc.getC())
28 |       throw std::invalid_argument("invalid output processing destination");
29 | 
30 |     this->dst = dst;
31 |   }
32 | 
33 |   void OutputProcess::setTile(int hSrc, int wSrc, int hDst, int wDst, int H, int W)
34 |   {
35 |     tile.hSrcBegin = hSrc;
36 |     tile.wSrcBegin = wSrc;
37 |     tile.hDstBegin = hDst;
38 |     tile.wDstBegin = wDst;
39 |     tile.H = H;
40 |     tile.W = W;
41 |   }
42 | 
43 |   void OutputProcess::check()
44 |   {
45 |     if (!src || !dst)
46 |       throw std::logic_error("output processing source/destination not set");
47 |     if (tile.hSrcBegin + tile.H > src->getH() ||
48 |         tile.wSrcBegin + tile.W > src->getW() ||
49 |         tile.hDstBegin + tile.H > dst->getH() ||
50 |         tile.wDstBegin + tile.W > dst->getW())
51 |       throw std::out_of_range("output processing source/destination out of bounds");
52 |   }
53 | 
54 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/output_process.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "op.h"
 7 | #include "image.h"
 8 | #include "tensor.h"
 9 | #include "color.h"
10 | #include "tile.h"
11 | 
12 | OIDN_NAMESPACE_BEGIN
13 | 
  // Output processing descriptor
  struct OutputProcessDesc
  {
    TensorDesc srcDesc; // descriptor of the source tensor; must have rank 3
    std::shared_ptr<TransferFunction> transferFunc;
    bool hdr;
    bool snorm;
  };
21 | 
  // Base class of output processing: takes a source tensor and a destination image, and
  // processes a tile of them
  class OutputProcess : public BaseOp, protected OutputProcessDesc
  {
  public:
    OutputProcess(const OutputProcessDesc& desc);

    TensorDesc getSrcDesc() const { return srcDesc; }
    Ref<Tensor> getSrc() const { return src; }

    void setSrc(const Ref<Tensor>& src);
    void setDst(const Ref<Image>& dst);
    void setTile(int hSrc, int wSrc, int hDst, int wDst, int H, int W);

  protected:
    // Validates the source, destination and tile
    void check();

    Ref<Tensor> src;
    Ref<Image> dst;
    Tile tile;
  };
41 | 
42 | OIDN_NAMESPACE_END
43 | 


--------------------------------------------------------------------------------
/core/pool.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "pool.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   Pool::Pool(const PoolDesc& desc)
 9 |     : PoolDesc(desc)
10 |   {
11 |     if (srcDesc.getRank() != 3 || srcDesc.getH() % 2 != 0 || srcDesc.getW() % 2 != 0)
12 |       throw std::invalid_argument("invalid pooling source shape");
13 | 
14 |     TensorDims dstDims{srcDesc.getC(), srcDesc.getH() / 2, srcDesc.getW() / 2};
15 |     TensorDims dstPaddedDims{srcDesc.getPaddedC(), dstDims[1], dstDims[2]};
16 |     dstDesc = {dstDims, dstPaddedDims, srcDesc.layout, srcDesc.dataType};
17 |   }
18 | 
19 |   void Pool::setSrc(const Ref<Tensor>& src)
20 |   {
21 |     if (!src || src->getDesc() != srcDesc)
22 |       throw std::invalid_argument("invalid pooling source");
23 | 
24 |     this->src = src;
25 |     updateSrc();
26 |   }
27 | 
28 |   void Pool::setDst(const Ref<Tensor>& dst)
29 |   {
30 |     if (!dst || dst->getDesc() != dstDesc)
31 |       throw std::invalid_argument("invalid pooling destination");
32 | 
33 |     this->dst = dst;
34 |     updateDst();
35 |   }
36 | 
37 | OIDN_NAMESPACE_END
38 | 


--------------------------------------------------------------------------------
/core/pool.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "op.h"
 7 | #include "tensor.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
  // 2x2 max pooling descriptor
  struct PoolDesc
  {
    TensorDesc srcDesc; // descriptor of the source tensor; must have rank 3 and even height/width
  };
16 | 
  // 2x2 max pooling
  // The destination has the same channel counts as the source, and half its height and width
  class Pool : public BaseOp, protected PoolDesc
  {
  public:
    Pool(const PoolDesc& desc);

    TensorDesc getDstDesc() const { return dstDesc; }
    Ref<Tensor> getDst() const { return dst; }

    void setSrc(const Ref<Tensor>& src);
    void setDst(const Ref<Tensor>& dst);

  protected:
    // Called by setSrc()/setDst() after the tensor has been stored
    virtual void updateSrc() {}
    virtual void updateDst() {}

    TensorDesc dstDesc;
    Ref<Tensor> src;
    Ref<Tensor> dst;
  };
37 | 
38 | OIDN_NAMESPACE_END
39 | 


--------------------------------------------------------------------------------
/core/progress.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "progress.h"
 5 | #include "engine.h"
 6 | 
 7 | OIDN_NAMESPACE_BEGIN
 8 | 
 9 |   Progress::Progress(ProgressMonitorFunction func, void* userPtr, size_t total)
10 |     : func(func),
11 |       userPtr(userPtr),
12 |       total(total),
13 |       current(0),
14 |       started(false)
15 |   {
16 |     if (!func)
17 |       throw std::invalid_argument("progress monitor function is null");
18 |   }
19 | 
20 |   void Progress::update(size_t delta)
21 |   {
22 |     std::lock_guard<std::mutex> lock(mutex);
23 |     current = std::min(current + delta, total);
24 |     if (!func(userPtr, double(current) / double(total)))
25 |       cancel();
26 |   }
27 | 
28 |   void Progress::submitUpdate(Engine* engine, const Ref<Progress>& progress, size_t delta)
29 |   {
30 |     if (progress->isCancelled())
31 |       throw Exception(Error::Cancelled, "execution was cancelled");
32 | 
33 |     if (!progress->started || delta != 0) // always submit the first update
34 |     {
35 |       engine->submitHostFunc([progress, delta]() { progress->update(delta); }, progress);
36 |       progress->started = true;
37 |     }
38 |   }
39 | 
40 | OIDN_NAMESPACE_END
41 | 


--------------------------------------------------------------------------------
/core/progress.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "common/common.h"
 7 | #include "ref.h"
 8 | #include <mutex>
 9 | 
10 | OIDN_NAMESPACE_BEGIN
11 | 
12 |   class Engine;
13 | 
14 |   // Cancellation request state for asynchronous operations
15 |   class CancellationToken : public RefCount
16 |   {
17 |   public:
18 |     CancellationToken() : cancelled(false) {}
19 | 
20 |     bool isCancelled() const { return cancelled; }
21 |     void cancel() { cancelled = true; }
22 | 
23 |   protected:
24 |     std::atomic<bool> cancelled;
25 |   };
26 | 
  // Progress monitoring for asynchronous operations
  // Also serves as the cancellation token: cancellation is requested when the progress monitor
  // function returns false
  class Progress : public CancellationToken
  {
  public:
    // Throws std::invalid_argument if func is null
    Progress(ProgressMonitorFunction func, void* userPtr, size_t total);

    // Enqueues a progress update, advancing the progress with the specified amount, and calling
    // the progress monitor function
    static void submitUpdate(Engine* engine, const Ref<Progress>& progress, size_t delta = 0);

  private:
    ProgressMonitorFunction func;
    void* userPtr;
    size_t total;     // maximum progress value
    size_t current;   // current progress value
    bool started;     // whether any progress updates have been submitted yet
    std::mutex mutex; // locked for the duration of update()

    // Advances the progress and calls the progress monitor function
    void update(size_t delta);
  };
47 | 
48 | OIDN_NAMESPACE_END
49 | 


--------------------------------------------------------------------------------
/core/record.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "buffer.h"
 7 | #include "exception.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   // Plain value or structure stored in a buffer
12 |   template<typename T>
13 |   class Record final : public Memory
14 |   {
15 |     static_assert(std::is_trivial<T>::value, "record can be used only for trivial types");
16 | 
17 |   public:
18 |     Record(const Ref<Buffer>& buffer, size_t byteOffset = 0)
19 |       : Memory(buffer, byteOffset)
20 |     {
21 |       if (byteOffset + sizeof(T) > buffer->getByteSize())
22 |         throw Exception(Error::InvalidArgument, "buffer region is out of bounds");
23 |     }
24 | 
25 |     T* getPtr() const
26 |     {
27 |       return (T*)((char*)buffer->getPtr() + byteOffset);
28 |     }
29 |   };
30 | 
31 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/rt_filter.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "unet_filter.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // RT: Generic ray tracing denoiser, implemented as a UNetFilter specialization
11 |   class RTFilter final : public UNetFilter
12 |   {
13 |   public:
14 |     explicit RTFilter(const Ref<Device>& device);
15 |     // Named-parameter accessors overridden from the base filter (defined outside this header)
16 |     void setImage(const std::string& name, const Ref<Image>& image) override;
17 |     void unsetImage(const std::string& name) override;
18 |     void setInt(const std::string& name, int value) override;
19 |     int getInt(const std::string& name) override;
20 | 
21 |   protected:
22 |     std::shared_ptr<TransferFunction> newTransferFunc() override; // returns a new transfer function
23 |   };
24 | 
25 | OIDN_NAMESPACE_END
26 | 


--------------------------------------------------------------------------------
/core/rtlightmap_filter.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2019 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "rtlightmap_filter.h"
 5 | 
 6 | #if defined(OIDN_FILTER_RTLIGHTMAP)
 7 |   #include "weights/rtlightmap_hdr.h"
 8 |   #include "weights/rtlightmap_dir.h"
 9 | #endif
10 | 
11 | OIDN_NAMESPACE_BEGIN
12 | 
13 |   // Creates the lightmap filter; HDR mode stays on until "directional" is set via setInt()
14 |   RTLightmapFilter::RTLightmapFilter(const Ref<Device>& device) : UNetFilter(device)
15 |   {
16 |   #ifdef OIDN_FILTER_RTLIGHTMAP
17 |     // Built-in weight blobs are referenced only when this filter is compiled in
18 |     models.dir = {blobs::weights::rtlightmap_dir};
19 |     models.hdr = {blobs::weights::rtlightmap_hdr};
20 |   #endif
21 |     hdr = true;
22 |   }
23 | 
24 |   // Returns a new transfer function: logarithmic in HDR mode, linear otherwise
25 |   std::shared_ptr<TransferFunction> RTLightmapFilter::newTransferFunc()
26 |   {
27 |     const TransferFunction::Type type =
28 |       hdr ? TransferFunction::Type::Log : TransferFunction::Type::Linear;
29 |     return std::make_shared<TransferFunction>(type);
30 |   }
31 | 
32 |   void RTLightmapFilter::setImage(const std::string& name, const Ref<Image>& image)
33 |   {
34 |     // Only the "color" and "output" image slots exist; any other name is just reported
35 |     if (name != "color" && name != "output")
36 |       device->printWarning("unknown filter parameter or type mismatch: '" + name + "'");
37 |     else if (name == "color")
38 |       setParam(color, image);
39 |     else
40 |       setParam(output, image);
41 |     dirty = true; // marked dirty unconditionally, even after a warning
42 |   }
43 | 
44 |   void RTLightmapFilter::unsetImage(const std::string& name)
45 |   {
46 |     // Only the "color" and "output" image slots can be cleared; other names are just reported
47 |     if (name != "color" && name != "output")
48 |       device->printWarning("unknown filter parameter or type mismatch: '" + name + "'");
49 |     else if (name == "color")
50 |       removeParam(color);
51 |     else
52 |       removeParam(output);
53 |     dirty = true; // marked dirty unconditionally, even after a warning
54 |   }
55 | 
56 |   void RTLightmapFilter::setInt(const std::string& name, int value)
57 |   {
58 |     // Every parameter except "directional" is handled by the base class
59 |     if (name != "directional")
60 |       UNetFilter::setInt(name, value);
61 |     else
62 |     {
63 |       setParam(directional, value);
64 |       hdr = !directional; // HDR mode is the opposite of the directional flag
65 |     }
66 |     dirty = true;
67 |   }
68 | 
69 |   int RTLightmapFilter::getInt(const std::string& name)
70 |   {
71 |     // "directional" is answered here; every other name is deferred to the base class
72 |     if (name != "directional")
73 |       return UNetFilter::getInt(name);
74 |     return directional;
75 |   }
76 | 
77 | OIDN_NAMESPACE_END
78 | 


--------------------------------------------------------------------------------
/core/rtlightmap_filter.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2019 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "unet_filter.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // RTLightmap: Ray traced lightmap denoiser, implemented as a UNetFilter specialization
11 |   class RTLightmapFilter final : public UNetFilter
12 |   {
13 |   public:
14 |     explicit RTLightmapFilter(const Ref<Device>& device);
15 |     // Accepts the "color"/"output" images and the "directional" int (other ints go to the base)
16 |     void setImage(const std::string& name, const Ref<Image>& image) override;
17 |     void unsetImage(const std::string& name) override;
18 |     void setInt(const std::string& name, int value) override;
19 |     int getInt(const std::string& name) override;
20 | 
21 |   protected:
22 |     std::shared_ptr<TransferFunction> newTransferFunc() override; // Log if hdr, else Linear
23 |   };
24 | 
25 | OIDN_NAMESPACE_END
26 | 


--------------------------------------------------------------------------------
/core/subdevice.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "subdevice.h"
 5 | #include "engine.h"
 6 | 
 7 | OIDN_NAMESPACE_BEGIN
 8 | 
 9 |   // Takes ownership of the engine and registers this subdevice with it
10 |   Subdevice::Subdevice(std::unique_ptr<Engine>&& engine) : engine(std::move(engine))
11 |   {
12 |     getEngine()->setSubdevice(this); // the parameter has been moved from, so go via the member
13 |   }
14 | 
15 |   Ref<Arena> Subdevice::newScratchArena(size_t byteSize, const std::string& name)
16 |   {
17 |     if (scratchArenaManager == nullptr) // the manager is created lazily, on the first request
18 |       scratchArenaManager.reset(new ScratchArenaManager(getEngine()));
19 |     return makeRef<ScratchArena>(scratchArenaManager.get(), byteSize, name);
20 |   }
21 | 
22 |   void Subdevice::trimScratch()
23 |   {
24 |     if (scratchArenaManager == nullptr) return; // no manager yet, so there is nothing to trim
25 |     scratchArenaManager->trim();
26 |   }
27 | 
28 |   std::shared_ptr<TensorMap> Subdevice::getCachedTensors(const void* key)
29 |   {
30 |     auto& entry = cachedTensors[key]; // operator[] inserts an empty slot for a new key
31 |     if (entry == nullptr)
32 |       entry = std::make_shared<TensorMap>();
33 |     return entry;
34 |   }
35 | 
36 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/subdevice.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "device.h"
 7 | #include "arena.h"
 8 | #include "tensor.h"
 9 | 
10 | OIDN_NAMESPACE_BEGIN
11 | 
12 |   // Subdevice consisting of an engine and some shared resources
13 |   class Subdevice final
14 |   {
15 |   public:
16 |     explicit Subdevice(std::unique_ptr<Engine>&& engine);
17 |     // Non-owning access to the engine; ownership stays with the subdevice
18 |     Engine* getEngine() const { return engine.get(); }
19 | 
20 |     // Scratch: arenas are served by a manager that is created on the first request
21 |     Ref<Arena> newScratchArena(size_t byteSize, const std::string& name = "");
22 |     void trimScratch();
23 | 
24 |     // Tensor cache: returns the map stored under the key, creating an empty one if missing
25 |     std::shared_ptr<TensorMap> getCachedTensors(const void* key);
26 | 
27 |   private:
28 |     // Disable copying
29 |     Subdevice(const Subdevice&) = delete;
30 |     Subdevice& operator =(const Subdevice&) = delete;
31 | 
32 |     std::unique_ptr<Engine> engine; // must be declared first / destroyed last
33 | 
34 |     // Resources (declared after the engine, so they are destroyed before it)
35 |     std::unique_ptr<ScratchArenaManager> scratchArenaManager;
36 |     std::unordered_map<const void*, std::shared_ptr<TensorMap>> cachedTensors; // cached weights
37 |   };
38 | 
39 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/tensor_accessor.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "tensor_layout.h"
 7 | #include "vec.h"
 8 | 
 9 | // Forward declarations of the accessor structs that live in the ispc namespace
10 | namespace ispc
11 | {
12 |   struct TensorAccessor1D;
13 |   struct TensorAccessor3D;
14 |   struct TensorAccessor4D;
15 | } // namespace ispc
16 | 
17 | OIDN_NAMESPACE_BEGIN
18 | 
19 |   template<typename T>
20 |   struct TensorAccessor1D // typed view over 1D tensor data addressed through a raw pointer
21 |   {
22 |     TensorByteOffset<T, TensorLayout::x> getByteOffset; // maps an index to a byte offset
23 |     oidn_global char* ptr; // base address, kept as char* for byte arithmetic
24 |     int X; // padded dimensions
25 | 
26 |     TensorAccessor1D() = default;
27 |     // Wraps existing data; getByteOffset is default-constructed here (X is not passed to it)
28 |     oidn_host_device_inline TensorAccessor1D(oidn_global void* data, int X)
29 |       : ptr(static_cast<oidn_global char*>(data)),
30 |         X(X) {}
31 |     // Returns a reference to element x, located by its byte offset from ptr
32 |     oidn_host_device_inline oidn_global T& operator ()(int x) const
33 |     {
34 |       return *reinterpret_cast<oidn_global T*>(ptr + getByteOffset(x));
35 |     }
36 |   };
37 | 
38 |   template<typename T, TensorLayout layout>
39 |   struct TensorAccessor3D // typed view over (C, H, W) tensor data in the given layout
40 |   {
41 |     TensorByteOffset<T, layout> getByteOffset; // maps (c, h, w) to a byte offset
42 |     oidn_global char* ptr; // base address, kept as char* for byte arithmetic
43 |     int C, H, W; // padded dimensions
44 | 
45 |     TensorAccessor3D() = default;
46 |     // Wraps existing data; the dimensions are also passed to the byte offset functor
47 |     oidn_host_device_inline TensorAccessor3D(oidn_global void* data, int C, int H, int W)
48 |       : getByteOffset(C, H, W),
49 |         ptr(static_cast<oidn_global char*>(data)), C(C), H(H), W(W) {}
50 |     // Returns a reference to element (c, h, w)
51 |     oidn_host_device_inline oidn_global T& operator ()(int c, int h, int w) const
52 |     {
53 |       return *reinterpret_cast<oidn_global T*>(ptr + getByteOffset(c, h, w));
54 |     }
55 |     // Reads channels c, c+1 and c+2 at position (h, w) as a 3-vector
56 |     oidn_host_device_inline vec3<T> get3(int c, int h, int w) const
57 |     {
58 |       return vec3<T>((*this)(c,   h, w),
59 |                      (*this)(c+1, h, w),
60 |                      (*this)(c+2, h, w));
61 |     }
62 |     // Writes value.x/y/z to channels c, c+1 and c+2 at (h, w); const because only *ptr changes
63 |     oidn_host_device_inline void set3(int c, int h, int w, vec3<T> value) const
64 |     {
65 |       (*this)(c,   h, w) = value.x;
66 |       (*this)(c+1, h, w) = value.y;
67 |       (*this)(c+2, h, w) = value.z;
68 |     }
69 |   };
70 | 
71 |   template<typename T, TensorLayout layout>
72 |   struct TensorAccessor4D // typed view over (O, I, H, W) tensor data in the given layout
73 |   {
74 |     TensorByteOffset<T, layout> getByteOffset; // maps (o, i, h, w) to a byte offset
75 |     oidn_global char* ptr; // base address, kept as char* for byte arithmetic
76 |     int O, I, H, W; // padded dimensions
77 | 
78 |     TensorAccessor4D() = default;
79 |     // Wraps existing data; the dimensions are also passed to the byte offset functor
80 |     oidn_host_device_inline TensorAccessor4D(oidn_global void* data, int O, int I, int H, int W)
81 |       : getByteOffset(O, I, H, W),
82 |         ptr(static_cast<oidn_global char*>(data)), O(O), I(I), H(H), W(W) {}
83 |     // Returns a reference to element (o, i, h, w)
84 |     oidn_host_device_inline oidn_global T& operator ()(int o, int i, int h, int w) const
85 |     {
86 |       return *reinterpret_cast<oidn_global T*>(ptr + getByteOffset(o, i, h, w));
87 |     }
88 |   };
89 | 
90 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/tensor_reorder.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "tensor.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 |   // Tensor reordering helpers, defined elsewhere. NOTE(review): parameter meanings unconfirmed
 8 |   void reorderWeight(Tensor& src, int srcBeginI, int srcI, Tensor& dst, int dstBeginI, int dstI);
 9 |   void reorderWeight(Tensor& src, Tensor& dst); // presumably the whole-tensor form - TODO confirm
10 |   void reorderBias(Tensor& src, Tensor& dst);
11 | 
12 | OIDN_NAMESPACE_END
13 | 


--------------------------------------------------------------------------------
/core/tile.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2019 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "common/platform.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   struct Tile // six plain ints; NOTE(review): meanings below are inferred from the names
11 |   {
12 |     int hSrcBegin; // presumably the first row of the tile in the source - TODO confirm
13 |     int wSrcBegin; // presumably the first column of the tile in the source - TODO confirm
14 |     int hDstBegin; // presumably the first row of the tile in the destination - TODO confirm
15 |     int wDstBegin; // presumably the first column of the tile in the destination - TODO confirm
16 |     int H;         // presumably the tile height - TODO confirm
17 |     int W;         // presumably the tile width - TODO confirm
18 |   };
19 | 
20 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/tza.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "tensor.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Parses tensors from a Tensor Archive (TZA) held in memory at `buffer` (`size` bytes)
11 |   std::shared_ptr<TensorMap> parseTZA(const void* buffer, size_t size);
12 | 
13 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/core/upsample.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "upsample.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   Upsample::Upsample(const UpsampleDesc& desc)
 9 |     : UpsampleDesc(desc)
10 |   {
11 |     if (srcDesc.getRank() != 3) // only rank-3 source shapes are accepted
12 |       throw std::invalid_argument("invalid upsampling source shape");
13 |     const auto dstH = srcDesc.getH() * 2; // height and width are doubled, channels are kept
14 |     const auto dstW = srcDesc.getW() * 2;
15 |     dstDesc = {TensorDims{srcDesc.getC(), dstH, dstW},
16 |                TensorDims{srcDesc.getPaddedC(), dstH, dstW}, srcDesc.layout, srcDesc.dataType};
17 |   }
18 | 
19 |   void Upsample::setSrc(const Ref<Tensor>& src)
20 |   {
21 |     const bool invalid = !src || src->getDesc() != srcDesc; // null or descriptor mismatch
22 |     if (invalid)
23 |       throw std::invalid_argument("invalid upsampling source");
24 |     this->src = src;
25 |     updateSrc(); // virtual hook, called after the tensor has been stored
26 |   }
27 | 
28 |   void Upsample::setDst(const Ref<Tensor>& dst)
29 |   {
30 |     const bool invalid = !dst || dst->getDesc() != dstDesc; // null or descriptor mismatch
31 |     if (invalid)
32 |       throw std::invalid_argument("invalid upsampling destination");
33 |     this->dst = dst;
34 |     updateDst(); // virtual hook, called after the tensor has been stored
35 |   }
36 | 
37 | OIDN_NAMESPACE_END
38 | 


--------------------------------------------------------------------------------
/core/upsample.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "op.h"
 7 | #include "tensor.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   struct UpsampleDesc // construction parameters of an Upsample op
12 |   {
13 |     TensorDesc srcDesc; // source tensor descriptor; Upsample rejects it unless its rank is 3
14 |   };
15 | 
16 |   // 2x2 nearest-neighbor upsampling: the destination has twice the source height and width
17 |   class Upsample : public BaseOp, protected UpsampleDesc
18 |   {
19 |   public:
20 |     Upsample(const UpsampleDesc& desc);
21 |     // The destination descriptor is derived from srcDesc in the constructor
22 |     TensorDesc getDstDesc() const { return dstDesc; }
23 |     Ref<Tensor> getDst() const { return dst; }
24 |     // Both setters throw std::invalid_argument for a null tensor or a mismatching descriptor
25 |     void setSrc(const Ref<Tensor>& src);
26 |     void setDst(const Ref<Tensor>& dst);
27 | 
28 |   protected:
29 |     virtual void updateSrc() {} // called by setSrc() after the tensor is stored; no-op here
30 |     virtual void updateDst() {} // called by setDst() after the tensor is stored; no-op here
31 | 
32 |     TensorDesc dstDesc;
33 |     Ref<Tensor> src;
34 |     Ref<Tensor> dst;
35 |   };
36 | 
37 | OIDN_NAMESPACE_END
38 | 


--------------------------------------------------------------------------------
/core/verbose.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "common/platform.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Base class for objects that print messages depending on a verbosity level
11 |   class Verbose
12 |   {
13 |   public:
14 |     Verbose(int v = 0) : verbose(v) {}
15 | 
16 |     void setVerbose(int v) { verbose = v; }
17 |     bool isVerbose(int v = 1) const { return verbose >= v; } // true if level v is enabled
18 | 
19 |     // Level 1 and above: plain message to stdout
20 |     void print(const std::string& message)
21 |     {
22 |       if (!isVerbose()) return;
23 |       std::cout << message << std::endl;
24 |     }
25 |     // Level 1 and above: message to stderr with a "Warning: " prefix
26 |     void printWarning(const std::string& message)
27 |     {
28 |       if (!isVerbose()) return;
29 |       std::cerr << "Warning: " << message << std::endl;
30 |     }
31 |     // Level 1 and above: message to stderr with an "Error: " prefix
32 |     void printError(const std::string& message)
33 |     {
34 |       if (!isVerbose()) return;
35 |       std::cerr << "Error: " << message << std::endl;
36 |     }
37 |     // Level 2 and above: debug message to stdout
38 |     void printDebug(const std::string& message)
39 |     {
40 |       if (!isVerbose(2)) return;
41 |       std::cout << message << std::endl;
42 |     }
43 |   protected:
44 |     int verbose;
45 |   };
46 | 
47 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/bnns/bnns_common.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "bnns_common.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   BNNSNDArrayDescriptor toBNNS(const TensorDesc& td) // descriptor only; data is set to nullptr
 9 |   {
10 |     BNNSNDArrayDescriptor res;
11 |     // Layout and sizes: the sizes are listed in reverse order of td.dims
12 |     switch (td.layout)
13 |     {
14 |     case TensorLayout::x:
15 |       assert(td.getRank() == 1);
16 |       res = BNNSNDArrayDescriptor({
17 |         .layout = BNNSDataLayoutVector,
18 |         .size   = {size_t(td.dims[0])}
19 |       });
20 |       break;
21 |     case TensorLayout::chw:
22 |       assert(td.getRank() == 3);
23 |       res = BNNSNDArrayDescriptor({
24 |         .layout = BNNSDataLayoutImageCHW,
25 |         .size   = {size_t(td.dims[2]), size_t(td.dims[1]), size_t(td.dims[0])}
26 |       });
27 |       break;
28 |     case TensorLayout::oihw:
29 |       assert(td.getRank() == 4);
30 |       res = BNNSNDArrayDescriptor({
31 |         .layout = BNNSDataLayoutConvolutionWeightsOIHW,
32 |         .size   = {size_t(td.dims[3]), size_t(td.dims[2]), size_t(td.dims[1]), size_t(td.dims[0])}
33 |       });
34 |       break;
35 |     default:
36 |       throw std::invalid_argument("unsupported tensor layout");
37 |     }
38 |     // Element type: only Float32, Float16 and UInt8 are mapped
39 |     switch (td.dataType)
40 |     {
41 |     case DataType::Float32:
42 |       res.data_type = BNNSDataTypeFloat32;
43 |       break;
44 |     case DataType::Float16:
45 |       res.data_type = BNNSDataTypeFloat16;
46 |       break;
47 |     case DataType::UInt8:
48 |       res.data_type = BNNSDataTypeUInt8;
49 |       break;
50 |     default:
51 |       throw std::invalid_argument("unsupported data type");
52 |     }
53 |     // No storage is attached by this overload
54 |     res.data = nullptr;
55 |     return res;
56 |   }
57 | 
58 |   BNNSNDArrayDescriptor toBNNS(const Ref<Tensor>& t)
59 |   {
60 |     BNNSNDArrayDescriptor desc = toBNNS(t->getDesc()); // layout, sizes and data type
61 |     desc.data = t->getPtr();                            // attach the tensor's data pointer
62 |     return desc;
63 |   }
64 | 
65 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/bnns/bnns_common.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <Accelerate/Accelerate.h>
 7 | #include "core/tensor.h"
 8 | #include "bnns_engine.h"
 9 | 
10 | OIDN_NAMESPACE_BEGIN
11 | 
12 |   BNNSNDArrayDescriptor toBNNS(const TensorDesc& td); // BNNS descriptor for a tensor descriptor
13 |   BNNSNDArrayDescriptor toBNNS(const Ref<Tensor>& t); // BNNS descriptor for a tensor object
14 | 
15 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/bnns/bnns_conv.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "bnns_conv.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   // Stores the descriptor and the engine; the BNNS filter itself is created by finalize()
 9 |   BNNSConv::BNNSConv(BNNSEngine* engine, const ConvDesc& desc)
10 |     : Conv(desc), engine(engine)
11 |   {}
12 | 
13 |   BNNSConv::~BNNSConv()
14 |   {
15 |     if (filter != nullptr) // stays null unless finalize() created the filter
16 |       BNNSFilterDestroy(filter);
17 |   }
18 | 
19 |   void BNNSConv::updateWeight()
20 |   {
21 |     const bool finalized = (filter != nullptr); // the filter exists only after finalize()
22 |     if (finalized) throw std::logic_error("convolution weight cannot be set after finalization");
23 |   }
24 | 
25 |   void BNNSConv::updateBias()
26 |   {
27 |     const bool finalized = (filter != nullptr); // the filter exists only after finalize()
28 |     if (finalized) throw std::logic_error("convolution bias cannot be set after finalization");
29 |   }
30 | 
31 |   void BNNSConv::finalize() // creates the BNNS convolution filter; may be called only once
32 |   {
33 |     if (filter)
34 |       throw std::logic_error("convolution already finalized");
35 |     if (!weight || !bias)
36 |       throw std::logic_error("convolution weight/bias not set before finalization");
37 |     // Stride and dilation of 1 and a padding of 1 in x and y are hard-coded here
38 |     BNNSLayerParametersConvolution params = {
39 |       .i_desc = toBNNS(srcDesc),
40 |       .w_desc = toBNNS(weight),
41 |       .o_desc = toBNNS(dstDesc),
42 |       .bias   = toBNNS(bias),
43 |       .x_stride = 1,
44 |       .y_stride = 1,
45 |       .x_dilation_stride = 1,
46 |       .y_dilation_stride = 1,
47 |       .x_padding = 1,
48 |       .y_padding = 1,
49 |     };
50 |     // ReLU is the only activation mapped to BNNS; every other value becomes identity
51 |     if (activation == Activation::ReLU)
52 |       params.activation.function = BNNSActivationFunctionRectifiedLinear;
53 |     else
54 |       params.activation.function = BNNSActivationFunctionIdentity;
55 |     // The second argument is passed as nullptr; a null result is reported as a runtime error
56 |     filter = BNNSFilterCreateLayerConvolution(&params, nullptr);
57 |     if (!filter)
58 |       throw std::runtime_error("BNNSFilterCreateLayerConvolution failed");
59 |   }
60 | 
61 |   void BNNSConv::submitKernels(const Ref<CancellationToken>& ct) // queues the filter on the engine
62 |   {
63 |     if (!filter)
64 |       throw std::logic_error("convolution not finalized");
65 |     if (!src || !dst)
66 |       throw std::logic_error("convolution source/destination not set");
67 |     BNNSFilter filterHandle = filter; // local copy, so [=] does not capture `this` implicitly
68 |     void* srcPtr = src->getPtr();
69 |     void* dstPtr = dst->getPtr();
70 |     engine->submitFunc([=] { BNNSFilterApply(filterHandle, srcPtr, dstPtr); }, ct);
71 |   }
72 | 
73 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/bnns/bnns_conv.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/conv.h"
 7 | #include "bnns_common.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class BNNSConv : public Conv // convolution op backed by a BNNS filter object
12 |   {
13 |   public:
14 |     BNNSConv(BNNSEngine* engine, const ConvDesc& desc);
15 |     ~BNNSConv();
16 |     // finalize() creates the BNNS filter; submitKernels() throws if it has not been created
17 |     Engine* getEngine() const override { return engine; }
18 |     void finalize() override;
19 |     void submitKernels(const Ref<CancellationToken>& ct) override;
20 | 
21 |   private:
22 |     void updateWeight() override; // throws std::logic_error once the filter exists
23 |     void updateBias() override;   // throws std::logic_error once the filter exists
24 | 
25 |     BNNSEngine* engine;
26 |     BNNSFilter filter = nullptr; // null until finalize(); destroyed in the destructor
27 |   };
28 | 
29 | OIDN_NAMESPACE_END
30 | 


--------------------------------------------------------------------------------
/devices/cpu/bnns/bnns_engine.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "bnns_engine.h"
 5 | #include "bnns_conv.h"
 6 | #include "bnns_pool.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   BNNSEngine::BNNSEngine(CPUDevice* device, int numThreads)
11 |     : CPUEngine(device, numThreads) // both arguments are forwarded unchanged to the base
12 |   {}
13 | 
14 |   Ref<Conv> BNNSEngine::newConv(const ConvDesc& desc)
15 |   {
16 |     return makeRef<BNNSConv>(this, desc); // new BNNSConv bound to this engine
17 |   }
18 | 
19 |   Ref<Pool> BNNSEngine::newPool(const PoolDesc& desc)
20 |   {
21 |     return makeRef<BNNSPool>(this, desc); // new BNNSPool bound to this engine
22 |   }
23 | 
24 | OIDN_NAMESPACE_END
25 | 


--------------------------------------------------------------------------------
/devices/cpu/bnns/bnns_engine.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "../cpu_engine.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   class BNNSEngine final : public CPUEngine // CPU engine whose conv and pool ops use BNNS
11 |   {
12 |   public:
13 |     BNNSEngine(CPUDevice* device, int numThreads);
14 | 
15 |     // Ops: factories that return the BNNS-backed implementations
16 |     Ref<Conv> newConv(const ConvDesc& desc) override;
17 |     Ref<Pool> newPool(const PoolDesc& desc) override;
18 |   };
19 | 
20 | OIDN_NAMESPACE_END
21 | 


--------------------------------------------------------------------------------
/devices/cpu/bnns/bnns_pool.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "bnns_pool.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   // Stores the descriptor and the engine; the BNNS filter itself is created by finalize()
 9 |   BNNSPool::BNNSPool(BNNSEngine* engine, const PoolDesc& desc)
10 |     : Pool(desc), engine(engine)
11 |   {}
12 | 
13 |   BNNSPool::~BNNSPool()
14 |   {
15 |     if (filter != nullptr) // stays null unless finalize() created the filter
16 |       BNNSFilterDestroy(filter);
17 |   }
18 | 
19 |   void BNNSPool::finalize() // creates the BNNS pooling filter; may be called only once
20 |   {
21 |     if (filter)
22 |       throw std::logic_error("pooling already finalized");
23 |     // Max pooling with a 2x2 kernel and a stride of 2 in both directions
24 |     BNNSLayerParametersPooling params = {
25 |       .i_desc = toBNNS(srcDesc),
26 |       .o_desc = toBNNS(dstDesc),
27 |       .pooling_function = BNNSPoolingFunctionMax,
28 |       .k_width  = 2,
29 |       .k_height = 2,
30 |       .x_stride = 2,
31 |       .y_stride = 2
32 |     };
33 |     // The second argument is passed as nullptr; a null result is reported as a runtime error
34 |     filter = BNNSFilterCreateLayerPooling(&params, nullptr);
35 |     if (!filter)
36 |       throw std::runtime_error("BNNSFilterCreateLayerPooling failed");
37 |   }
38 | 
39 |   void BNNSPool::submitKernels(const Ref<CancellationToken>& ct) // queues the filter on the engine
40 |   {
41 |     if (!filter)
42 |       throw std::logic_error("pooling not finalized");
43 |     if (!src || !dst)
44 |       throw std::logic_error("pooling source/destination not set");
45 |     BNNSFilter filterHandle = filter; // local copy, so [=] does not capture `this` implicitly
46 |     void* srcPtr = src->getPtr();
47 |     void* dstPtr = dst->getPtr();
48 |     engine->submitFunc([=] { BNNSFilterApply(filterHandle, srcPtr, dstPtr); }, ct);
49 |   }
50 | 
51 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/bnns/bnns_pool.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/pool.h"
 7 | #include "bnns_common.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class BNNSPool : public Pool // pooling op backed by a BNNS filter object
12 |   {
13 |   public:
14 |     BNNSPool(BNNSEngine* engine, const PoolDesc& desc);
15 |     ~BNNSPool();
16 |     // finalize() creates the BNNS filter; submitKernels() throws if it has not been created
17 |     Engine* getEngine() const override { return engine; }
18 |     void finalize() override;
19 |     void submitKernels(const Ref<CancellationToken>& ct) override;
20 | 
21 |   private:
22 |     BNNSEngine* engine;
23 |     BNNSFilter filter = nullptr; // null until finalize(); destroyed in the destructor
24 |   };
25 | 
26 | OIDN_NAMESPACE_END
27 | 


--------------------------------------------------------------------------------
/devices/cpu/color.isph:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "vec.isph"
 5 | 
 6 | struct TransferFunction
 7 | {
 8 |   // Forward and inverse functions
 9 |   vec3f (*uniform forward)(const uniform TransferFunction* uniform self, vec3f y);
10 |   vec3f (*uniform inverse)(const uniform TransferFunction* uniform self, vec3f x);
11 | 
12 |   // Input and output scales (*not* applied by the forward/inverse functions!)
13 |   uniform const float* uniform inputScalePtr; // if non-null, read instead of the two scales below
14 |   uniform float inputScale;                   // used only when inputScalePtr is null
15 |   uniform float outputScale;                  // used only when inputScalePtr is null
16 | 
17 |   // Normalization scale (HDR only)
18 |   uniform float normScale;
19 |   uniform float rcpNormScale;
20 | };
21 | 
22 | inline uniform float TransferFunction_getInputScale(const uniform TransferFunction* uniform self)
23 | {
24 |   return self->inputScalePtr ? *self->inputScalePtr : self->inputScale; // pointer value wins
25 | }
26 | 
27 | inline uniform float TransferFunction_getOutputScale(const uniform TransferFunction* uniform self)
28 | {
29 |   // Without a scale pointer, the stored output scale is returned as is
30 |   if (!self->inputScalePtr)
31 |     return self->outputScale;
32 |   // Otherwise it is the reciprocal of the pointed-to input scale (a zero scale maps to zero)
33 |   const uniform float scale = *self->inputScalePtr;
34 |   return (scale == 0.f) ? 0.f : (1.f / scale);
35 | }
36 | 
37 | // Computes the luminance of an RGB color as a weighted sum of its three components
38 | inline float luminance(vec3f c)
39 | {
40 |   return 0.212671f * c.x + 0.715160f * c.y + 0.072169f * c.z; // the weights sum to 1
41 | }


--------------------------------------------------------------------------------
/devices/cpu/cpu_autoexposure.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "cpu_autoexposure.h"
 5 | #include "cpu_autoexposure_ispc.h"
 6 | #include "cpu_common.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Stores the engine; the source image descriptor is forwarded to the Autoexposure base
11 |   CPUAutoexposure::CPUAutoexposure(CPUEngine* engine, const ImageDesc& srcDesc)
12 |     : Autoexposure(srcDesc), engine(engine)
13 |   {}
14 | 
15 |   void CPUAutoexposure::submitKernels(const Ref<CancellationToken>& ct)
16 |   {
17 |     if (!src)
18 |       throw std::logic_error("autoexposure source not set");
19 |     if (!dst)
20 |       throw std::logic_error("autoexposure destination not set");
21 | 
22 |     // Downsample the image to minimize sensitivity to noise
23 |     ispc::ImageAccessor srcAcc = *src;
24 |     float* dstPtr = getDstPtr();
25 |     // The whole computation is queued on the engine as a single function
26 |     engine->submitFunc([=]()
27 |     {
28 |       // Compute the average log luminance of the downsampled image
29 |       using Sum = std::pair<float, int>;
30 |       // Sum = (sum of log2 luminances, number of bins that were counted)
31 |       Sum sum =
32 |         tbb::parallel_deterministic_reduce(
33 |           tbb::blocked_range2d<int>(0, numBinsH, 0, numBinsW),
34 |           Sum(0.f, 0),
35 |           [&](const tbb::blocked_range2d<int>& r, Sum sum) -> Sum
36 |           {
37 |             // Iterate over bins
38 |             for (int i = r.rows().begin(); i != r.rows().end(); ++i)
39 |             {
40 |               for (int j = r.cols().begin(); j != r.cols().end(); ++j)
41 |               {
42 |                 // Compute the average luminance in the current bin
43 |                 const int beginH = int(ptrdiff_t(i)   * srcAcc.H / numBinsH);
44 |                 const int beginW = int(ptrdiff_t(j)   * srcAcc.W / numBinsW);
45 |                 const int endH   = int(ptrdiff_t(i+1) * srcAcc.H / numBinsH);
46 |                 const int endW   = int(ptrdiff_t(j+1) * srcAcc.W / numBinsW);
47 | 
48 |                 const float L = ispc::autoexposureDownsample(srcAcc, beginH, endH, beginW, endW);
49 | 
50 |                 // Accumulate the log luminance
51 |                 if (L > eps)
52 |                 {
53 |                   sum.first += math::log2(L);
54 |                   sum.second++;
55 |                 }
56 |               }
57 |             }
58 | 
59 |             return sum;
60 |           },
61 |           [](Sum a, Sum b) -> Sum { return Sum(a.first+b.first, a.second+b.second); }
62 |         );
63 |       // Result: key / 2^(mean log2 luminance), or 1 if no bin was brighter than eps
64 |       *dstPtr = (sum.second > 0) ? (key / math::exp2(sum.first / float(sum.second))) : 1.f;
65 |     }, ct);
66 |   }
67 | 
68 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/cpu_autoexposure.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/autoexposure.h"
 7 | #include "cpu_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class CPUAutoexposure final : public Autoexposure // autoexposure op for the CPU engine
12 |   {
13 |   public:
14 |     CPUAutoexposure(CPUEngine* engine, const ImageDesc& srcDesc);
15 |     // Op interface
16 |     Engine* getEngine() const override { return engine; }
17 |     void submitKernels(const Ref<CancellationToken>& ct) override;
18 | 
19 |   private:
20 |     CPUEngine* engine; // engine returned by getEngine(); stored as a raw pointer
21 |   };
22 | 
23 | OIDN_NAMESPACE_END
24 | 


--------------------------------------------------------------------------------
/devices/cpu/cpu_autoexposure.ispc:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "image_accessor.isph"
 5 | #include "color.isph"
 6 | 
 7 | // Returns the average luminance of the image bin [beginH, endH) x [beginW, endW)
 8 | export uniform float autoexposureDownsample(const uniform ImageAccessor& color,
 9 |                                             uniform int beginH, uniform int endH,
10 |                                             uniform int beginW, uniform int endW)
11 | {
12 |   float L = 0.f; // varying accumulator, reduced to one value at the end
13 |   // Rows are walked one by one; foreach spreads the columns of a row across program instances
14 |   for (uniform int h = beginH; h < endH; ++h)
15 |   {
16 |     foreach (w = beginW ... endW)
17 |     {
18 |       vec3f c = Image_get3(color, h, w);
19 |       c = clamp(nan_to_zero(c), 0.f, pos_max); // sanitize
20 |       L += luminance(c);
21 |     }
22 |   }
23 |   // Sum the partial results and divide by the pixel count (the bin is assumed to be non-empty)
24 |   return reduce_add(L) / ((endH - beginH) * (endW - beginW));
25 | }
26 | 


--------------------------------------------------------------------------------
/devices/cpu/cpu_common.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/image.h"
 7 | #include "core/tensor.h"
 8 | #include "core/tile.h"
 9 | #include "core/color.h"
10 | #include "cpu_input_process_ispc.h"
11 | #include "color_ispc.h"
12 | 
13 | OIDN_NAMESPACE_BEGIN
14 | 
15 |   ispc::Tile toISPC(const Tile& tile); // converts a core Tile into its ispc:: counterpart
16 |   ispc::TransferFunction toISPC(const TransferFunction& tf); // converts a core TransferFunction likewise
17 | 
18 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/cpu_conv.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/conv.h"
 7 | #include "cpu_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class CPUConv final : public Conv // convolution op bound to a CPUEngine
12 |   {
13 |   public:
14 |     CPUConv(CPUEngine* engine, const ConvDesc& desc); // defined out of line (not in this header)
15 | 
16 |     Engine* getEngine() const override { return engine; } // returns the stored engine pointer
17 |     void submitKernels(const Ref<CancellationToken>& ct) override; // defined out of line (not in this header)
18 | 
19 |   private:
20 |     CPUEngine* engine; // raw pointer; presumably set by the constructor - TODO confirm
21 |     int blockOCB; // block of output channel blocks
22 |     int blockOW;  // block of output width
23 |     int OCBB;     // number of output channel block blocks
24 |     int OWT;      // number of output width tiles
25 |   };
26 | 
27 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/cpu_conv_compute.isph:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | unmasked void CPUConvKernel_compute(T, blockOCB)(const uniform CPUConvKernel* uniform self,
 5 |                                                  uniform int ocb, uniform int oh,
 6 |                                                  uniform int owBegin, uniform int owEnd)
 7 | { // Computes output columns [owBegin, owEnd) of output row oh, starting at output channel block ocb
 8 |   const uniform int oc = ocb * blockC; // first output channel of block ocb
 9 |   // Kernel row range [khBegin, khEnd), clipped so that source row oh + kh - PH stays within [0, dst.H-1]
10 | #if KH == 3 && PH == 1
11 |   const uniform int khBegin = oh > 0 ? 0 : 1;
12 |   const uniform int khEnd   = oh < self->dst.H-1 ? 3 : 2;
13 | #else
14 |   const uniform int khBegin = max(PH - oh, 0);
15 |   const uniform int khEnd   = KH - max(PH + oh - (self->dst.H-1), 0);
16 | #endif
17 |   // Loop over input channel blocks: bias is passed only with the first block, ReLU is enabled only with the last
18 |   for (uniform int ic = 0; ic < self->src.C; ic += blockC)
19 |   {
20 |     const uniform uint8* uniform srcPtr    = Tensor_getPtr(self->src, ic, oh + khBegin - PH, owBegin);
21 |     const uniform uint8* uniform weightPtr = Tensor_getPtr(self->weight, oc, ic, khBegin, 0);
22 |     const uniform uint8* uniform biasPtr   = (ic == 0) ? Tensor_getPtr(self->bias, oc) : NULL;
23 |     uniform uint8* uniform dstPtr          = Tensor_getPtr(self->dst, oc, oh, owBegin);
24 |     const uniform bool relu = self->relu && ic == (self->src.C - blockC);
25 |     // Walk the output columns; the blocked fast path is taken only when the whole block needs no horizontal padding
26 |     uniform int ow = owBegin; // owBegin/owEnd *must* be aligned to block boundaries
27 |     while (ow < owEnd)
28 |     {
29 |       if (ow > PW - 1 && ow + blockOW + PW - 1 < self->dst.W)
30 |       {
31 |         // Fast path (no padding, width blocking)
32 |         CPUConvKernel_computeBlock(T, blockOCB, blockOW)(
33 |           srcPtr, self->src.hByteStride,
34 |           weightPtr, biasPtr,
35 |           dstPtr, self->dst.CByteStride,
36 |           khEnd - khBegin, // number of kernel rows left after vertical clipping
37 |           0, KW, // presumably the kernel column range (full width here) - confirm against computeBlock
38 |           relu);
39 |         // Advance source and destination by blockOW columns (blockC elements of T per column)
40 |         srcPtr += blockOW * blockC * sizeof(uniform T);
41 |         dstPtr += blockOW * blockC * sizeof(uniform T);
42 |         ow += blockOW;
43 |       }
44 |       else
45 |       {
46 |         // Slow path (padding, no width blocking)
47 |         CPUConvKernel_computeBlock(T, blockOCB, 1)(
48 |           srcPtr, self->src.hByteStride,
49 |           weightPtr, biasPtr,
50 |           dstPtr, self->dst.CByteStride,
51 |           khEnd - khBegin, // number of kernel rows left after vertical clipping
52 |         #if KW == 3 && PW == 1
53 |           ow > 0 ? 0 : 1,
54 |           ow < self->dst.W-1 ? 3 : 2,
55 |         #else
56 |           max(PW - ow, 0),
57 |           KW - max(PW + ow - (self->dst.W-1), 0),
58 |         #endif
59 |           relu);
60 |         // Advance source and destination by a single column
61 |         srcPtr += blockC * sizeof(uniform T);
62 |         dstPtr += blockC * sizeof(uniform T);
63 |         ow++;
64 |       }
65 |     }
66 |   }
67 | }


--------------------------------------------------------------------------------
/devices/cpu/cpu_device.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/device.h"
 7 | #include "tasking.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class CPUEngine;
12 | 
13 |   // CPU instruction set (the return type of CPUDevice::getArch())
14 |   enum class CPUArch
15 |   {
16 |     Unknown, // default value of CPUDevice::arch
17 |     SSE2,
18 |     SSE41,
19 |     AVX2,
20 |     AVX512,
21 |     NEON
22 |   };
23 | 
24 |   class CPUPhysicalDevice final : public PhysicalDevice // PhysicalDevice subclass for the CPU
25 |   {
26 |   public:
27 |     explicit CPUPhysicalDevice(int score); // meaning of score is defined outside this header - TODO confirm
28 |   };
29 | 
30 |   class CPUDevice final : public Device // Device implementation reporting DeviceType::CPU
31 |   {
32 |     friend class CPUEngine;  // the engines are granted access to the private members below
33 |     friend class DNNLEngine;
34 | 
35 |   public:
36 |     static std::vector<Ref<PhysicalDevice>> getPhysicalDevices(); // list handed to Context::registerDeviceType by the CPU module
37 |     static std::string getName();
38 |     static CPUArch getArch();
39 | 
40 |     CPUDevice();
41 | 
42 |     DeviceType getType() const override { return DeviceType::CPU; }
43 | 
44 |   #if !defined(OIDN_DNNL)
45 |     bool needWeightAndBiasOnDevice() const override { return false; } // no need to copy
46 |   #endif
47 |     Storage getPtrStorage(const void* ptr) override;
48 | 
49 |     int getInt(const std::string& name) override; // integer parameter lookup by name
50 |     void setInt(const std::string& name, int value) override; // integer parameter update by name
51 | 
52 |     void wait() override;
53 | 
54 |   protected:
55 |     void init() override;
56 | 
57 |   private:
58 |     CPUArch arch = CPUArch::Unknown; // Unknown until assigned elsewhere (assignment not visible in this header)
59 | 
60 |     int numThreads = 0; // autodetect by default
61 |     bool setAffinity = true; // enabled by default
62 |   };
63 | 
64 | OIDN_NAMESPACE_END
65 | 


--------------------------------------------------------------------------------
/devices/cpu/cpu_image_copy.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "cpu_image_copy.h"
 5 | #include "cpu_image_copy_ispc.h"
 6 | #include "cpu_common.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Keeps the engine that submitKernels() later submits the copy to
11 |   CPUImageCopy::CPUImageCopy(CPUEngine* engine) : engine{engine}
12 |   {}
13 | 
14 |   void CPUImageCopy::submitKernels(const Ref<CancellationToken>& ct)
15 |   {
16 |     check();
17 | 
18 |     // Kernel arguments; the submitted function captures its own copy
19 |     ispc::CPUImageCopyKernel args;
20 |     args.dst = *dst;
21 |     args.src = *src;
22 | 
23 |     engine->submitFunc([args]
24 |     {
25 |       // One ISPC kernel call per destination row
26 |       const auto copyRow = [&args](int row) { ispc::CPUImageCopyKernel_run(&args, row); };
27 |       parallel_for(args.dst.H, copyRow);
28 |     }, ct);
29 |   }
30 | 
31 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/cpu_image_copy.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/image_copy.h"
 7 | #include "cpu_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class CPUImageCopy final : public ImageCopy // image copy op bound to a CPUEngine
12 |   {
13 |   public:
14 |     explicit CPUImageCopy(CPUEngine* engine); // stores the engine pointer
15 | 
16 |     Engine* getEngine() const override { return engine; } // returns the stored engine pointer
17 |     void submitKernels(const Ref<CancellationToken>& ct) override; // submits a per-row copy to the engine
18 | 
19 |   private:
20 |     CPUEngine* engine; // raw pointer set by the constructor
21 |   };
22 | 
23 | OIDN_NAMESPACE_END
24 | 


--------------------------------------------------------------------------------
/devices/cpu/cpu_image_copy.ispc:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "image_accessor.isph"
 5 | 
 6 | struct CPUImageCopyKernel // arguments of CPUImageCopyKernel_run
 7 | {
 8 |   uniform ImageAccessor src; // image read by the kernel
 9 |   uniform ImageAccessor dst; // image written by the kernel; its W bounds the copied columns
10 | };
11 | 
12 | export void CPUImageCopyKernel_run(const uniform CPUImageCopyKernel* uniform self, uniform int h)
13 | {
14 |   // Copies row h pixel by pixel; the number of columns is taken from the destination
15 |   foreach (col = 0 ... self->dst.W)
16 |   {
17 |     Image_set3(self->dst, h, col, Image_get3(self->src, h, col));
18 |   }
19 | }


--------------------------------------------------------------------------------
/devices/cpu/cpu_input_process.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "cpu_input_process.h"
 5 | #include "cpu_input_process_ispc.h"
 6 | #include "cpu_common.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Forwards engine and descriptor to the InputProcess base and keeps the engine for kernel submission
11 |   CPUInputProcess::CPUInputProcess(CPUEngine* engine, const InputProcessDesc& desc)
12 |     : InputProcess(engine, desc), engine{engine}
13 |   {}
14 | 
15 |   void CPUInputProcess::submitKernels(const Ref<CancellationToken>& ct)
16 |   {
17 |     check();
18 | 
19 |     ispc::CPUInputProcessKernel args;
20 |     Image noImage; // default-constructed stand-in for an absent auxiliary image
21 | 
22 |     // Main input: color if set, else albedo, else normal; auxiliary images are passed only alongside color
23 |     args.input  = color ? *color : (albedo ? *albedo : *normal);
24 |     args.albedo = (color && albedo) ? *albedo : noImage;
25 |     args.normal = (color && normal) ? *normal : noImage;
26 |     args.dst    = *dst;
27 |     args.tile   = toISPC(tile);
28 |     args.hdr    = hdr;
29 |     args.snorm  = snorm;
30 |     args.transferFunc = toISPC(*transferFunc);
31 | 
32 |     engine->submitFunc([args]
33 |     {
34 |       // One ISPC kernel call per destination row
35 |       const auto processRow = [&args](int hDst) { ispc::CPUInputProcessKernel_run(&args, hDst); };
36 |       parallel_for(args.dst.H, processRow);
37 |     }, ct);
38 |   }
39 | 
40 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/cpu_input_process.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/input_process.h"
 7 | #include "cpu_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class CPUInputProcess final : public InputProcess // input processing op bound to a CPUEngine
12 |   {
13 |   public:
14 |     CPUInputProcess(CPUEngine* engine, const InputProcessDesc& desc); // stores the engine pointer
15 | 
16 |     Engine* getEngine() const override { return engine; } // returns the stored engine pointer
17 |     void submitKernels(const Ref<CancellationToken>& ct) override; // submits a per-row kernel to the engine
18 | 
19 |   private:
20 |     CPUEngine* engine; // raw pointer set by the constructor
21 |   };
22 | 
23 | OIDN_NAMESPACE_END
24 | 


--------------------------------------------------------------------------------
/devices/cpu/cpu_module.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "core/context.h"
 5 | #include "cpu_device.h"
 6 | 
 7 | OIDN_NAMESPACE_BEGIN
 8 | 
 9 |   class CPUDeviceFactory : public DeviceFactory // factory registered for DeviceType::CPU by this module
10 |   {
11 |   public:
12 |     Ref<Device> newDevice(const Ref<PhysicalDevice>& physicalDevice) override
13 |     {
14 |       assert(physicalDevice->type == DeviceType::CPU); // the argument is only inspected by this assertion
15 |       return makeRef<CPUDevice>(); // every call creates a new CPUDevice
16 |     }
17 |   };
18 | 
19 |   OIDN_DECLARE_INIT_STATIC_MODULE(device_cpu) // macro defined elsewhere; presumably declares the module init function
20 |   {
21 |     Context::registerDeviceType<CPUDeviceFactory>(DeviceType::CPU, CPUDevice::getPhysicalDevices()); // registers the CPU factory and its physical devices
22 |   }
23 | 
24 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/cpu_output_process.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "cpu_output_process.h"
 5 | #include "cpu_output_process_ispc.h"
 6 | #include "cpu_common.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Forwards the descriptor to the OutputProcess base and keeps the engine for kernel submission
11 |   CPUOutputProcess::CPUOutputProcess(CPUEngine* engine, const OutputProcessDesc& desc)
12 |     : OutputProcess(desc), engine{engine}
13 |   {}
14 | 
15 |   void CPUOutputProcess::submitKernels(const Ref<CancellationToken>& ct)
16 |   {
17 |     check();
18 | 
19 |     // Kernel arguments; the submitted function captures its own copy
20 |     ispc::CPUOutputProcessKernel args;
21 | 
22 |     args.dst   = *dst;
23 |     args.src   = *src;
24 |     args.tile  = toISPC(tile);
25 |     args.hdr   = hdr;
26 |     args.snorm = snorm;
27 |     args.transferFunc = toISPC(*transferFunc);
28 | 
29 |     engine->submitFunc([args]
30 |     {
31 |       // One ISPC kernel call per tile row
32 |       const auto processRow = [&args](int h) { ispc::CPUOutputProcessKernel_run(&args, h); };
33 |       parallel_for(args.tile.H, processRow);
34 |     }, ct);
35 |   }
36 | 
37 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/cpu_output_process.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/output_process.h"
 7 | #include "cpu_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class CPUOutputProcess final : public OutputProcess // output processing op bound to a CPUEngine
12 |   {
13 |   public:
14 |     CPUOutputProcess(CPUEngine* engine, const OutputProcessDesc& desc); // stores the engine pointer
15 | 
16 |     Engine* getEngine() const override { return engine; } // returns the stored engine pointer
17 |     void submitKernels(const Ref<CancellationToken>& ct) override; // submits a per-row kernel to the engine
18 | 
19 |   private:
20 |     CPUEngine* engine; // raw pointer set by the constructor
21 |   };
22 | 
23 | OIDN_NAMESPACE_END
24 | 


--------------------------------------------------------------------------------
/devices/cpu/cpu_output_process.ispc:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "tensor_accessor.isph"
 5 | #include "image_accessor.isph"
 6 | #include "color.isph"
 7 | #include "tile.isph"
 8 | 
 9 | struct CPUOutputProcessKernel // arguments of CPUOutputProcessKernel_run
10 | {
11 |   // Source tensor (read with Tensor_get3 starting at channel 0)
12 |   uniform TensorAccessor3D src;
13 | 
14 |   // Destination image (when its C is 1, the three channels are averaged)
15 |   uniform ImageAccessor dst;
16 | 
17 |   // Tile (supplies the source/destination begin offsets and the processed width)
18 |   uniform Tile tile;
19 | 
20 |   // Transfer function (its inverse and its output scale are applied)
21 |   uniform TransferFunction transferFunc;
22 |   uniform bool hdr;   // when false, values are clamped to at most 1
23 |   uniform bool snorm; // signed normalized ([-1..1])
24 | };
25 | 
26 | export void CPUOutputProcessKernel_run(const uniform CPUOutputProcessKernel* uniform self,
27 |                                        uniform int h)
28 | {
29 |   const uniform int srcRow = self->tile.hSrcBegin + h;
30 |   const uniform int dstRow = self->tile.hDstBegin + h;
31 |   const uniform float scale = TransferFunction_getOutputScale(&self->transferFunc);
32 | 
33 |   foreach (x = 0 ... self->tile.W)
34 |   {
35 |     const int srcCol = self->tile.wSrcBegin + x;
36 |     const int dstCol = self->tile.wDstBegin + x;
37 | 
38 |     // Load the network output for this pixel
39 |     vec3f rgb = Tensor_get3(self->src, 0, srcRow, srcCol);
40 | 
41 |     // It may contain negative values or even NaNs, so sanitize it first
42 |     rgb = clamp(nan_to_zero(rgb), 0.f, pos_max);
43 | 
44 |     // Apply the inverse transfer function
45 |     rgb = self->transferFunc.inverse(&self->transferFunc, rgb);
46 | 
47 |     // A single-channel destination receives the mean of the three channels
48 |     if (self->dst.C == 1)
49 |     {
50 |       const float mean = (rgb.x + rgb.y + rgb.z) * (1.f / 3.f);
51 |       rgb = make_vec3f(mean);
52 |     }
53 | 
54 |     // Signed normalized output: transform to [-1..1] and clamp from below
55 |     if (self->snorm)
56 |       rgb = max(rgb * 2.f - 1.f, -1.f);
57 | 
58 |     // Non-HDR output is clamped from above
59 |     if (!self->hdr)
60 |       rgb = min(rgb, 1.f);
61 | 
62 |     // Scale
63 |     rgb = rgb * scale;
64 | 
65 |     // Store
66 |     Image_set3(self->dst, dstRow, dstCol, rgb);
67 |   }
68 | }


--------------------------------------------------------------------------------
/devices/cpu/cpu_pool.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "cpu_pool.h"
 5 | #include "cpu_pool_ispc.h"
 6 | #include "cpu_common.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   CPUPool::CPUPool(CPUEngine* engine, const PoolDesc& desc)
11 |     : Pool(desc), engine(engine)
12 |   {
13 |     const bool layoutOk = srcDesc.layout == TensorLayout::Chw8c ||
14 |                           srcDesc.layout == TensorLayout::Chw16c;
15 |     if (!layoutOk) // only the Chw8c/Chw16c source layouts are accepted
16 |       throw std::invalid_argument("unsupported pooling source layout");
17 |   }
18 | 
19 |   void CPUPool::submitKernels(const Ref<CancellationToken>& ct)
20 |   {
21 |     if (!(src && dst))
22 |       throw std::logic_error("pooling source/destination not set");
23 | 
24 |     // Kernel arguments; the submitted function captures its own copies
25 |     ispc::CPUPoolKernel args;
26 |     args.src = *src;
27 |     args.dst = *dst;
28 | 
29 |     const int blockC = getTensorLayoutInfo(dstDesc.layout).blockC;
30 | 
31 |     engine->submitFunc([args, blockC]
32 |     {
33 |       // One ISPC kernel call per (channel block, destination row) pair
34 |       const auto poolRow = [&args](int cb, int h) { ispc::CPUPoolKernel_run(&args, cb, h); };
35 |       parallel_for(args.dst.C / blockC, args.dst.H, poolRow);
36 |     }, ct);
37 |   }
38 | 
39 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/cpu_pool.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/pool.h"
 7 | #include "cpu_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class CPUPool final : public Pool // pooling op bound to a CPUEngine
12 |   {
13 |   public:
14 |     CPUPool(CPUEngine* engine, const PoolDesc& desc); // throws std::invalid_argument unless the source layout is Chw8c/Chw16c
15 | 
16 |     Engine* getEngine() const override { return engine; } // returns the stored engine pointer
17 |     void submitKernels(const Ref<CancellationToken>& ct) override; // throws std::logic_error if src/dst are not set
18 | 
19 |   private:
20 |     CPUEngine* engine; // raw pointer set by the constructor
21 |   };
22 | 
23 | OIDN_NAMESPACE_END
24 | 


--------------------------------------------------------------------------------
/devices/cpu/cpu_pool.ispc:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "tensor_accessor.isph"
 5 | 
 6 | struct CPUPoolKernel // arguments of CPUPoolKernel_run
 7 | {
 8 |   uniform TensorAccessor3D src; // tensor read by the kernel (addressed as twice the height and width of dst)
 9 |   uniform TensorAccessor3D dst; // tensor written by the kernel; its H and W drive the indexing
10 | };
11 | 
12 | export void CPUPoolKernel_run(const uniform CPUPoolKernel* uniform self,
13 |                               uniform int cb, uniform int h)
14 | {
15 |   // 2x2 max pooling of destination row h in channel block cb (B is defined outside this file)
16 |   const uniform size_t dstH = (uniform size_t)self->dst.H;
17 |   const uniform size_t dstW = (uniform size_t)self->dst.W;
18 |   const uniform size_t dstOffset = (cb*dstH + h) * (dstW*B);
19 | 
20 |   // The source offsets treat the source as densely packed with twice the height and width
21 |   uniform float* const uniform outRow = (uniform float* uniform)self->dst.ptr + dstOffset;
22 |   uniform float* const uniform inRow0 = (uniform float* uniform)self->src.ptr + dstOffset * 4;
23 |   uniform float* const uniform inRow1 = inRow0 + dstW*2*B; // row below
24 | 
25 |   for (uniform size_t x = 0; x < dstW; ++x)
26 |   {
27 |     const float v00 = *((varying float* uniform)&inRow0[x*2*B  ]);
28 |     const float v01 = *((varying float* uniform)&inRow0[x*2*B+B]);
29 |     const float v10 = *((varying float* uniform)&inRow1[x*2*B  ]);
30 |     const float v11 = *((varying float* uniform)&inRow1[x*2*B+B]);
31 |     streaming_store(&outRow[x*B], max(max(v00, v01), max(v10, v11)));
32 |   }
33 | }


--------------------------------------------------------------------------------
/devices/cpu/cpu_upsample.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "cpu_upsample.h"
 5 | #include "cpu_upsample_ispc.h"
 6 | #include "cpu_common.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   CPUUpsample::CPUUpsample(CPUEngine* engine, const UpsampleDesc& desc)
11 |     : Upsample(desc), engine(engine)
12 |   {
13 |     const bool layoutOk = srcDesc.layout == TensorLayout::chw   ||
14 |                           srcDesc.layout == TensorLayout::Chw8c ||
15 |                           srcDesc.layout == TensorLayout::Chw16c;
16 |     if (!layoutOk) // only the chw, Chw8c and Chw16c source layouts are accepted
17 |       throw std::invalid_argument("unsupported upsampling source layout");
18 |   }
19 | 
20 |   void CPUUpsample::submitKernels(const Ref<CancellationToken>& ct)
21 |   {
22 |     if (!(src && dst))
23 |       throw std::logic_error("upsampling source/destination not set");
24 | 
25 |     if (srcDesc.layout == TensorLayout::chw)
26 |     {
27 |       // Plain chw layout: handled in C++ directly on the float data
28 |       const int numPlanes = src->getPaddedC();
29 |       const size_t srcH = src->getH();
30 |       const size_t srcW = src->getW();
31 |       const float* srcData = (float*)src->getPtr();
32 |       float* dstData = (float*)dst->getPtr();
33 | 
34 |       engine->submitFunc([=]
35 |       {
36 |         parallel_for(numPlanes, srcH, [&](int c, size_t h)
37 |         {
38 |           // Each source row maps to two destination rows of twice the width, hence the factor 4
39 |           const size_t srcOffset = (c*srcH + h) * srcW;
40 |           const float* srcRow = srcData + srcOffset;
41 |           float* dstRow0 = dstData + srcOffset * 4;
42 |           float* dstRow1 = dstRow0 + srcW*2; // row below
43 | 
44 |           #pragma unroll(16)
45 |           for (size_t w = 0; w < srcW; ++w)
46 |           {
47 |             const float v = srcRow[w]; // replicated into a 2x2 destination block
48 |             dstRow0[w*2  ] = v;
49 |             dstRow0[w*2+1] = v;
50 |             dstRow1[w*2  ] = v;
51 |             dstRow1[w*2+1] = v;
52 |           }
53 |         });
54 |       }, ct);
55 |     }
56 |     else
57 |     {
58 |       // Blocked layouts: one ISPC kernel call per (channel block, source row) pair
59 |       const int blockC = getTensorLayoutInfo(srcDesc.layout).blockC;
60 | 
61 |       ispc::CPUUpsampleKernel args;
62 |       args.src = *src;
63 |       args.dst = *dst;
64 | 
65 |       engine->submitFunc([args, blockC]
66 |       {
67 |         parallel_for(args.src.C / blockC, args.src.H, [&args](int cb, int h)
68 |         {
69 |           ispc::CPUUpsampleKernel_run(&args, cb, h);
70 |         });
71 |       }, ct);
72 |     }
73 |   }
74 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/cpu_upsample.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/upsample.h"
 7 | #include "cpu_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class CPUUpsample final : public Upsample // upsampling op bound to a CPUEngine
12 |   {
13 |   public:
14 |     CPUUpsample(CPUEngine* engine, const UpsampleDesc& desc); // throws std::invalid_argument unless the source layout is chw/Chw8c/Chw16c
15 | 
16 |     Engine* getEngine() const override { return engine; } // returns the stored engine pointer
17 |     void submitKernels(const Ref<CancellationToken>& ct) override; // throws std::logic_error if src/dst are not set
18 | 
19 |   private:
20 |     CPUEngine* engine; // raw pointer set by the constructor
21 |   };
22 | 
23 | OIDN_NAMESPACE_END
24 | 


--------------------------------------------------------------------------------
/devices/cpu/cpu_upsample.ispc:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "tensor_accessor.isph"
 5 | 
 6 | struct CPUUpsampleKernel // arguments of CPUUpsampleKernel_run
 7 | {
 8 |   uniform TensorAccessor3D src; // tensor read by the kernel; its H and W drive the indexing
 9 |   uniform TensorAccessor3D dst; // tensor written by the kernel (addressed as twice the height and width of src)
10 | };
11 | 
12 | export void CPUUpsampleKernel_run(const uniform CPUUpsampleKernel* uniform self,
13 |                                   uniform int cb, uniform int h)
14 | {
15 |   // Replicates source row h of channel block cb into 2x2 destination blocks (B is defined outside this file)
16 |   const uniform size_t srcH = (uniform size_t)self->src.H;
17 |   const uniform size_t srcW = (uniform size_t)self->src.W;
18 |   const uniform size_t srcOffset = (cb*srcH + h) * (srcW*B);
19 | 
20 |   uniform float* const uniform inRow   = (uniform float* uniform)self->src.ptr + srcOffset;
21 |   uniform float* const uniform outRow0 = (uniform float* uniform)self->dst.ptr + srcOffset * 4;
22 |   uniform float* const uniform outRow1 = outRow0 + srcW*2*B; // row below
23 | 
24 |   for (uniform size_t x = 0; x < srcW; ++x)
25 |   {
26 |     const float v = *((varying float* uniform)&inRow[x*B]);
27 |     streaming_store(&outRow0[x*2*B  ], v);
28 |     streaming_store(&outRow0[x*2*B+B], v);
29 |     streaming_store(&outRow1[x*2*B  ], v);
30 |     streaming_store(&outRow1[x*2*B+B], v);
31 |   }
32 | }


--------------------------------------------------------------------------------
/devices/cpu/dnnl/dnnl_common.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/tensor.h"
 7 | #include "dnnl_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   dnnl::memory::data_type toDNNL(DataType dataType); // maps a DataType to a DNNL memory data type
12 |   dnnl::memory::desc toDNNL(const TensorDesc& td);   // builds a DNNL memory descriptor from a tensor descriptor
13 | 
14 |   // Creates a DNNL memory structure for a buffer
15 |   dnnl::memory toDNNL(const Ref<Buffer>& buffer);
16 | 
17 |   // Returns the internal DNNL memory structure of a DNNLTensor
18 |   const dnnl::memory& getDNNL(const Ref<Tensor>& tensor);
19 | 
20 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/dnnl/dnnl_conv.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "dnnl_conv.h"
 5 | #include "dnnl_tensor.h"
 6 | 
 7 | OIDN_NAMESPACE_BEGIN
 8 | 
 9 |   DNNLConv::DNNLConv(DNNLEngine* engine, const ConvDesc& desc)
10 |     : Conv(desc),
11 |       engine(engine)
12 |   {
13 |     // The scratchpad is provided by the user (see setScratch)
14 |     dnnl::primitive_attr primAttr;
15 |     primAttr.set_scratchpad_mode(dnnl::scratchpad_mode::user);
16 | 
17 |     // Incorporate the ReLU activation as a post-op of the convolution
18 |     if (activation == Activation::ReLU)
19 |     {
20 |       dnnl::post_ops postOps;
21 |       postOps.append_eltwise(dnnl::algorithm::eltwise_relu, 0.f /*alpha*/, 0.f /*beta*/);
22 |       primAttr.set_post_ops(postOps);
23 |     }
24 | 
25 |     // Strides of 1 and padding of 1, the latter passed for both padding arguments
26 |     const dnnl::memory::dims unitStrides = {1, 1};
27 |     const dnnl::memory::dims unitPadding = {1, 1};
28 | 
29 |     primDesc = dnnl::convolution_forward::primitive_desc(
30 |       engine->getDNNLEngine(),
31 |       dnnl::prop_kind::forward_inference,
32 |       dnnl::algorithm::convolution_direct,
33 |       toDNNL(srcDesc),
34 |       toDNNL(weightDesc),
35 |       toDNNL(biasDesc),
36 |       toDNNL(dstDesc),
37 |       unitStrides, unitPadding, unitPadding,
38 |       primAttr);
39 |   }
40 | 
41 |   size_t DNNLConv::getScratchByteSize()
42 |   {
43 |     return primDesc.scratchpad_desc().get_size(); // size reported by the primitive descriptor's scratchpad descriptor
44 |   }
45 | 
46 |   void DNNLConv::setScratch(const Ref<Buffer>& scratch)
47 |   {
48 |     this->scratch = scratch; // keep a reference to the buffer in the op
49 |     args[DNNL_ARG_SCRATCHPAD] = toDNNL(scratch); // bind it as the scratchpad execution argument
50 |   }
51 | 
52 |   void DNNLConv::updateSrc()
53 |   {
54 |     args[DNNL_ARG_SRC] = getDNNL(src); // rebind the source execution argument to the current src tensor
55 |   }
56 | 
57 |   void DNNLConv::updateWeight()
58 |   {
59 |     args[DNNL_ARG_WEIGHTS] = getDNNL(weight); // rebind the weights execution argument to the current weight tensor
60 |   }
61 | 
62 |   void DNNLConv::updateBias()
63 |   {
64 |     args[DNNL_ARG_BIAS] = getDNNL(bias); // rebind the bias execution argument to the current bias tensor
65 |   }
66 | 
67 |   void DNNLConv::updateDst()
68 |   {
69 |     args[DNNL_ARG_DST] = getDNNL(dst); // rebind the destination execution argument to the current dst tensor
70 |   }
71 | 
72 |   void DNNLConv::finalize()
73 |   {
74 |     prim = dnnl::convolution_forward(primDesc); // create the primitive; submitKernels() throws while prim is unset
75 |   }
76 | 
77 |   void DNNLConv::submitKernels(const Ref<CancellationToken>& ct)
78 |   {
79 |     if (!prim)
80 |       throw std::logic_error("convolution not finalized");
81 |     if (!(src && dst && weight && bias))
82 |       throw std::logic_error("convolution source/weight/bias/destination not set");
83 |     // prim, args and engine are members, accessed through this when the submitted function runs
84 |     engine->submitFunc([this] { prim.execute(engine->getDNNLStream(), args); }, ct);
85 |   }
86 | 
87 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cpu/dnnl/dnnl_conv.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/conv.h"
 7 | #include "dnnl_common.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class DNNLConv final : public Conv // convolution op executed through a DNNL convolution_forward primitive
12 |   {
13 |   public:
14 |     DNNLConv(DNNLEngine* engine, const ConvDesc& desc); // builds the primitive descriptor
15 | 
16 |     Engine* getEngine() const override { return engine; } // returns the stored engine pointer
17 | 
18 |     size_t getScratchByteSize() override; // size of the primitive descriptor's scratchpad
19 |     void setScratch(const Ref<Buffer>& scratch) override; // binds the buffer as DNNL_ARG_SCRATCHPAD
20 | 
21 |     void finalize() override; // creates the primitive; required before submitKernels()
22 |     void submitKernels(const Ref<CancellationToken>& ct) override; // throws std::logic_error if not finalized or tensors are unset
23 | 
24 |   private:
25 |     void updateSrc() override;    // each update*() rebinds the matching entry of args
26 |     void updateWeight() override;
27 |     void updateBias() override;
28 |     void updateDst() override;
29 | 
30 |     DNNLEngine* engine; // raw pointer set by the constructor
31 |     dnnl::convolution_forward::primitive_desc primDesc; // built in the constructor
32 |     dnnl::convolution_forward prim; // created in finalize()
33 |     std::unordered_map<int, dnnl::memory> args; // DNNL_ARG_* -> memory, passed to prim.execute()
34 |     Ref<Buffer> scratch; // reference to the buffer given to setScratch()
35 |   };
36 | 
37 | OIDN_NAMESPACE_END
38 | 


--------------------------------------------------------------------------------
/devices/cpu/dnnl/dnnl_engine.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "dnnl_engine.h"
 5 | #include "dnnl_tensor.h"
 6 | #include "dnnl_conv.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   DNNLEngine::DNNLEngine(CPUDevice* device, int numThreads)
11 |     : CPUEngine(device, numThreads)
12 |   { // device->verbose values 3 and >= 4 map to DNNL verbose levels 1 and 2; lower values map to 0
13 |     dnnl_set_verbose(clamp(device->verbose - 2, 0, 2)); // unfortunately this is not per-device but global
14 |     dnnlEngine = dnnl::engine(dnnl::engine::kind::cpu, 0); // CPU engine with index 0
15 |     dnnlStream = dnnl::stream(dnnlEngine); // stream created on that engine
16 |   }
17 | 
18 |   Ref<Tensor> DNNLEngine::newTensor(const TensorDesc& desc, Storage storage)
19 |   {
20 |     if (!isSupported(desc))
21 |       throw std::invalid_argument("unsupported tensor descriptor");
22 |     // The DNNLTensor constructor allocates a buffer of desc.getByteSize() bytes from this engine
23 |     return makeRef<DNNLTensor>(this, desc, storage);
24 |   }
25 | 
26 |   Ref<Tensor> DNNLEngine::newTensor(const Ref<Buffer>& buffer, const TensorDesc& desc, size_t byteOffset)
27 |   {
28 |     if (!isSupported(desc))
29 |       throw std::invalid_argument("unsupported tensor descriptor");
30 |     if (buffer->getEngine() != this) // the DNNLTensor constructor casts the buffer's engine to DNNLEngine
31 |       throw std::invalid_argument("buffer was created by a different engine");
32 |     // The tensor refers to the region of the existing buffer starting at byteOffset
33 |     return makeRef<DNNLTensor>(buffer, desc, byteOffset);
34 |   }
35 | 
36 |   Ref<Conv> DNNLEngine::newConv(const ConvDesc& desc)
37 |   {
38 |     return makeRef<DNNLConv>(this, desc); // convolutions created by this engine are DNNLConv ops
39 |   }
40 | 
41 | OIDN_NAMESPACE_END
42 | 


--------------------------------------------------------------------------------
/devices/cpu/dnnl/dnnl_engine.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "../cpu_engine.h"
 7 | #include "mkl-dnn/include/dnnl.hpp"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class DNNLEngine final : public CPUEngine // CPUEngine that creates DNNL-based tensors and convolutions
12 |   {
13 |   public:
14 |     DNNLEngine(CPUDevice* device, int numThreads); // creates the DNNL engine and stream
15 | 
16 |     oidn_inline dnnl::engine& getDNNLEngine() { return dnnlEngine; } // accessor for the owned DNNL engine
17 |     oidn_inline dnnl::stream& getDNNLStream() { return dnnlStream; } // accessor for the owned DNNL stream
18 | 
19 |     Ref<Tensor> newTensor(const TensorDesc& desc, Storage storage) override; // tensor with a newly allocated buffer
20 |     Ref<Tensor> newTensor(const Ref<Buffer>& buffer, const TensorDesc& desc, size_t byteOffset) override; // tensor over an existing buffer
21 | 
22 |     // Ops
23 |     Ref<Conv> newConv(const ConvDesc& desc) override; // returns a DNNLConv
24 | 
25 |   private:
26 |     dnnl::engine dnnlEngine; // assigned in the constructor (CPU kind, index 0)
27 |     dnnl::stream dnnlStream; // assigned in the constructor from dnnlEngine
28 |   };
29 | 
30 | OIDN_NAMESPACE_END
31 | 


--------------------------------------------------------------------------------
/devices/cpu/dnnl/dnnl_tensor.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "dnnl_tensor.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   DNNLTensor::DNNLTensor(DNNLEngine* engine, const TensorDesc& desc, Storage storage)
 9 |     : Tensor(engine->newBuffer(desc.getByteSize(), storage), desc) // new buffer sized by the descriptor
10 |   {
11 |     mem = dnnl::memory(toDNNL(getDesc()), engine->getDNNLEngine(), buffer->getPtr()); // DNNL memory over the buffer pointer
12 |   }
13 | 
14 |   DNNLTensor::DNNLTensor(const Ref<Buffer>& buffer, const TensorDesc& desc, size_t byteOffset)
15 |     : Tensor(buffer, desc, byteOffset)
16 |   {
17 |     if (byteOffset + getByteSize() > buffer->getByteSize())
18 |       throw Exception(Error::InvalidArgument, "buffer region is out of bounds");
19 | 
20 |     mem = dnnl::memory(toDNNL(getDesc()),
21 |                        static_cast<DNNLEngine*>(buffer->getEngine())->getDNNLEngine(),
22 |                        static_cast<char*>(buffer->getPtr()) + byteOffset);
23 |   }
24 | 
25 |   void DNNLTensor::postRealloc()
26 |   {
27 |     if (!buffer) return; // without a backing buffer there is no pointer to refresh
28 |     mem.set_data_handle(static_cast<char*>(buffer->getPtr()) + byteOffset); // buffer base + tensor offset
29 |   }
30 | 
31 | OIDN_NAMESPACE_END
32 | 


--------------------------------------------------------------------------------
/devices/cpu/dnnl/dnnl_tensor.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/tensor.h"
 7 | #include "dnnl_common.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   // Native DNNL tensor: a Tensor that additionally keeps a dnnl::memory object for its data
12 |   class DNNLTensor final : public Tensor
13 |   {
14 |   public:
15 |     DNNLTensor(DNNLEngine* engine, const TensorDesc& desc, Storage storage);
16 |     DNNLTensor(const Ref<Buffer>& buffer, const TensorDesc& desc, size_t byteOffset);
17 |     // The data pointer is read back from the DNNL memory object
18 |     void* getPtr() const override { return mem.get_data_handle(); }
19 |     const dnnl::memory& getDNNLMemory() const { return mem; }
20 | 
21 |   private:
22 |     void postRealloc() override; // updates the data handle of mem from the current buffer pointer
23 | 
24 |     dnnl::memory mem;
25 |   };
26 | 
27 | OIDN_NAMESPACE_END
28 | 


--------------------------------------------------------------------------------
/devices/cpu/platform.ispc:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "platform.isph"
 5 | 
 6 | // -----------------------------------------------------------------------------------------------
 7 | // System information
 8 | // -----------------------------------------------------------------------------------------------
 9 | 
10 | enum CPUArch // instruction set identifiers reported by getCPUArch()
11 | {
12 |   CPUArch_Unknown, // returned when none of the checked ISPC_TARGET_* macros is defined
13 |   CPUArch_SSE2,
14 |   CPUArch_SSE4,
15 |   CPUArch_AVX2,
16 |   CPUArch_AVX512, // covers both the AVX512SKX and AVX512SPR targets
17 |   CPUArch_NEON
18 | };
19 | 
20 | export uniform CPUArch getCPUArch()
21 | {
22 |   // The ISPC_TARGET_* macro defined for the current compilation target selects the result
23 | #if defined(ISPC_TARGET_SSE2)
24 |   return CPUArch_SSE2;
25 | #elif defined(ISPC_TARGET_SSE4)
26 |   return CPUArch_SSE4;
27 | #elif defined(ISPC_TARGET_AVX2)
28 |   return CPUArch_AVX2;
29 | #elif defined(ISPC_TARGET_AVX512SKX) || defined(ISPC_TARGET_AVX512SPR)
30 |   return CPUArch_AVX512;
31 | #elif defined(ISPC_TARGET_NEON)
32 |   return CPUArch_NEON;
33 | #else
34 |   return CPUArch_Unknown;
35 | #endif
36 | }


--------------------------------------------------------------------------------
/devices/cpu/platform.isph:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | // -------------------------------------------------------------------------------------------------
 7 | // Macros: OIDN_CONCAT pastes two tokens; the inner helper lets macro arguments expand first
 8 | // -------------------------------------------------------------------------------------------------
 9 | 
10 | #define _OIDN_CONCAT(a, b) a##b
11 | #define OIDN_CONCAT(a, b) _OIDN_CONCAT(a, b)
12 | 
13 | // -------------------------------------------------------------------------------------------------
14 | // Types: unsigned integer aliases, defined only when ISPC_UINT_IS_DEFINED is not set
15 | // -------------------------------------------------------------------------------------------------
16 | 
17 | #ifndef ISPC_UINT_IS_DEFINED
18 | typedef unsigned int8  uint8;
19 | typedef unsigned int16 uint16;
20 | typedef unsigned int32 uint32;
21 | typedef unsigned int64 uint64;
22 | #endif


--------------------------------------------------------------------------------
/devices/cpu/tasking.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "tasking.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   // -----------------------------------------------------------------------------------------------
 9 |   // PinningObserver
10 |   // -----------------------------------------------------------------------------------------------
11 | 
12 |   PinningObserver::PinningObserver(const std::shared_ptr<ThreadAffinity>& affinity)
13 |     : affinity(affinity)
14 |   {
15 |     observe(true); // enable the on_scheduler_entry/exit callbacks
16 |   }
17 | 
18 |   PinningObserver::PinningObserver(const std::shared_ptr<ThreadAffinity>& affinity, tbb::task_arena& arena)
19 |     : tbb::task_scheduler_observer(arena), // observer tied to the given arena
20 |       affinity(affinity)
21 |   {
22 |     observe(true); // enable the on_scheduler_entry/exit callbacks
23 |   }
24 | 
25 |   PinningObserver::~PinningObserver()
26 |   {
27 |     observe(false); // stop observing before the object goes away
28 |   }
29 | 
30 |   void PinningObserver::on_scheduler_entry(bool isWorker)
31 |   {
32 |     const int slot = tbb::this_task_arena::current_thread_index();
33 |     if (slot < 0) return; // negative index: no affinity entry to apply
34 |     affinity->set(slot); // apply the affinity associated with this thread index
35 |   }
36 | 
37 |   void PinningObserver::on_scheduler_exit(bool isWorker)
38 |   {
39 |     const int slot = tbb::this_task_arena::current_thread_index();
40 |     if (slot < 0) return; // negative index: nothing was set for this thread
41 |     affinity->restore(slot); // undo what on_scheduler_entry() applied for this thread index
42 |   }
43 | 
44 | OIDN_NAMESPACE_END
45 | 


--------------------------------------------------------------------------------
/devices/cpu/tile.isph:
--------------------------------------------------------------------------------
 1 | // Copyright 2019 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | struct Tile // NOTE(review): presumably a rectangular region given by source/destination origins and a size - confirm with the kernels using it
 5 | {
 6 |   uniform int hSrcBegin; // presumably the first source row
 7 |   uniform int wSrcBegin; // presumably the first source column
 8 |   uniform int hDstBegin; // presumably the first destination row
 9 |   uniform int wDstBegin; // presumably the first destination column
10 |   uniform int H; // presumably the tile height
11 |   uniform int W; // presumably the tile width
12 | };


--------------------------------------------------------------------------------
/devices/cuda/cuda_conv.cu:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "cuda_conv.h"
 5 | #include "cutlass_conv.h"
 6 | 
 7 | OIDN_NAMESPACE_BEGIN
 8 | 
 9 |   Ref<Conv> newCUDAConv(CUDAEngine* engine, const ConvDesc& desc)
10 |   {
11 |     // Use the kernel set of the highest SM generation (80, 75 or 70) not above the engine's
12 |     const int smArch = engine->getSMArch();
13 |     if (smArch < 70)
14 |       throw std::runtime_error("unsupported convolution");
15 |     const std::vector<CutlassConvFactory> candidates =
16 |       (smArch >= 80) ? getCutlassConvInstances<80>() :
17 |       (smArch >= 75) ? getCutlassConvInstances<75>() :
18 |                        getCutlassConvInstances<70>();
19 | 
20 |     // Dimensions of the implicit GEMM that corresponds to this forward convolution
21 |     const auto convProblem = toCutlassProblemSize(desc);
22 |     const auto gemm = cutlass::conv::implicit_gemm_problem_size(cutlass::conv::Operator::kFprop, convProblem);
23 |     const size_t M = gemm.m();
24 |     const size_t N = gemm.n();
25 |     const size_t K = gemm.k();
26 | 
27 |     // Minimum accumulator type: the source data type with fast math, Float32 otherwise
28 |     const DataType accumType = desc.fastMath ? desc.srcDesc.dataType : DataType::Float32;
29 | 
30 |     // Keep the candidate with the smallest block-padded GEMM volume; ties go to the larger block,
31 |     // then to a kernel whose accumulator type matches accumType exactly
32 |     const CutlassConvFactory* winner = nullptr;
33 |     int winnerBlockSize = 0;
34 |     size_t winnerCost = std::numeric_limits<size_t>::max();
35 | 
36 |     for (const CutlassConvFactory& candidate : candidates)
37 |     {
38 |       const bool typesOk = candidate.dataType == desc.srcDesc.dataType && !(candidate.accumType < accumType);
39 |       if (!typesOk)
40 |         continue;
41 | 
42 |       const int blockSize = candidate.blockM * candidate.blockN * candidate.blockK;
43 |       const size_t cost = round_up(M, candidate.blockM) * round_up(N, candidate.blockN) * round_up(K, candidate.blockK);
44 |       bool better = cost < winnerCost;
45 |       if (cost == winnerCost)
46 |         better = blockSize > winnerBlockSize ||
47 |                  (blockSize == winnerBlockSize && candidate.accumType == accumType);
48 |       if (!better)
49 |         continue;
50 |       winner = &candidate;
51 |       winnerBlockSize = blockSize;
52 |       winnerCost = cost;
53 |     }
54 | 
55 |     if (winner == nullptr)
56 |       throw std::runtime_error("unsupported convolution");
57 |     return winner->make(engine, desc);
58 |   }
59 | 
60 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cuda/cuda_conv.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/conv.h"
 7 | #include "cuda_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   Ref<Conv> newCUDAConv(CUDAEngine* engine, const ConvDesc& desc); // creates a convolution op for the given CUDA engine
12 | 
13 | OIDN_NAMESPACE_END
14 | 


--------------------------------------------------------------------------------
/devices/cuda/cuda_device.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/device.h"
 7 | #if defined(OIDN_DEVICE_CUDA_API_DRIVER)
 8 |   #include "curtn.h"
 9 | #endif
10 | #include <cuda_runtime.h>
11 | 
12 | OIDN_NAMESPACE_BEGIN
13 | 
14 |   void checkError(cudaError_t error); // NOTE(review): presumably throws on anything but cudaSuccess - confirm in the definition
15 | 
16 |   class CUDAEngine;
17 | 
18 |   class CUDAPhysicalDevice : public PhysicalDevice // PhysicalDevice record that also carries a CUDA device ID
19 |   {
20 |   public:
21 |     int deviceID;
22 |     // Built from the CUDA device ID, its properties and a score
23 |     CUDAPhysicalDevice(int deviceID, const cudaDeviceProp& prop, int score);
24 |   };
25 | 
26 |   class CUDADevice final : public Device // Device implementation whose getType() is DeviceType::CUDA
27 |   {
28 |     friend class CUDAEngine; // gives the engine access to the private members below
29 | 
30 |   public:
31 |     static std::vector<Ref<PhysicalDevice>> getPhysicalDevices();
32 |     static bool isSupported(const cudaDeviceProp& prop);
33 |     static bool isSupported(int deviceID);
34 |     // Construct from an explicit device ID and stream, or from a physical device record
35 |     CUDADevice(int deviceID, cudaStream_t stream);
36 |     explicit CUDADevice(const Ref<CUDAPhysicalDevice>& physicalDevice);
37 |     ~CUDADevice();
38 | 
39 |     void enter() override;
40 |     void leave() override;
41 | 
42 |     DeviceType getType() const override { return DeviceType::CUDA; }
43 | 
44 |     Storage getPtrStorage(const void* ptr) override;
45 | 
46 |     void wait() override;
47 | 
48 |   private:
49 |     void init() override;
50 | 
51 |     int deviceID = 0;
52 |   #if defined(OIDN_DEVICE_CUDA_API_DRIVER)
53 |     CUdevice deviceHandle = -1; // Driver API builds only
54 |     CUcontext context = nullptr; // Driver API builds only
55 |   #else
56 |     int prevDeviceID = -1; // NOTE(review): presumably the device that was current before enter() - confirm in the .cpp
57 |   #endif
58 |     cudaStream_t stream = nullptr;
59 | 
60 |     int maxWorkGroupSize = 0;
61 |     int subgroupSize = 0;
62 |     int smArch = 0; // compute capability
63 |   };
64 | 
65 | OIDN_NAMESPACE_END
66 | 


--------------------------------------------------------------------------------
/devices/cuda/cuda_external_buffer.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/buffer.h"
 7 | #include "cuda_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class CUDAExternalBuffer : public USMBuffer // USMBuffer that holds a cudaExternalMemory_t handle
12 |   {
13 |   public:
14 |     CUDAExternalBuffer(Engine* engine,
15 |                        ExternalMemoryTypeFlag fdType,
16 |                        int fd, size_t byteSize);
17 |     // NOTE(review): overloads presumably import memory from a file descriptor (above) or a handle/name pair (below) - confirm in the .cpp
18 |     CUDAExternalBuffer(Engine* engine,
19 |                        ExternalMemoryTypeFlag handleType,
20 |                        void* handle, const void* name, size_t byteSize);
21 | 
22 |     ~CUDAExternalBuffer();
23 | 
24 |   private:
25 |     cudaExternalMemory_t extMem;
26 |     // Helper taking a CUDA external memory handle descriptor
27 |     void init(const cudaExternalMemoryHandleDesc& handleDesc);
28 |   };
29 | 
30 | OIDN_NAMESPACE_END
31 | 


--------------------------------------------------------------------------------
/devices/cuda/cuda_module.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "core/context.h"
 5 | #include "cuda_device.h"
 6 | 
 7 | OIDN_NAMESPACE_BEGIN
 8 | 
 9 |   class CUDADeviceFactory : public CUDADeviceFactoryBase // factory that creates CUDADevice objects
10 |   {
11 |   public:
12 |     bool isDeviceSupported(int deviceID) override
13 |     {
14 |       return CUDADevice::isSupported(deviceID); // forwarded to the device class
15 |     }
16 | 
17 |     Ref<Device> newDevice(const int* deviceIDs, const cudaStream_t* streams, int numPairs) override
18 |     {
19 |       // Exactly one (device, stream) pair is accepted and neither array may be null
20 |       if (numPairs != 1)
21 |         throw Exception(Error::InvalidArgument, "invalid number of CUDA devices/streams");
22 |       if (!deviceIDs)
23 |         throw Exception(Error::InvalidArgument, "array of CUDA devices is null");
24 |       if (!streams)
25 |         throw Exception(Error::InvalidArgument, "array of CUDA streams is null");
26 |       return makeRef<CUDADevice>(*deviceIDs, *streams);
27 |     }
28 | 
29 |     Ref<Device> newDevice(const Ref<PhysicalDevice>& physicalDevice) override
30 |     {
31 |       assert(DeviceType::CUDA == physicalDevice->type); // debug check before the static cast
32 |       return makeRef<CUDADevice>(staticRefCast<CUDAPhysicalDevice>(physicalDevice));
33 |     }
34 |   };
35 | 
36 |   OIDN_DECLARE_INIT_MODULE(device_cuda) // module initializer: registers the CUDA device type
37 |   {
38 |   #if defined(OIDN_DEVICE_CUDA_API_DRIVER)
39 |     if (curtn::init() != cudaSuccess)
40 |       return; // CURTN failed to initialize: the CUDA device type is not registered
41 |   #endif
42 |     // Register the factory together with the list of physical CUDA devices
43 |     Context::registerDeviceType<CUDADeviceFactory>(DeviceType::CUDA, CUDADevice::getPhysicalDevices());
44 |   }
45 | 
46 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cuda/curtn.h:
--------------------------------------------------------------------------------
 1 | // CURTN: a nano implementation of the CUDA Runtime API on top of the Driver API
 2 | // Copyright 2024 Intel Corporation
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | #include <cuda.h>
 6 | #include <cuda_runtime.h>
 7 | 
 8 | namespace curtn
 9 | {
10 |   // Unlike CUDA Runtime, CURTN requires explicit initialization before the first API call
11 |   cudaError_t init(); // callers compare the result against cudaSuccess
12 | 
13 |   // Unlike CUDA Runtime, CURTN requires explicit initialization and cleanup of the current context
14 |   cudaError_t initContext();
15 |   cudaError_t cleanupContext();
16 | }


--------------------------------------------------------------------------------
/devices/cuda/cutlass_conv_sm70.cu:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "cutlass_conv.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 | // Volta (SM 7.0)
 9 | template<>
10 | std::vector<CutlassConvFactory> getCutlassConvInstances<70>()
11 | {
12 |   using namespace cutlass::arch;
13 |   using cutlass::gemm::GemmShape;
14 |   // NOTE(review): arguments look like <data type, accumulator type, arch tag, two GEMM tile shapes, stage count> - confirm in cutlass_conv.h
15 |   return
16 |   {
17 |     CutlassConvInstance<half, float, Sm70, GemmShape<256, 32, 32>, GemmShape<64, 32, 32>, 2>::get(),
18 |     CutlassConvInstance<half, float, Sm70, GemmShape<256, 64, 32>, GemmShape<64, 64, 32>, 2>::get(),
19 |     // Same shapes with half instead of float as the second type argument
20 |     CutlassConvInstance<half, half,  Sm70, GemmShape<256, 32, 32>, GemmShape<64, 32, 32>, 2>::get(),
21 |     CutlassConvInstance<half, half,  Sm70, GemmShape<256, 64, 32>, GemmShape<64, 64, 32>, 2>::get(),
22 |   };
23 | }
24 | 
25 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cuda/cutlass_conv_sm75.cu:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "cutlass_conv.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 | // Turing (SM 7.5)
 9 | template<>
10 | std::vector<CutlassConvFactory> getCutlassConvInstances<75>()
11 | {
12 |   using namespace cutlass::arch;
13 |   using cutlass::gemm::GemmShape;
14 |   // NOTE(review): arguments look like <data type, accumulator type, arch tag, two GEMM tile shapes, stage count> - confirm in cutlass_conv.h
15 |   return
16 |   {
17 |     CutlassConvInstance<half, float, Sm75, GemmShape<256, 32, 32>, GemmShape<64, 32, 32>, 2>::get(),
18 |     CutlassConvInstance<half, float, Sm75, GemmShape<256, 64, 32>, GemmShape<64, 64, 32>, 2>::get(),
19 |     // Same shapes with half instead of float as the second type argument
20 |     CutlassConvInstance<half, half,  Sm75, GemmShape<256, 32, 32>, GemmShape<64, 32, 32>, 2>::get(),
21 |     CutlassConvInstance<half, half,  Sm75, GemmShape<256, 64, 32>, GemmShape<64, 64, 32>, 2>::get(),
22 |   };
23 | }
24 | 
25 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/cuda/cutlass_conv_sm80.cu:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "cutlass_conv.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 | // Ampere (SM 8.0), Ada Lovelace (SM 8.9), Hopper (SM 9.0), Blackwell (SM 10.0, 12.0)
 9 | template<>
10 | std::vector<CutlassConvFactory> getCutlassConvInstances<80>()
11 | {
12 |   using namespace cutlass::arch;
13 |   using cutlass::gemm::GemmShape;
14 |   // NOTE(review): arguments look like <data type, accumulator type, arch tag, two GEMM tile shapes, stage count> - confirm in cutlass_conv.h
15 |   return
16 |   {
17 |     CutlassConvInstance<half, float, Sm80, GemmShape<256, 32, 32>, GemmShape<64, 32, 32>, 3 /*4*/>::get(),
18 |     CutlassConvInstance<half, float, Sm80, GemmShape<256, 64, 32>, GemmShape<64, 64, 32>, 3>::get(),
19 |     // Same shapes with half instead of float as the second type argument
20 |     CutlassConvInstance<half, half,  Sm80, GemmShape<256, 32, 32>, GemmShape<64, 32, 32>, 3 /*4*/>::get(),
21 |     CutlassConvInstance<half, half,  Sm80, GemmShape<256, 64, 32>, GemmShape<64, 64, 32>, 3>::get(),
22 |   };
23 | }
24 | 
25 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/gpu/gpu_image_copy.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/kernel.h"
 7 | #include "core/image_accessor.h"
 8 | 
 9 | #if !defined(OIDN_COMPILE_METAL_DEVICE)
10 |   #include "core/image_copy.h"
11 | #endif
12 | 
13 | OIDN_NAMESPACE_BEGIN
14 | 
15 |   struct GPUImageCopyKernel // functor that copies one pixel from src to dst per work-item
16 |   {
17 |     ImageAccessor src;
18 |     ImageAccessor dst;
19 | 
20 |     oidn_device_inline void operator ()(const oidn_private WorkItem<2>& it) const
21 |     {
22 |       // Global ID 0 indexes the row (h), global ID 1 the column (w)
23 |       const int row = it.getGlobalID<0>();
24 |       const int col = it.getGlobalID<1>();
25 |       dst.set3(row, col, src.get3(row, col));
26 |     }
27 |   };
28 | 
29 | #if !defined(OIDN_COMPILE_METAL_DEVICE)
30 | 
31 |   template<typename EngineT>
32 |   class GPUImageCopy final : public ImageCopy // ImageCopy op that submits GPUImageCopyKernel on an EngineT
33 |   {
34 |   public:
35 |     explicit GPUImageCopy(EngineT* engine)
36 |       : engine(engine) {}
37 | 
38 |     Engine* getEngine() const override { return engine; }
39 | 
40 |   #if defined(OIDN_COMPILE_METAL)
41 |     void finalize() override
42 |     {
43 |       pipeline = engine->newPipeline("imageCopy"); // Metal builds only: pipeline requested by name
44 |     }
45 |   #endif
46 | 
47 |     void submitKernels(const Ref<CancellationToken>& ct) override
48 |     {
49 |       check();
50 |       // Fill the kernel functor from the source and destination images
51 |       GPUImageCopyKernel kernel;
52 |       kernel.src = *src;
53 |       kernel.dst = *dst;
54 |       // The launch size is the destination height x width; Metal also passes the pipeline and buffers
55 |     #if defined(OIDN_COMPILE_METAL)
56 |       engine->submitKernel(WorkDim<2>(dst->getH(), dst->getW()), kernel,
57 |                            pipeline, {src->getBuffer(), dst->getBuffer()});
58 |     #else
59 |       engine->submitKernel(WorkDim<2>(dst->getH(), dst->getW()), kernel);
60 |     #endif
61 |     }
62 | 
63 |   private:
64 |     EngineT* engine;
65 | 
66 |   #if defined(OIDN_COMPILE_METAL)
67 |     Ref<MetalPipeline> pipeline; // assigned in finalize()
68 |   #endif
69 |   };
70 | 
71 | #endif // !defined(OIDN_COMPILE_METAL_DEVICE)
72 | 
73 | OIDN_NAMESPACE_END
74 | 


--------------------------------------------------------------------------------
/devices/gpu/gpu_pool.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/pool.h"
 7 | #include "core/tensor_accessor.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   template<typename T, TensorLayout layout>
12 |   struct GPUPoolKernel // 2x2 max pooling: one work-item writes one destination element
13 |   {
14 |     TensorAccessor3D<T, layout> src;
15 |     TensorAccessor3D<T, layout> dst;
16 | 
17 |     oidn_device_inline void operator ()(const WorkItem<3>& it) const
18 |     {
19 |       const int c = it.getGlobalID<0>();
20 |       const int h = it.getGlobalID<1>();
21 |       const int w = it.getGlobalID<2>();
22 | 
23 |       const int hs = h * 2, ws = w * 2; // top-left corner of the 2x2 source window
24 |       const T topLeft     = src(c, hs,     ws);
25 |       const T topRight    = src(c, hs,     ws + 1);
26 |       const T bottomLeft  = src(c, hs + 1, ws);
27 |       const T bottomRight = src(c, hs + 1, ws + 1);
28 |       dst(c, h, w) = math::max(math::max(topLeft, topRight), math::max(bottomLeft, bottomRight));
29 |     }
30 |   };
31 | 
32 |   // Optimized for HWC layout (memory coalescing)
33 |   template<typename T>
34 |   struct GPUPoolKernel<T, TensorLayout::hwc> // HWC variant: global IDs arrive in (h, w, c) order
35 |   {
36 |     TensorAccessor3D<T, TensorLayout::hwc> src;
37 |     TensorAccessor3D<T, TensorLayout::hwc> dst;
38 | 
39 |     oidn_device_inline void operator ()(const WorkItem<3>& it) const
40 |     {
41 |       const int h = it.getGlobalID<0>();
42 |       const int w = it.getGlobalID<1>();
43 |       const int c = it.getGlobalID<2>();
44 | 
45 |       const int hs = h * 2, ws = w * 2; // top-left corner of the 2x2 source window
46 |       const T topLeft     = src(c, hs,     ws);
47 |       const T topRight    = src(c, hs,     ws + 1);
48 |       const T bottomLeft  = src(c, hs + 1, ws);
49 |       const T bottomRight = src(c, hs + 1, ws + 1);
50 |       dst(c, h, w) = math::max(math::max(topLeft, topRight), math::max(bottomLeft, bottomRight));
51 |     }
52 |   };
53 | 
54 |   template<typename EngineT, typename SrcDstT, TensorLayout srcDstLayout>
55 |   class GPUPool : public Pool // Pool op that submits GPUPoolKernel on an EngineT
56 |   {
57 |   public:
58 |     GPUPool(EngineT* engine, const PoolDesc& desc)
59 |       : Pool(desc), engine(engine)
60 |     {
61 |     }
62 | 
63 |     Engine* getEngine() const override { return engine; }
64 | 
65 |     void submitKernels(const Ref<CancellationToken>& ct) override
66 |     {
67 |       if (!(src && dst))
68 |         throw std::logic_error("pooling source/destination not set");
69 | 
70 |       GPUPoolKernel<SrcDstT, srcDstLayout> poolKernel;
71 |       poolKernel.src = *src;
72 |       poolKernel.dst = *dst;
73 |       // One work-item per destination element; the dimension order matches the kernel for the layout
74 |       const WorkDim<3> globalSize = (srcDstLayout == TensorLayout::hwc)
75 |         ? WorkDim<3>(dst->getH(), dst->getW(), dst->getPaddedC())
76 |         : WorkDim<3>(dst->getPaddedC(), dst->getH(), dst->getW());
77 |       engine->submitKernel(globalSize, poolKernel);
78 |     }
79 | 
80 |   private:
81 |     EngineT* engine;
82 |   };
83 | 
84 | OIDN_NAMESPACE_END
85 | 


--------------------------------------------------------------------------------
/devices/gpu/gpu_upsample.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/upsample.h"
 7 | #include "core/tensor_accessor.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   template<typename T, TensorLayout layout>
12 |   struct GPUUpsampleKernel // 2x upsampling: one work-item handles one source element
13 |   {
14 |     TensorAccessor3D<T, layout> src;
15 |     TensorAccessor3D<T, layout> dst;
16 | 
17 |     oidn_device_inline void operator ()(const WorkItem<3>& it) const
18 |     {
19 |       const int c = it.getGlobalID<0>();
20 |       const int h = it.getGlobalID<1>();
21 |       const int w = it.getGlobalID<2>();
22 | 
23 |       // Replicate the source element into the matching 2x2 destination window
24 |       const T value = src(c, h, w);
25 | 
26 |       for (int dh = 0; dh < 2; ++dh)
27 |         for (int dw = 0; dw < 2; ++dw)
28 |           dst(c, h*2 + dh, w*2 + dw) = value;
29 |     }
30 |   };
31 | 
32 |   // Optimized for HWC layout (memory coalescing)
33 |   template<typename T>
34 |   struct GPUUpsampleKernel<T, TensorLayout::hwc> // HWC variant: global IDs arrive in (h, w, c) order
35 |   {
36 |     TensorAccessor3D<T, TensorLayout::hwc> src;
37 |     TensorAccessor3D<T, TensorLayout::hwc> dst;
38 | 
39 |     oidn_device_inline void operator ()(const WorkItem<3>& it) const
40 |     {
41 |       const int h = it.getGlobalID<0>();
42 |       const int w = it.getGlobalID<1>();
43 |       const int c = it.getGlobalID<2>();
44 | 
45 |       // Replicate the source element into the matching 2x2 destination window
46 |       const T value = src(c, h, w);
47 | 
48 |       for (int dh = 0; dh < 2; ++dh)
49 |         for (int dw = 0; dw < 2; ++dw)
50 |           dst(c, h*2 + dh, w*2 + dw) = value;
51 |     }
52 |   };
53 | 
54 |   template<typename EngineT, typename SrcDstT, TensorLayout srcDstLayout>
55 |   class GPUUpsample : public Upsample // Upsample op that submits GPUUpsampleKernel on an EngineT
56 |   {
57 |   public:
58 |     GPUUpsample(EngineT* engine, const UpsampleDesc& desc)
59 |       : Upsample(desc), engine(engine)
60 |     {
61 |     }
62 | 
63 |     Engine* getEngine() const override { return engine; }
64 | 
65 |     void submitKernels(const Ref<CancellationToken>& ct) override
66 |     {
67 |       if (!(src && dst))
68 |         throw std::logic_error("upsampling source/destination not set");
69 | 
70 |       GPUUpsampleKernel<SrcDstT, srcDstLayout> upsampleKernel;
71 |       upsampleKernel.src = *src;
72 |       upsampleKernel.dst = *dst;
73 |       // One work-item per source element; the dimension order matches the kernel for the layout
74 |       const WorkDim<3> globalSize = (srcDstLayout == TensorLayout::hwc)
75 |         ? WorkDim<3>(src->getH(), src->getW(), src->getPaddedC())
76 |         : WorkDim<3>(src->getPaddedC(), src->getH(), src->getW());
77 |       engine->submitKernel(globalSize, upsampleKernel);
78 |     }
79 | 
80 |   private:
81 |     EngineT* engine;
82 |   };
83 | 
84 | OIDN_NAMESPACE_END
85 | 


--------------------------------------------------------------------------------
/devices/hip/ck_conv.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | // FIXME: workaround for compile error when building for target unsupported by CK
 7 | #include "ck/ck.hpp"
 8 | #if !defined(CK_BUFFER_RESOURCE_3RD_DWORD)
 9 |   #define CK_BUFFER_RESOURCE_3RD_DWORD -1
10 | #endif
11 | 
12 | #include "core/conv.h"
13 | #include "hip_engine.h"
14 | #include "ck/utility/data_type.hpp"
15 | 
16 | OIDN_NAMESPACE_BEGIN
17 | 
18 |   template<typename T>
19 |   struct CKDataType { using Type = T; }; // default mapping: the CK type is T itself
20 | 
21 |   template<>
22 |   struct CKDataType<half> { using Type = ck::half_t; }; // half maps to ck::half_t
23 | 
24 |   template<ck::index_t... Is>
25 |   using S = ck::Sequence<Is...>; // shorthand alias for ck::Sequence
26 | 
27 |   inline std::array<ck::index_t, 5> getCKTensorLengths(const TensorDesc& td)
28 |   {
29 |     // Five lengths per layout; the trailing tag on each return names the dimension order
30 |     const auto layout = td.layout;
31 |     if (layout == TensorLayout::x)
32 |       return {1, 1, td.getPaddedX(), 1, 1}; // GNCHW
33 |     if (layout == TensorLayout::hwc)
34 |       return {1, 1, td.getPaddedC(), td.getH(), td.getW()}; // GNCHW
35 |     if (layout == TensorLayout::ohwi)
36 |       return {1, td.getPaddedO(), td.getPaddedI(), td.getH(), td.getW()}; // GKCYX
37 | 
38 |     // Every other layout is rejected
39 |     throw std::invalid_argument("unsupported tensor layout");
40 |   }
41 | 
42 |   inline std::array<ck::index_t, 5> getCKTensorStrides(const TensorDesc& td)
43 |   {
44 |     // Five strides per layout, in the same dimension order as getCKTensorLengths()
45 |     const auto layout = td.layout;
46 |     if (layout == TensorLayout::x)
47 |       return {0, 0, 1, 0, 0}; // GNCHW
48 |     if (layout == TensorLayout::hwc)
49 |       return {0, 0, 1, td.getW() * td.getPaddedC(), td.getPaddedC()}; // GNCHW
50 |     if (layout == TensorLayout::ohwi)
51 |       return {0, td.getH() * td.getW() * td.getPaddedI(),
52 |               1, td.getW() * td.getPaddedI(), td.getPaddedI()}; // GKCYX
53 | 
54 |     // Every other layout is rejected
55 |     throw std::invalid_argument("unsupported tensor layout");
56 |   }
57 | 
58 |   struct CKConvFactory // describes one available convolution kernel and how to instantiate it
59 |   {
60 |     Ref<Conv> (*make)(HIPEngine*, const ConvDesc&); // creates the Conv op for an engine and descriptor
61 | 
62 |     DataType dataType; // compared against the source tensor data type during kernel selection
63 |     DataType accumType; // compared against the required accumulator type during kernel selection
64 |     Activation activation;
65 |     int blockM, blockN, blockK; // threadblock size
66 |   };
67 | 
68 |   template<HIPArch arch>
69 |   std::vector<CKConvFactory> getCKConvInstances(Activation activation); // declaration only: kernel factories for one architecture and activation
70 | 
71 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/hip/hip_conv.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2025 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "hip_conv.h"
 5 | #include "ck_conv.h"
 6 | 
 7 | OIDN_NAMESPACE_BEGIN
 8 | 
 9 |   Ref<Conv> newHIPConv(HIPEngine* engine, const ConvDesc& desc)
10 |   {
11 |     // Fetch the CK kernel instances for the engine's architecture and the requested activation
12 |     std::vector<CKConvFactory> candidates;
13 |     const auto arch = engine->getArch();
14 |     if (arch == HIPArch::DL)
15 |       candidates = getCKConvInstances<HIPArch::DL>(desc.activation);
16 |     else if (arch == HIPArch::WMMA)
17 |       candidates = getCKConvInstances<HIPArch::WMMA>(desc.activation);
18 |     else
19 |       throw std::runtime_error("unsupported architecture");
20 | 
21 |     // GEMM dimensions: M from the source height x width, N from the padded output channels,
22 |     // K from the padded input channels x weight height x weight width
23 |     const size_t M = desc.srcDesc.getH() * desc.srcDesc.getW(); // == destination dims
24 |     const size_t N = desc.weightDesc.getPaddedO();
25 |     const size_t K = desc.weightDesc.getPaddedI() * desc.weightDesc.getH() * desc.weightDesc.getW();
26 | 
27 |     // Kernels whose accumulator type ranks below Float32 in the DataType ordering are skipped
28 |     const DataType accumType = DataType::Float32;
29 | 
30 |     // Keep the candidate with the smallest block-padded GEMM volume; ties go to the larger block,
31 |     // then to a kernel whose accumulator type matches accumType exactly
32 |     const CKConvFactory* winner = nullptr;
33 |     int winnerBlockSize = 0;
34 |     size_t winnerCost = std::numeric_limits<size_t>::max();
35 | 
36 |     for (const CKConvFactory& candidate : candidates)
37 |     {
38 |       const bool typesOk = candidate.dataType == desc.srcDesc.dataType && !(candidate.accumType < accumType);
39 |       if (!typesOk)
40 |         continue;
41 |       const int blockSize = candidate.blockM * candidate.blockN * candidate.blockK;
42 |       const size_t cost = round_up(M, candidate.blockM) * round_up(N, candidate.blockN) * round_up(K, candidate.blockK);
43 | 
44 |       bool better = cost < winnerCost;
45 |       if (cost == winnerCost)
46 |         better = blockSize > winnerBlockSize ||
47 |                  (blockSize == winnerBlockSize && candidate.accumType == accumType);
48 |       if (!better)
49 |         continue;
50 |       winner = &candidate;
51 |       winnerBlockSize = blockSize;
52 |       winnerCost = cost;
53 |     }
54 | 
55 |     if (winner == nullptr)
56 |       throw std::runtime_error("unsupported convolution");
57 |     return winner->make(engine, desc);
58 |   }
59 | 
60 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/hip/hip_conv.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/conv.h"
 7 | #include "hip_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   Ref<Conv> newHIPConv(HIPEngine* engine, const ConvDesc& desc); // creates a convolution op for the given HIP engine
12 | 
13 | OIDN_NAMESPACE_END
14 | 


--------------------------------------------------------------------------------
/devices/hip/hip_device.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/device.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   void checkError(hipError_t error);
11 | 
12 |   class HIPEngine;
13 | 
14 |   // GPU matrix architecture
15 |   enum class HIPArch
16 |   {
17 |     Unknown, // default value of HIPDevice::arch
18 |     DL,      // RDNA 2
19 |     WMMA,    // RDNA 3
20 |   };
21 | 
22 |   class HIPPhysicalDevice : public PhysicalDevice // PhysicalDevice that additionally records a HIP device ID
23 |   {
24 |   public:
25 |     int deviceID; // NOTE(review): presumably the HIP runtime device ordinal - confirm
26 | 
27 |     HIPPhysicalDevice(int deviceID, const hipDeviceProp_t& prop, int score);
28 |   };
29 | 
30 |   class HIPDevice final : public Device
31 |   {
32 |     friend class HIPEngine; // grants HIPEngine access to the private members below
33 | 
34 |   public:
35 |     static std::vector<Ref<PhysicalDevice>> getPhysicalDevices();
36 |     static std::string getName(const hipDeviceProp_t& prop);
37 |     static std::string getArchName(const hipDeviceProp_t& prop);
38 |     static HIPArch getArch(const hipDeviceProp_t& prop);
39 |     static bool isSupported(int deviceID);
40 | 
41 |     HIPDevice(int deviceID, hipStream_t stream); // from an explicit device ID and stream
42 |     explicit HIPDevice(const Ref<HIPPhysicalDevice>& physicalDevice); // from a HIPPhysicalDevice
43 |     ~HIPDevice();
44 | 
45 |     void enter() override;
46 |     void leave() override;
47 | 
48 |     DeviceType getType() const override { return DeviceType::HIP; }
49 | 
50 |     Storage getPtrStorage(const void* ptr) override;
51 | 
52 |     void wait() override;
53 | 
54 |   private:
55 |     void init() override;
56 | 
57 |     int deviceID = 0;
58 |     int prevDeviceID = -1; // NOTE(review): presumably the device that was current before enter() - confirm in hip_device.cpp
59 |     hipStream_t stream = nullptr;
60 | 
61 |     HIPArch arch = HIPArch::Unknown;
62 |     int maxWorkGroupSize = 0;
63 |     int subgroupSize = 0;
64 |   };
65 | 
66 | OIDN_NAMESPACE_END
67 | 


--------------------------------------------------------------------------------
/devices/hip/hip_external_buffer.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/buffer.h"
 7 | #include "hip_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class HIPExternalBuffer : public USMBuffer // USMBuffer associated with a hipExternalMemory_t
12 |   {
13 |   public:
14 |     HIPExternalBuffer(Engine* engine, // overload taking a file descriptor
15 |                       ExternalMemoryTypeFlag fdType,
16 |                       int fd, size_t byteSize);
17 | 
18 |     HIPExternalBuffer(Engine* engine, // overload taking a handle and/or name
19 |                       ExternalMemoryTypeFlag handleType,
20 |                       void* handle, const void* name, size_t byteSize);
21 | 
22 |     ~HIPExternalBuffer();
23 | 
24 |   private:
25 |     hipExternalMemory_t extMem; // no default initializer here; NOTE(review): presumably assigned in init() - confirm
26 | 
27 |     void init(const hipExternalMemoryHandleDesc& handleDesc);
28 |   };
29 | 
30 | OIDN_NAMESPACE_END
31 | 


--------------------------------------------------------------------------------
/devices/hip/hip_module.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "core/context.h"
 5 | #include "hip_device.h"
 6 | 
 7 | OIDN_NAMESPACE_BEGIN
 8 | 
 9 |   class HIPDeviceFactory : public HIPDeviceFactoryBase // factory registered for DeviceType::HIP by the module init function
10 |   {
11 |   public:
12 |     bool isDeviceSupported(int deviceID) override
13 |     {
14 |       return HIPDevice::isSupported(deviceID);
15 |     }
16 | 
17 |     Ref<Device> newDevice(const int* deviceIDs, const hipStream_t* streams, int numPairs) override
18 |     {
19 |       if (numPairs != 1) // exactly one device/stream pair is accepted
20 |         throw Exception(Error::InvalidArgument, "invalid number of HIP devices/streams");
21 |       if (deviceIDs == nullptr) // both arrays are validated before element 0 is read
22 |         throw Exception(Error::InvalidArgument, "array of HIP devices is null");
23 |       if (streams == nullptr)
24 |         throw Exception(Error::InvalidArgument, "array of HIP streams is null");
25 | 
26 |       return makeRef<HIPDevice>(deviceIDs[0], streams[0]);
27 |     }
28 | 
29 |     Ref<Device> newDevice(const Ref<PhysicalDevice>& physicalDevice) override
30 |     {
31 |       assert(physicalDevice->type == DeviceType::HIP); // assert-only check guarding the static cast below
32 |       return makeRef<HIPDevice>(staticRefCast<HIPPhysicalDevice>(physicalDevice));
33 |     }
34 |   };
35 | 
36 |   OIDN_DECLARE_INIT_MODULE(device_hip) // module init body: registers the HIP factory and its physical devices
37 |   {
38 |     Context::registerDeviceType<HIPDeviceFactory>(DeviceType::HIP, HIPDevice::getPhysicalDevices());
39 |   }
40 | 
41 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/metal/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ## Copyright 2023 Apple Inc.
 2 | ## Copyright 2023 Intel Corporation
 3 | ## SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | cmake_minimum_required(VERSION 3.21)
 6 | 
 7 | include(oidn_metal)
 8 | 
 9 | set(OIDN_METAL_SOURCES
10 |   metal_buffer.mm
11 |   metal_buffer.h
12 |   metal_common.h
13 |   metal_common.mm
14 |   metal_device.mm
15 |   metal_device.h
16 |   metal_conv.mm
17 |   metal_conv.h
18 |   metal_engine.mm
19 |   metal_engine.h
20 |   metal_heap.h
21 |   metal_heap.mm
22 |   metal_module.mm
23 | )
24 | 
25 | add_library(OpenImageDenoise_device_metal ${OIDN_LIB_TYPE} ${OIDN_METAL_SOURCES} ${OIDN_RESOURCE_FILE}) # library type is taken from OIDN_LIB_TYPE
26 | 
27 | metallib_target_add_sources(OpenImageDenoise_device_metal metal_kernels
28 |   metal_kernels.metal
29 |   INCLUDE_DIRECTORIES
30 |     ${PROJECT_SOURCE_DIR}
31 |   COMPILE_OPTIONS
32 |     -std=metal3.0 -fno-fast-math -m${OIDN_APPLE_SDK}-version-min=${OIDN_APPLE_SDK_VERSION_MIN}
33 | )
34 | 
35 | set_target_properties(OpenImageDenoise_device_metal PROPERTIES
36 |   OUTPUT_NAME ${OIDN_LIBRARY_NAME}_device_metal
37 |   CXX_STANDARD 17
38 | )
39 | if(OIDN_LIBRARY_VERSIONED) # only versioned builds set the VERSION property
40 |   set_target_properties(OpenImageDenoise_device_metal PROPERTIES VERSION ${PROJECT_VERSION})
41 | endif()
42 | 
43 | target_compile_definitions(OpenImageDenoise_device_metal PRIVATE OIDN_COMPILE_METAL_HOST)
44 | 
45 | target_link_libraries(OpenImageDenoise_device_metal PRIVATE "-framework Foundation")
46 | target_link_libraries(OpenImageDenoise_device_metal PRIVATE "-framework Metal")
47 | target_link_libraries(OpenImageDenoise_device_metal PRIVATE "-framework MetalPerformanceShadersGraph")
48 | target_link_libraries(OpenImageDenoise_device_metal PRIVATE "-framework MetalPerformanceShaders")
49 | target_link_libraries(OpenImageDenoise_device_metal PRIVATE OpenImageDenoise_core)
50 | 
51 | if(OIDN_STATIC_LIB) # static build: install as a static module and link it into OpenImageDenoise
52 |   oidn_install_static_module(OpenImageDenoise_device_metal)
53 |   target_link_libraries(OpenImageDenoise PRIVATE OpenImageDenoise_device_metal)
54 | else() # otherwise: strip symbols and install as a regular module
55 |   oidn_strip_symbols(OpenImageDenoise_device_metal)
56 |   oidn_install_module(OpenImageDenoise_device_metal)
57 | endif()
58 | 


--------------------------------------------------------------------------------
/devices/metal/metal_buffer.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Apple Inc.
 2 | // Copyright 2023 Intel Corporation
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | #pragma once
 6 | 
 7 | #include "core/buffer.h"
 8 | #include "metal_engine.h"
 9 | 
10 | OIDN_NAMESPACE_BEGIN
11 | 
12 |   class MetalBuffer : public Buffer // Buffer implementation holding an id<MTLBuffer>
13 |   {
14 |   public:
15 |     MetalBuffer(MetalEngine* engine, size_t byteSize, Storage storage);
16 |     MetalBuffer(const Ref<Arena>& arena, size_t byteSize, size_t byteOffset); // NOTE(review): presumably placed inside the arena at byteOffset - confirm
17 |     MetalBuffer(MetalEngine* engine, void* data, size_t byteSize);
18 |     MetalBuffer(MetalEngine* engine, id<MTLBuffer> buffer); // from an existing MTLBuffer object
19 |     ~MetalBuffer();
20 | 
21 |     Engine* getEngine() const override { return engine; }
22 |     id<MTLBuffer> getMTLBuffer() const { return buffer; }
23 |     void* getPtr() const override;
24 |     void* getHostPtr() const override;
25 |     size_t getByteSize() const override { return byteSize; }
26 |     bool isShared() const override { return shared; }
27 |     Storage getStorage() const override { return storage; }
28 | 
29 |     void read(size_t byteOffset, size_t byteSize, void* dstHostPtr,
30 |               SyncMode sync = SyncMode::Blocking) override; // blocking unless the caller requests otherwise
31 | 
32 |     void write(size_t byteOffset, size_t byteSize, const void* srcHostPtr,
33 |                SyncMode sync = SyncMode::Blocking) override; // blocking unless the caller requests otherwise
34 | 
35 |   protected:
36 |     void preRealloc() override;
37 |     void postRealloc() override;
38 | 
39 |   private:
40 |     void init();
41 |     void free();
42 | 
43 |     MetalEngine* engine; // returned by getEngine()
44 |     id<MTLBuffer> buffer; // returned by getMTLBuffer()
45 |     size_t byteSize; // returned by getByteSize()
46 |     bool shared; // returned by isShared()
47 |     Storage storage; // returned by getStorage()
48 |   };
49 | 
50 | OIDN_NAMESPACE_END
51 | 


--------------------------------------------------------------------------------
/devices/metal/metal_common.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Apple Inc.
 2 | // Copyright 2023 Intel Corporation
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | #pragma once
 6 | 
 7 | #include "core/tensor.h"
 8 | 
 9 | #include <Foundation/Foundation.h>
10 | #include <Metal/Metal.h>
11 | #include <MetalPerformanceShaders/MetalPerformanceShaders.h>
12 | #include <MetalPerformanceShadersGraph/MetalPerformanceShadersGraph.h>
13 | 
14 | OIDN_NAMESPACE_BEGIN
15 | 
16 |   class MetalDevice;
17 |   struct PoolDesc;
18 |   struct ConvDesc;
19 | 
20 |   MTLResourceOptions toMTLResourceOptions(Storage storage);
21 | 
22 |   MPSDataType toMPSDataType(DataType dataType);
23 |   MPSShape* toMPSShape(const TensorDesc& td);
24 | 
25 |   MPSGraphTensor* toMPSGraphConst(MPSGraph* graph, const Ref<Tensor>& t);
26 |   MPSGraphTensor* toMPSGraphPlaceholder(MPSGraph* graph, TensorDesc td);
27 |   MPSGraphTensor* toMPSGraphPlaceholder(MPSGraph* graph, ImageDesc imd);
28 |   MPSGraphTensorData* newMPSGraphTensorData(const Ref<Tensor>& tensor);
29 | 
30 |   id<MTLBuffer> getMTLBuffer(Ref<Buffer> buffer);
31 | 
32 | OIDN_NAMESPACE_END
33 | 


--------------------------------------------------------------------------------
/devices/metal/metal_conv.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/conv.h"
 7 | #include "metal_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class MetalConv final : public Conv // Conv implementation that keeps MPSGraph objects as state
12 |   {
13 |   public:
14 |     MetalConv(MetalEngine* engine, const ConvDesc& desc);
15 |     ~MetalConv();
16 | 
17 |     Engine* getEngine() const override { return engine; }
18 |     void finalize() override;
19 |     void submitKernels(const Ref<CancellationToken>& ct) override;
20 | 
21 |   private:
22 |     void updateWeight() override;
23 |     void updateBias() override;
24 | 
25 |     MetalEngine* engine;
26 |     MPSGraph* mpsGraph = nullptr; // all MPSGraph members start out null; NOTE(review): presumably created in finalize() - confirm
27 |     MPSGraphTensor* mpsSrc = nullptr;
28 |     MPSGraphTensor* mpsWeight = nullptr;
29 |     MPSGraphTensor* mpsBias = nullptr;
30 |     MPSGraphTensor* mpsDst = nullptr;
31 |   };
32 | 
33 | OIDN_NAMESPACE_END
34 | 


--------------------------------------------------------------------------------
/devices/metal/metal_device.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Apple Inc.
 2 | // Copyright 2023 Intel Corporation
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | #pragma once
 6 | 
 7 | #include "core/device.h"
 8 | #include "metal_common.h"
 9 | 
10 | OIDN_NAMESPACE_BEGIN
11 | 
12 |   class MetalEngine;
13 | 
14 |   class MetalPhysicalDevice : public PhysicalDevice // PhysicalDevice that additionally records an id<MTLDevice>
15 |   {
16 |   public:
17 |     id<MTLDevice> device;
18 | 
19 |     MetalPhysicalDevice(id<MTLDevice> device, int score);
20 |   };
21 | 
22 |   class MetalDevice final : public Device
23 |   {
24 |     friend class MetalEngine; // grants MetalEngine access to the private members below
25 | 
26 |   public:
27 |     static std::vector<Ref<PhysicalDevice>> getPhysicalDevices();
28 |     static bool isSupported(id<MTLDevice> device);
29 | 
30 |     explicit MetalDevice(const Ref<MetalPhysicalDevice>& physicalDevice); // from a MetalPhysicalDevice
31 |     explicit MetalDevice(id<MTLCommandQueue> commandQueue); // from a user-provided command queue
32 |     ~MetalDevice();
33 | 
34 |     DeviceType getType() const override { return DeviceType::Metal; }
35 |     id<MTLDevice> getMTLDevice() const { return device; }
36 | 
37 |     bool needWeightAndBiasOnDevice() const override { return false; } // due to MPSGraph
38 |     Storage getPtrStorage(const void* ptr) override;
39 | 
40 |     void execute(std::function<void()>&& f, SyncMode sync) override;
41 |     void flush() override;
42 |     void wait() override;
43 | 
44 |   protected:
45 |     void init() override;
46 | 
47 |   private:
48 |     id<MTLDevice> device; // returned by getMTLDevice()
49 |     id<MTLCommandQueue> userCommandQueue = nil; // user-provided command queue (optional)
50 |   };
51 | 
52 | OIDN_NAMESPACE_END
53 | 


--------------------------------------------------------------------------------
/devices/metal/metal_heap.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/heap.h"
 7 | #include "metal_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class MetalHeap : public Heap // Heap implementation holding an id<MTLHeap>
12 |   {
13 |     friend class MetalBuffer; // grants MetalBuffer access to the private members below
14 | 
15 |   public:
16 |     MetalHeap(MetalEngine* engine, size_t byteSize, Storage storage);
17 |     ~MetalHeap();
18 | 
19 |     Engine* getEngine() const override { return engine; }
20 |     size_t getByteSize() const override { return byteSize; }
21 |     Storage getStorage() const override { return storage; }
22 | 
23 |     void realloc(size_t newByteSize) override;
24 | 
25 |   private:
26 |     void init(); // creates the MTLHeap (see metal_heap.mm)
27 |     void free(); // releases the MTLHeap (see metal_heap.mm)
28 | 
29 |     MetalEngine* engine;
30 |     id<MTLHeap> heap;
31 |     size_t byteSize; // requested size, returned by getByteSize()
32 |     Storage storage;
33 |   };
34 | 
35 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/metal/metal_heap.mm:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "metal_heap.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   MetalHeap::MetalHeap(MetalEngine* engine, size_t byteSize, Storage storage)
 9 |     : engine(engine),
10 |       heap(nullptr),
11 |       byteSize(byteSize),
12 |       storage((storage == Storage::Undefined) ? Storage::Device : storage) // Undefined storage falls back to Device
13 |   {
14 |     init(); // allocates the heap right away (no-op when byteSize is 0)
15 |   }
16 | 
17 |   MetalHeap::~MetalHeap()
18 |   {
19 |     free(); // releases the MTLHeap if one was created
20 |   }
21 | 
22 |   void MetalHeap::init()
23 |   {
24 |     if (byteSize == 0) // zero-sized heap: nothing is allocated, `heap` keeps its current value
25 |       return;
26 | 
27 |     MTLHeapDescriptor* desc = [MTLHeapDescriptor new];
28 |     desc.type = MTLHeapTypePlacement;
29 |     desc.resourceOptions = toMTLResourceOptions(storage) | MTLResourceHazardTrackingModeTracked;
30 |     desc.size = engine->getBufferByteSizeAndAlignment(byteSize, storage).size; // size as reported by the engine for a buffer of byteSize
31 | 
32 |     heap = [engine->getMTLDevice() newHeapWithDescriptor: desc];
33 |     [desc release]; // descriptor is released on both the success and failure path
34 | 
35 |     if (!heap) // creation failure is reported as Error::OutOfMemory
36 |       throw Exception(Error::OutOfMemory, "failed to create heap");
37 |   }
38 | 
39 |   void MetalHeap::free()
40 |   {
41 |     if (heap)
42 |       [heap release];
43 |     heap = nullptr; // reset so a repeated free() does not release again
44 |   }
45 | 
46 |   void MetalHeap::realloc(size_t newByteSize)
47 |   {
48 |     if (newByteSize == byteSize) // size unchanged: keep the existing heap
49 |       return;
50 | 
51 |     preRealloc(); // hook called before the old heap is released
52 |     free();
53 |     byteSize = newByteSize; // must be updated before init(), which reads byteSize
54 |     init();
55 |     postRealloc(); // hook called after the new heap exists; skipped if init() throws
56 |   }
57 | 
58 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/metal/metal_module.mm:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Apple Inc.
 2 | // Copyright 2023 Intel Corporation
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | #include "core/context.h"
 6 | #include "metal_device.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Device factory registered for DeviceType::Metal by the module init function below
11 |   class MetalDeviceFactory : public MetalDeviceFactoryBase
12 |   {
13 |   public:
14 |     // Forwards to MetalDevice::isSupported
15 |     bool isDeviceSupported(MTLDevice_id device) override
16 |     {
17 |       return MetalDevice::isSupported(device);
18 |     }
19 | 
20 |     // Creates a device from a physical device, which must be of type DeviceType::Metal
21 |     Ref<Device> newDevice(const Ref<PhysicalDevice>& physicalDevice) override
22 |     {
23 |       assert(physicalDevice->type == DeviceType::Metal);
24 |       return makeRef<MetalDevice>(staticRefCast<MetalPhysicalDevice>(physicalDevice));
25 |     }
26 | 
27 |     // Creates a device from a user-provided array of command queues
28 |     // Throws Error::InvalidArgument if numQueues is not 1 or if the array pointer is null
29 |     Ref<Device> newDevice(const MTLCommandQueue_id* commandQueues, int numQueues) override
30 |     {
31 |       if (numQueues != 1)
32 |         throw Exception(Error::InvalidArgument, "invalid number of Metal command queues");
33 |       // Reject a null array before element 0 is dereferenced
34 |       if (commandQueues == nullptr)
35 |         throw Exception(Error::InvalidArgument, "array of Metal command queues is null");
36 | 
37 |       return makeRef<MetalDevice>(commandQueues[0]);
38 |     }
39 |   };
40 | 
41 |   OIDN_DECLARE_INIT_STATIC_MODULE(device_metal)
42 |   {
43 |     Context::registerDeviceType<MetalDeviceFactory>(DeviceType::Metal, MetalDevice::getPhysicalDevices());
44 |   }
45 | 
46 | OIDN_NAMESPACE_END
47 | 


--------------------------------------------------------------------------------
/devices/sycl/sycl_conv.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/conv.h"
 7 | #include "sycl_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   namespace xelp { // one newSYCLConv declaration per architecture namespace
12 |     Ref<Conv> newSYCLConv(SYCLEngine* engine, const ConvDesc& desc);
13 |   }
14 | 
15 |   namespace xehpg {
16 |     Ref<Conv> newSYCLConv(SYCLEngine* engine, const ConvDesc& desc);
17 |   }
18 | 
19 | #if defined(__linux__) // the xehpc variant is declared on Linux only
20 |   namespace xehpc {
21 |     Ref<Conv> newSYCLConv(SYCLEngine* engine, const ConvDesc& desc);
22 |   }
23 | #endif
24 | 
25 |   namespace xe2 {
26 |     Ref<Conv> newSYCLConv(SYCLEngine* engine, const ConvDesc& desc);
27 |   }
28 | 
29 | OIDN_NAMESPACE_END
30 | 


--------------------------------------------------------------------------------
/devices/sycl/sycl_conv_xe2.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 | 
4 | #define OIDN_ARCH_XE2
5 | 
6 | #include "sycl_conv_xe.h"


--------------------------------------------------------------------------------
/devices/sycl/sycl_conv_xehpc.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 | 
4 | #define OIDN_ARCH_XEHPC
5 | 
6 | #include "sycl_conv_xe.h"


--------------------------------------------------------------------------------
/devices/sycl/sycl_conv_xehpg.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 | 
4 | #define OIDN_ARCH_XEHPG
5 | 
6 | #include "sycl_conv_xe.h"


--------------------------------------------------------------------------------
/devices/sycl/sycl_conv_xelp.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2022 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 | 
4 | #define OIDN_ARCH_XELP
5 | 
6 | #include "sycl_conv_xe.h"


--------------------------------------------------------------------------------
/devices/sycl/sycl_device.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2021 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/device.h"
 7 | #include <level_zero/ze_api.h>
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class SYCLEngine;
12 | 
13 |   // GPU architecture
14 |   enum class SYCLArch
15 |   {
16 |     Unknown, // default value of SYCLDevice::arch
17 |     XeLP,
18 |     XeLPG,
19 |     XeLPGplus,
20 |     XeHPG,
21 |     XeHPC, // listed in syclDeviceTable only on Linux
22 |     XeHPC_NoDPAS, // listed in syclDeviceTable only on Linux
23 |     Xe2LPG,
24 |     Xe2HPG,
25 |     Xe3LPG,
26 |   };
27 | 
28 |   class SYCLPhysicalDevice : public PhysicalDevice // PhysicalDevice that additionally records a sycl::device
29 |   {
30 |   public:
31 |     sycl::device syclDevice;
32 | 
33 |     SYCLPhysicalDevice(const sycl::device& syclDevice, int score);
34 |   };
35 | 
36 |   class SYCLDevice : public SYCLDeviceBase
37 |   {
38 |   public:
39 |     static std::vector<Ref<PhysicalDevice>> getPhysicalDevices();
40 |     static bool isSupported(const sycl::device& syclDevice);
41 |     static SYCLArch getArch(const sycl::device& syclDevice);
42 |     static int getScore(const sycl::device& syclDevice);
43 | 
44 |     SYCLDevice(const std::vector<sycl::queue>& syclQueues); // from user-provided queues
45 |     explicit SYCLDevice(const Ref<SYCLPhysicalDevice>& physicalDevice); // from a SYCLPhysicalDevice
46 | 
47 |     DeviceType getType() const override { return DeviceType::SYCL; }
48 |     ze_context_handle_t getZeContext() const { return zeContext; }
49 | 
50 |     int getInt(const std::string& name) override;
51 |     void setInt(const std::string& name, int value) override;
52 | 
53 |     Storage getPtrStorage(const void* ptr) override;
54 | 
55 |     void submitBarrier() override;
56 |     void wait() override;
57 | 
58 |     // Manually sets the dependent events for the next command on all engines
59 |     void setDepEvents(const std::vector<sycl::event>& events);
60 |     void setDepEvents(const sycl::event* events, int numEvents) override;
61 | 
62 |     // Gets the list of events corresponding to the completion of all commands
63 |     std::vector<sycl::event> getDoneEvents();
64 |     void getDoneEvent(sycl::event& event) override;
65 | 
66 |     SYCLArch getArch() const { return arch; } // instance accessor; the static overload above takes a sycl::device
67 | 
68 |   private:
69 |     void preinit();
70 |     void init() override;
71 | 
72 |     SYCLEngine* getSYCLEngine(int i) const;
73 | 
74 |     sycl::context syclContext;
75 |     ze_context_handle_t zeContext = nullptr; // Level Zero context
76 |     SYCLArch arch = SYCLArch::Unknown;
77 |     int numSubdevices = 0; // autodetect by default
78 | 
79 |     // Used only for initialization
80 |     Ref<SYCLPhysicalDevice> physicalDevice;
81 |     std::vector<sycl::queue> syclQueues;
82 |   };
83 | 
84 | OIDN_NAMESPACE_END
85 | 


--------------------------------------------------------------------------------
/devices/sycl/sycl_device_table.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "sycl_device.h"
 7 | 
 8 | OIDN_NAMESPACE_BEGIN
 9 | 
10 |   // Table of supported architectures and corresponding IP versions with revisions masked out
11 |   // These should match the AOT targets defined in CMakeLists.txt
12 |   // https://github.com/intel/compute-runtime/blob/14251c3d96e71e97e397b0c4fcb01557fca47f0e/shared/source/helpers/hw_ip_version.h
13 |   // https://github.com/intel/compute-runtime/blob/master/third_party/aot_config_headers/platforms.h
14 |   struct SYCLDeviceTableEntry
15 |   {
16 |     SYCLArch arch; // architecture shared by all IP versions listed below
17 |     std::vector<uint32_t> ipVersions; // IP versions with the revision bits already cleared
18 |   };
19 | 
20 |   constexpr uint32_t syclDeviceIPVersionMask = 0xffffffc0; // clears the low 6 bits, i.e. the revision masked out of the table values
21 | 
22 |   inline const std::vector<SYCLDeviceTableEntry> syclDeviceTable =
23 |   {
24 |     {
25 |       SYCLArch::XeLP,
26 |       {
27 |         0x03000000, // tgllp
28 |         0x03004000, // rkl
29 |         0x03008000, // adl-s
30 |         0x0300c000, // adl-p
31 |         0x03010000, // adl-n
32 |         0x03028000, // dg1
33 |       }
34 |     },
35 |     {
36 |       SYCLArch::XeLPG,
37 |       {
38 |         0x03118000, // mtl-u
39 |         0x0311c000, // mtl-h
40 |       }
41 |     },
42 |     {
43 |       SYCLArch::XeLPGplus,
44 |       {
45 |         0x03128000, // arl-h
46 |       }
47 |     },
48 |     {
49 |       SYCLArch::XeHPG,
50 |       {
51 |         0x030dc000, // acm-g10
52 |         0x030e0000, // acm-g11
53 |         0x030e4000, // acm-g12
54 |       }
55 |     },
56 |     #if defined(__linux__) // the two XeHPC entries below are compiled in on Linux only
57 |     {
58 |       SYCLArch::XeHPC,
59 |       {
60 |         0x030f0000, // pvc-sdv, pvc
61 |       }
62 |     },
63 |     {
64 |       SYCLArch::XeHPC_NoDPAS,
65 |       {
66 |         0x030f4000, // pvc-vg
67 |       }
68 |     },
69 |     #endif
70 |     {
71 |       SYCLArch::Xe2LPG,
72 |       {
73 |         0x05010000, // lnl-m
74 |       }
75 |     },
76 |     {
77 |       SYCLArch::Xe2HPG,
78 |       {
79 |         0x05004000, // bmg-g21
80 |       }
81 |     },
82 |     {
83 |       SYCLArch::Xe3LPG,
84 |       {
85 |         0x07800000, // ptl-h
86 |         0x07804000, // ptl-u
87 |       }
88 |     },
89 |   };
90 | 
91 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/sycl/sycl_external_buffer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "sycl_external_buffer.h"
 5 | 
 6 | OIDN_NAMESPACE_BEGIN
 7 | 
 8 |   SYCLExternalBuffer::SYCLExternalBuffer(SYCLEngine* engine,
 9 |                                          ExternalMemoryTypeFlag fdType,
10 |                                          int fd, size_t byteSize)
11 |     : USMBuffer(engine)
12 |   {
13 |     if (fdType != ExternalMemoryTypeFlag::DMABuf) // DMA-BUF is the only file descriptor type accepted here
14 |       throw Exception(Error::InvalidArgument, "external memory type not supported by the device");
15 | 
16 |     ze_external_memory_import_fd_t importDesc =
17 |     {
18 |       ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD,
19 |       nullptr, // pNext
20 |       ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF,
21 |       fd
22 |     };
23 | 
24 |     init(engine, &importDesc, byteSize); // init() chains importDesc into the allocation descriptor
25 |   }
26 | 
27 |   SYCLExternalBuffer::SYCLExternalBuffer(SYCLEngine* engine,
28 |                                          ExternalMemoryTypeFlag handleType,
29 |                                          void* handle, const void* name, size_t byteSize)
30 |     : USMBuffer(engine)
31 |   {
32 |     if (handleType != ExternalMemoryTypeFlag::OpaqueWin32) // opaque Win32 is the only handle type accepted here
33 |       throw Exception(Error::InvalidArgument, "external memory type not supported by the device");
34 | 
35 |     ze_external_memory_import_win32_handle_t importDesc =
36 |     {
37 |       ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32,
38 |       nullptr, // pNext
39 |       ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32,
40 |       handle,
41 |       name
42 |     };
43 | 
44 |     init(engine, &importDesc, byteSize); // init() chains importDesc into the allocation descriptor
45 |   }
46 | 
47 |   void SYCLExternalBuffer::init(SYCLEngine* engine, const void* importDesc, size_t byteSize)
48 |   {
49 |     void* ptr = nullptr; // receives the device pointer; shadows the member assigned at the end
50 | 
51 |     ze_device_mem_alloc_desc_t allocDesc{};
52 |     allocDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
53 |     allocDesc.pNext = importDesc; // the import descriptor is passed through the pNext chain
54 | 
55 |     auto result = zeMemAllocDevice(static_cast<SYCLDevice*>(engine->getDevice())->getZeContext(),
56 |                                    &allocDesc,
57 |                                    byteSize,
58 |                                    0, // alignment
59 |                                    engine->getZeDevice(),
60 |                                    &ptr);
61 | 
62 |     if (result != ZE_RESULT_SUCCESS) // any Level Zero failure is reported as InvalidOperation
63 |         throw Exception(Error::InvalidOperation, "failed to import external memory");
64 | 
65 |     this->ptr      = (char*)ptr;
66 |     this->byteSize = byteSize;
67 |     this->shared   = true;
68 |     this->storage  = Storage::Device;
69 |   }
70 | 
71 |   SYCLExternalBuffer::~SYCLExternalBuffer()
72 |   {
73 |     zeMemFree(static_cast<SYCLDevice*>(getDevice())->getZeContext(), ptr); // result is ignored
74 |   }
75 | 
76 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/devices/sycl/sycl_external_buffer.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "core/buffer.h"
 7 | #include "sycl_engine.h"
 8 | 
 9 | OIDN_NAMESPACE_BEGIN
10 | 
11 |   class SYCLExternalBuffer : public USMBuffer // USMBuffer created from an external memory import
12 |   {
13 |   public:
14 |     SYCLExternalBuffer(SYCLEngine* engine, // overload taking a file descriptor
15 |                        ExternalMemoryTypeFlag fdType,
16 |                        int fd, size_t byteSize);
17 | 
18 |     SYCLExternalBuffer(SYCLEngine* engine, // overload taking a handle and name
19 |                        ExternalMemoryTypeFlag handleType,
20 |                        void* handle, const void* name, size_t byteSize);
21 | 
22 |     ~SYCLExternalBuffer();
23 | 
24 |   private:
25 |     void init(SYCLEngine* engine, const void* importDesc, size_t byteSize); // shared by both constructors
26 |   };
27 | 
28 | OIDN_NAMESPACE_END
29 | 


--------------------------------------------------------------------------------
/devices/sycl/sycl_module.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Intel Corporation
 2 | // SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | #include "core/context.h"
 5 | #include "sycl_device.h"
 6 | 
 7 | OIDN_NAMESPACE_BEGIN
 8 | 
 9 |   class SYCLDeviceFactory : public SYCLDeviceFactoryBase // factory registered for DeviceType::SYCL by the module init function
10 |   {
11 |   public:
12 |     bool isDeviceSupported(const sycl::device* device) override
13 |     {
14 |       if (device == nullptr) // a null pointer is an error rather than "unsupported"
15 |         throw Exception(Error::InvalidArgument, "SYCL device is null");
16 |       return SYCLDevice::isSupported(*device);
17 |     }
18 | 
19 |     Ref<Device> newDevice(const sycl::queue* queues, int numQueues) override
20 |     {
21 |       if (numQueues < 1) // one or more queues are accepted
22 |         throw Exception(Error::InvalidArgument, "invalid number of SYCL queues");
23 |       if (queues == nullptr)
24 |         throw Exception(Error::InvalidArgument, "array of SYCL queues is null");
25 | 
26 |       return makeRef<SYCLDevice>(std::vector<sycl::queue>{queues, queues + numQueues}); // copies the queues into a vector
27 |     }
28 | 
29 |     Ref<Device> newDevice(const Ref<PhysicalDevice>& physicalDevice) override
30 |     {
31 |       assert(physicalDevice->type == DeviceType::SYCL); // assert-only check guarding the static cast below
32 |       return makeRef<SYCLDevice>(staticRefCast<SYCLPhysicalDevice>(physicalDevice));
33 |     }
34 |   };
35 | 
36 |   OIDN_DECLARE_INIT_MODULE(device_sycl) // module init body: configures the JIT cache env vars, then registers the SYCL factory
37 |   {
38 |   #if defined(OIDN_DEVICE_SYCL_JIT_CACHE)
39 |     // Enable persistent JIT cache if not disabled explicitly
40 |     setEnvVar("SYCL_CACHE_PERSISTENT", 1, false); // NOTE(review): the `false` argument presumably means "do not overwrite an existing value" - confirm
41 |     setEnvVar("NEO_CACHE_PERSISTENT",  1, false);
42 |   #else
43 |     // Disable persistent JIT cache if not enabled explicitly
44 |     setEnvVar("SYCL_CACHE_PERSISTENT", 0, false);
45 |     setEnvVar("NEO_CACHE_PERSISTENT",  0, false);
46 |   #endif
47 | 
48 |     Context::registerDeviceType<SYCLDeviceFactory>(DeviceType::SYCL, SYCLDevice::getPhysicalDevices());
49 |   }
50 | 
51 | OIDN_NAMESPACE_END


--------------------------------------------------------------------------------
/doc/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated files and folders
2 | changelog.md
3 | www
4 | tmp
5 | __pycache__
6 | images
7 | 


--------------------------------------------------------------------------------
/doc/documentation.md:
--------------------------------------------------------------------------------
1 | Documentation
2 | =============
3 | 
4 | The following [documentation][OIDNReadme] of Intel Open Image Denoise can
5 | also be found as a [pdf document][OIDNReadme].
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/doc/downloads.md:
--------------------------------------------------------------------------------
 1 | Download Precompiled Intel Open Image Denoise Binary Packages
 2 | =============================================================
 3 | 
 4 | For Linux we provide Intel Open Image Denoise precompiled for 64-bit as a TGZ
 5 | archive:
 6 | 
 7 | [oidn-<OIDN_VERSION>.x86_64.linux.tar.gz](https://github.com/OpenImageDenoise/oidn/releases/download/v<OIDN_VERSION>/oidn-<OIDN_VERSION>.x86_64.linux.tar.gz)
 8 | 
 9 | For macOS we provide Intel Open Image Denoise precompiled for Intel processors and Apple silicon as separate TGZ archives:
10 | 
11 | [oidn-<OIDN_VERSION>.x86_64.macos.tar.gz](https://github.com/OpenImageDenoise/oidn/releases/download/v<OIDN_VERSION>/oidn-<OIDN_VERSION>.x86_64.macos.tar.gz)
12 | 
13 | [oidn-<OIDN_VERSION>.arm64.macos.tar.gz](https://github.com/OpenImageDenoise/oidn/releases/download/v<OIDN_VERSION>/oidn-<OIDN_VERSION>.arm64.macos.tar.gz)
14 | 
15 | For Windows we provide Intel Open Image Denoise binaries precompiled for 64-bit as a ZIP archive:
16 | 
17 | [oidn-<OIDN_VERSION>.x64.windows.zip](https://github.com/OpenImageDenoise/oidn/releases/download/v<OIDN_VERSION>/oidn-<OIDN_VERSION>.x64.windows.zip)
18 | 
19 | The source code of the latest Intel Open Image Denoise version can be downloaded here:
20 | 
21 | [oidn-<OIDN_VERSION>.src.zip](https://github.com/OpenImageDenoise/oidn/releases/download/v<OIDN_VERSION>/oidn-<OIDN_VERSION>.src.zip)
22 | 
23 | [oidn-<OIDN_VERSION>.src.tar.gz](https://github.com/OpenImageDenoise/oidn/releases/download/v<OIDN_VERSION>/oidn-<OIDN_VERSION>.src.tar.gz)
24 | 
 25 | The TGZ/ZIP packages contain most of the required third-party dependencies.
26 | 
27 | You can also access [old Intel Open Image Denoise releases](https://github.com/OpenImageDenoise/oidn/releases).
28 | 


--------------------------------------------------------------------------------
/doc/examples.md:
--------------------------------------------------------------------------------
 1 | Examples
 2 | ========
 3 | 
 4 | Intel Open Image Denoise ships with a couple of simple example applications.
 5 | 
 6 | oidnDenoise
 7 | -----------
 8 | 
 9 | `oidnDenoise` is a minimal working example demonstrating how to use Intel Open
10 | Image Denoise, which can be found at `apps/oidnDenoise.cpp`. It uses the C++11
11 | convenience wrappers of the C99 API.
12 | 
13 | This example is a simple command-line application that denoises the provided
14 | image, which can optionally have auxiliary feature images as well (e.g. albedo
15 | and normal). By default the images must be stored in the [Portable
16 | FloatMap](http://www.pauldebevec.com/Research/HDR/PFM/) (PFM) format, and the
17 | color values must be encoded in little-endian format. To enable other image
18 | formats (e.g. OpenEXR, PNG) as well, the project has to be rebuilt with
19 | OpenImageIO support enabled.
20 | 
21 | Running `oidnDenoise` without any arguments or the `-h` argument will bring up
22 | a list of command-line options.
23 | 
24 | oidnBenchmark
25 | -------------
26 | 
27 | `oidnBenchmark` is a basic command-line benchmarking application for measuring
28 | denoising speed, which can be found at `apps/oidnBenchmark.cpp`.
29 | 
30 | Running `oidnBenchmark` with the `-h` argument will bring up a list of
31 | command-line options.
32 | 


--------------------------------------------------------------------------------
/doc/filter-latex.py:
--------------------------------------------------------------------------------
 1 | # 1. convert tables to use 'tabu'
 2 | # 2. always add hypertargets, before headings, to workaround issue #2719
 3 | # Based on Wagner Macedo's filter.py posted at
 4 | # https://groups.google.com/forum/#!msg/pandoc-discuss/RUC-tuu_qf0/h-H3RRVt1coJ
 5 | import pandocfilters as pf
 6 | 
def latex(s):
    """Wrap the string `s` in a pandocfilters RawBlock element with format 'latex'."""
    return pf.RawBlock('latex', s)
 9 | 
def inlatex(s):
    """Wrap the string `s` in a pandocfilters RawInline element with format 'latex'."""
    return pf.RawInline('latex', s)
12 | 
def tbl_caption(s):
    """Return a Para that surrounds the list of inline elements `s` with a raw LaTeX caption command."""
    # `s` must be a list: it is concatenated between the opening and closing raw inlines
    return pf.Para([inlatex(r'\caption{')] + s + [inlatex(r'}')])
15 | 
def tbl_alignment(a, w):
    """Build the column specification string for a LaTeX `tabu` environment.

    `a` is a sequence of alignment nodes (mappings whose 't' entry is one of the
    keys of `aligns` below) and `w` is an indexable sequence holding one width
    per column. Every column contributes one `X[<width>,<letter>]` entry, where
    the width is negated and formatted with three decimals.

    Returns the concatenated entries, or an empty string when `a` is empty.
    Raises KeyError for an unknown alignment tag and IndexError when `w` is
    shorter than `a`.
    """
    aligns = {
        "AlignDefault": 'l',
        "AlignLeft": 'l',
        "AlignCenter": 'c',
        "AlignRight": 'r',
    }
    # Index `w` explicitly (instead of zip) so that a too-short width list still raises
    return ''.join(
        'X[%.3f,%s]' % (-w[i], aligns[col['t']])
        for i, col in enumerate(a)
    )
27 | 
def tbl_headers(s):
    """Flatten the header cells `s` into a single Para forming one LaTeX table row.

    The inline content of the first block of every cell is used; cells are
    separated by a raw ' & ' and the row ends with a raw LaTeX line break.
    """
    # Copy the first cell's inlines so the input structure is not mutated
    cells = s[0][0]['c'][:]
    for header in s[1:]:
        cells.append(inlatex(' & '))
        cells.extend(header[0]['c'])
    cells.append(inlatex(r'\\' '\n'))
    return pf.Para(cells)
35 | 
def tbl_contents(s):
    """Flatten the table body rows `s` into a single Para of LaTeX table rows.

    For every non-empty cell the inline content of its first block is emitted,
    each cell is followed by a raw ' & ', and the separator after the last cell
    of a row is replaced by a raw LaTeX line break.
    """
    inlines = []
    for row in s:
        for cell in row:
            if cell:
                inlines.extend(cell[0]['c'])
            inlines.append(inlatex(' & '))
        # Turn the trailing column separator into the end-of-row marker
        inlines[-1] = inlatex(r'\\' '\n')
    return pf.Para(inlines)
47 | 
def do_filter(k, v, f, m):
    """Filter action passed to pandocfilters.toJSONFilter.

    `k` is the element type and `v` its content; `f` and `m` are unused here.
    Tables are replaced by raw LaTeX using the `tabu` environment, and headers
    get a raw hypertarget inserted in front of them. Any other element yields
    None.
    """
    if k == "Header":
        # v[1][0] is used as the hypertarget name
        anchor = latex(r'\hypertarget{' + v[1][0] + r'}{}')
        return [anchor, pf.Header(v[0], v[1], v[2])]

    if k != "Table":
        return None

    widths = v[2]
    if sum(widths) == 0:
        # No explicit widths: give every column the same weight and center the table
        widths = [1 for _ in widths]
        star = ''
        h_align = r'\centering'
    else:
        star = '*'
        h_align = r'\raggedright'

    return [
        latex(r'\begin{table'+star+'}[!h]'),
        tbl_caption(v[0]),
        latex(h_align),
        latex(r'\begin{tabu} spread 0pt {' + tbl_alignment(v[1], widths) + '}'),
        latex(r'\toprule'),
        tbl_headers(v[3]),
        latex(r'\midrule'),
        tbl_contents(v[4]),
        latex(r'\bottomrule' '\n' r'\end{tabu}'),
        latex(r'\end{table'+star+'}'),
    ]
71 | 
72 | if __name__ == "__main__":
73 |     pf.toJSONFilter(do_filter)
74 | 


--------------------------------------------------------------------------------
/doc/filter-sectionnumbers.py:
--------------------------------------------------------------------------------
 1 | # remove section numbers for subheadings
 2 | # Based on Wagner Macedo's filter.py posted at
 3 | # https://groups.google.com/forum/#!msg/pandoc-discuss/RUC-tuu_qf0/h-H3RRVt1coJ
 4 | import pandocfilters as pf
 5 | 
 6 | def do_filter(k, v, f, m):
 7 |     if k == "Header" and v[0] > 2:
 8 |         v[1][1].append('unnumbered')
 9 |         return pf.Header(v[0], v[1], v[2])
10 | 
11 | if __name__ == "__main__":
12 |     pf.toJSONFilter(do_filter)
13 | 


--------------------------------------------------------------------------------
/doc/images.md:
--------------------------------------------------------------------------------
1 | [imgMazdaColor]: mazda_4spp_input.jpg { width=90% }
2 | [imgMazdaDenoised]: mazda_4spp_oidn.jpg { width=90% }
3 | [imgMazdaAlbedoFirstHit]: mazda_firsthit_512spp_albedo.jpg { width=90% }
4 | [imgMazdaAlbedoNonDeltaHit]: mazda_nondeltahit_512spp_albedo.jpg { width=90% }
5 | [imgMazdaNormalFirstHit]: mazda_firsthit_512spp_normal.jpg { width=90% }
6 | [imgMazdaNormalNonDeltaHit]: mazda_nondeltahit_512spp_normal.jpg { width=90% }
7 | 


--------------------------------------------------------------------------------
/doc/legal.md:
--------------------------------------------------------------------------------
 1 | Disclaimer and Legal Information
 2 | ================================
 3 | 
 4 | © 2018-2025 Intel Corporation
 5 | 
 6 | [Privacy Notice](https://www.intel.com/privacy)
 7 | 
 8 | Intel, the Intel logo, Xeon, Intel Xeon Phi, and Intel Core are
 9 | trademarks of Intel Corporation in the U.S. and/or other countries.
10 | *Other names and brands may be claimed as the property of others.
11 | 
12 | 
13 | Optimization Notice: Intel's compilers may or may not optimize to the
14 | same degree for non-Intel microprocessors for optimizations that are not
15 | unique to Intel microprocessors. These optimizations include SSE2, SSE3,
16 | and SSSE3 instruction sets and other optimizations. Intel does not
17 | guarantee the availability, functionality, or effectiveness of any
18 | optimization on microprocessors not manufactured by Intel.
19 | Microprocessor-dependent optimizations in this product are intended for
20 | use with Intel microprocessors. Certain optimizations not specific to
21 | Intel microarchitecture are reserved for Intel microprocessors. Please
22 | refer to the applicable product User and Reference Guides for more
23 | information regarding the specific instruction sets covered by this
24 | notice.
25 | Notice Revision #20110804
26 | 


--------------------------------------------------------------------------------
/doc/links.md:
--------------------------------------------------------------------------------
1 | 
2 | [OIDNReadme]: https://github.com/OpenImageDenoise/oidn/blob/master/readme.pdf "Intel Open Image Denoise Documentation"
3 | [Training]: #training
4 | 


--------------------------------------------------------------------------------
/doc/readme.tex:
--------------------------------------------------------------------------------
 1 | \IfFileExists{oidn-doc/intel-spec.cls}
 2 | {
 3 |   \documentclass[oneside]{oidn-doc/intel-spec}
 4 | }{
 5 |   \documentclass[oneside]{report}
 6 |   \newcommand{\copyrightyears}[1] {}
 7 |   \newcommand{\trademarkacknowledgment}[1] {}
 8 |   \newcommand{\performancedisclaimer}{}
 9 |   \newcommand{\optimizationdisclaimer}{}
10 |   \newcommand{\makedisclaimers}{}
11 |   \newcommand{\version}[1] { \author{Version ##1} }
12 | }
13 | 
14 | \include{preamble}
15 | 
16 | \begin{document}
17 | \title{Intel® Open Image Denoise\vskip0.3\baselineskip\IntelFontOneRegular\LARGE
18 | \noindent High-Performance Denoising Library\\for Ray Tracing}
19 | \version{\oidnversion}
20 | 
21 | \maketitle
22 | \tableofcontents
23 | 
24 | \input{tmp/overview}
25 | \input{tmp/changelog}
26 | \input{tmp/compilation}
27 | \addtocontents{toc}{\protect\setcounter{tocdepth}{2}}
28 | \hypersetup{bookmarksdepth=2}
29 | \input{tmp/api}
30 | \addtocontents{toc}{\protect\setcounter{tocdepth}{1}}
31 | \hypersetup{bookmarksdepth=1}
32 | \input{tmp/examples}
33 | \input{tmp/training}
34 | 
35 | \makedisclaimers
36 | 
37 | \end{document}
38 | 


--------------------------------------------------------------------------------
/doc/readme_head.md:
--------------------------------------------------------------------------------
1 | Intel® Open Image Denoise
2 | =========================
3 | 
4 | This is release v<OIDN_VERSION> of Intel Open Image Denoise. For changes and new
5 | features see the [changelog](CHANGELOG.md). Visit
6 | https://www.openimagedenoise.org for more information.
7 | 
8 | 


--------------------------------------------------------------------------------
/doc/related_projects.md:
--------------------------------------------------------------------------------
 1 | Projects that make use of Intel Open Image Denoise
 2 | ==================================================
 3 | 
 4 | This page gives a brief (and incomplete) list of other projects that
 5 | make use of Intel Open Image Denoise, as well as a set of related links to other
 6 | projects and related information.
 7 | 
 8 | If you have a project that makes use of Intel Open Image Denoise and would like
 9 | this to be listed here, please let us know.
10 | 
11 | -   [Intel® OSPRay](https://www.ospray.org), a ray tracing based rendering engine for high-fidelity visualization
12 | 
13 | 
14 | Projects that are closely related to Intel Open Image Denoise
15 | =============================================================
16 | 
17 | -   The [Intel® Embree](https://www.embree.org) Ray Tracing Kernel Framework
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/doc/teaser.html:
--------------------------------------------------------------------------------
 1 | <div class= "title">  
 2 |   <h1> Intel<sup>®</sup> Open Image Denoise </h1>
 3 |   <h2> High-Performance Denoising Library for Ray Tracing </h2>
 4 | </div>
 5 | <div class="teaser-img">
 6 |   <div class="img-compare" style="padding-bottom: 41.875%; /* 536/1280 */" onmousemove="this.getElementsByTagName('div')[0].style.width=event.offsetX+'px'">
 7 |    <img src="images/cabins_2spp_oidn.jpg" style="box-shadow: 0px 0px 24px rgba(0,0,0,0.4);"><span>Denoised</span>
 8 |    <div><img src="images/cabins_2spp_input.jpg"><span>Original</span></div>
 9 |   </div><!-- do not change formatting here! -->
10 |   <p><em>Evermotion [15th Anniversary Collection](https://evermotion.org/shop/show_product/15th-anniversary-collection/16094) scene rendered with [Chaos Corona](https://corona-renderer.com/) and denoised with Intel® Open Image Denoise using prefiltered albedo and normal buffers. Hover over the image (or tap on it) to move the slider between the original and denoised versions.</em></p>
11 | </div>
12 | 
13 | 


--------------------------------------------------------------------------------
/doc/webtemplate.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta http-equiv="Content-Type" content="text/html" charset="utf-8">
 5 |     <title>Intel® Open Image Denoise</title>
 6 |     <link rel="stylesheet" type="text/css" media="screen" href="stylesheet.css">
 7 | $if(highlighting-css)$
 8 |     <style type="text/css">
 9 | $highlighting-css$
10 | div.sourceCode { overflow-x: initial; }
11 |     </style>
12 | $endif$
13 |   </head>
14 | 
15 |   <body>
16 |     <div id="header">
17 |       <div id="header-github">
18 |         <a id="forkme-banner" href="https://github.com/OpenImageDenoise/oidn">View on GitHub</a>
19 |       </div>
20 |       <div id="header-title">
21 |         Intel<sup>®</sup> Open Image Denoise
22 |       </div>
23 | 
24 |       <div id="header-navbar">
25 |         <ul>
26 |           <li$if(select_index)$ id="selected"$endif$><a href="index.html">Overview</a></li>
27 | 	  <li$if(select_documentation)$ id="selected"$endif$><a href="documentation.html">Documentation</a></li>
28 | 	  <li$if(select_gallery)$ id="selected"$endif$><a href="gallery.html">Gallery</a></li>
29 | 	  <li$if(select_downloads)$ id="selected"$endif$><a href="downloads.html">Downloads</a></li>
30 | 	  <li><a href="https://github.com/OpenImageDenoise/oidn/issues">Bugs/Issues</a></li>
31 | 	  <li$if(select_related_projects)$ id="selected"$endif$><a href="related_projects.html">Related Projects</a></li>
32 | 	</ul>
33 |       </div>
34 |       <div id="header-spacing"></div>
35 |     </div>
36 | 
37 |     <div id="content-wrap">
38 |       <div id="content">
39 | 
40 | $body$
41 | 
42 |       </div>
43 |     </div>
44 | 
45 | $if(select_legal)$
46 | $else$
47 |       <div id="footer">
48 |         © 2018–2025 Intel Corporation <a href="legal.html">Disclaimer and Legal Information</a>
49 |         <a href="https://www.intel.com/privacy">Privacy</a>
50 |       </div>
51 | $endif$
52 |   </body>
53 | </html>
54 | 


--------------------------------------------------------------------------------
/readme.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RenderKit/oidn/7d23b193ee0cf3bc3ad03a3ac1886b34f496cc5c/readme.pdf


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==2.7.0
2 | tensorboard==2.19.0
3 | 


--------------------------------------------------------------------------------
/scripts/build_src.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Copyright 2022 Intel Corporation
 4 | ## SPDX-License-Identifier: Apache-2.0
 5 | 
 6 | import re
 7 | import shutil
 8 | 
 9 | from common import *
10 | 
# Detect the version: the first token of the form 'v<version>' in README.md
print('Detecting the version')
version_file = os.path.join(root_dir, 'README.md')
# Read the file as text; matching against str(bytes) would search the repr of a bytes object
with open(version_file, encoding='utf-8') as f:
  version_text = f.read()
version_match = re.search('v[0-9a-z.-]+', version_text)
if version_match is None:
  raise Exception(f'could not detect the version in {version_file}')
# Strip the leading 'v'
version = version_match.group(0)[1:]

# Copy the source into a temporary directory
print('Copying the source code')
src_name = f'oidn-{version}'
src_dir = os.path.join(root_dir, src_name)
shutil.copytree(root_dir, src_dir, ignore=shutil.ignore_patterns('.git', '.gitmodules', '__pycache__'))

try:
  # Create the package (.zip on Windows, .tar.gz elsewhere) in the build directory
  build_dir = os.path.join(root_dir, 'build')
  os.makedirs(build_dir, exist_ok=True)
  package_filename = os.path.join(build_dir, src_name + '.src' + ('.zip' if OS == 'windows' else '.tar.gz'))
  create_package(package_filename, src_dir)
finally:
  # Remove the temporary directory even if packaging failed
  shutil.rmtree(src_dir)


--------------------------------------------------------------------------------
/scripts/build_weights.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Copyright 2020 Intel Corporation
 4 | ## SPDX-License-Identifier: Apache-2.0
 5 | 
 6 | import argparse
 7 | from common import *
 8 | 
# Names of the training results to export; each one becomes '<name>.tza' in the weights directory
MODELS = [
  'rt_hdr_alb_nrm',
  'rt_hdr_alb',
  'rt_hdr',
  'rt_ldr_alb_nrm',
  'rt_ldr_alb',
  'rt_ldr',
  'rtlightmap_hdr'
]

# Parse the command-line arguments
parser = argparse.ArgumentParser(description='Builds the weights blobs from the training results.')
# Prepend a banner line to the generated usage text
parser.usage = '\rIntel(R) Open Image Denoise - Build Weights\n' + parser.format_usage()
parser.add_argument('--results_dir', '-R', type=str, default=os.path.join(root_dir, 'training', 'results'), help='directory of training results')
cfg = parser.parse_args()

# Output directory for the blobs and path of the export script, both relative to root_dir
weights_dir = os.path.join(root_dir, 'weights')
export_cmd = os.path.join(root_dir, 'training', 'export.py')

# Export the weights blobs
# NOTE(review): the command is assembled as a plain string and the paths are not quoted,
# so directories containing spaces would presumably break it - confirm before relying on it
for model in MODELS:
  tza_filename = os.path.join(weights_dir, model + '.tza')
  run(export_cmd + f' -R {cfg.results_dir} -r {model} -o {tza_filename}')
  print()


--------------------------------------------------------------------------------
/scripts/common.py:
--------------------------------------------------------------------------------
 1 | ## Copyright 2020 Intel Corporation
 2 | ## SPDX-License-Identifier: Apache-2.0
 3 | 
 4 | import os
 5 | import platform
 6 | import subprocess
 7 | import re
 8 | import shutil
 9 | import tarfile
10 | from zipfile import ZipFile
11 | from urllib.request import urlretrieve
12 | 
# Runs a command and checks the return value for success
def run(command):
  """Execute `command` with os.system() and abort the script if it fails.

  Returns None when os.system() reports status 0. Otherwise prints an error
  message and raises SystemExit with exit code 1.
  """
  # Use sys.exit() rather than the bare exit() helper: the latter is injected by the
  # site module for interactive sessions and is not meant to be used in programs
  import sys

  status = os.system(command)
  if status != 0:
    print('Error: non-zero return value')
    sys.exit(1)
19 | 
def download_file(url, output_dir):
  """Download `url` into `output_dir` and return the path of the downloaded file.

  The local file name is the last path component of the URL.
  """
  print('Downloading file:', url)
  local_name = os.path.basename(url)
  target_path = os.path.join(output_dir, local_name)
  urlretrieve(url, filename=target_path)
  return target_path
25 | 
def extract_package(filename, output_dir):
  """Extract a tar (.tar, .tar.*, .tgz) or zip archive into `output_dir`.

  If all archive members share a top-level directory whose name equals the last
  component of `output_dir`, the archive is extracted into the parent of
  `output_dir` instead, so the directory is not nested inside itself. The output
  directory is created if it doesn't exist.

  Raises Exception('unsupported package format') for any other file extension.
  """
  print('Extracting package:', filename)
  # Detect the package format; the dot is part of the pattern so that only real
  # '.tgz' / '.tar*' extensions are treated as tar archives
  is_tar = re.search(r'\.(tar(\..+)?|tgz)$', filename) is not None
  if not is_tar and not filename.endswith('.zip'):
    raise Exception('unsupported package format')
  # The context manager closes the archive even if listing or extraction fails
  with (tarfile.open(filename) if is_tar else ZipFile(filename)) as package:
    members = package.getnames() if is_tar else package.namelist()
    # Avoid nesting two top-level directories with the same name
    # (an empty archive has no common path, so the check is skipped for it)
    if members and os.path.commonpath(members) == os.path.basename(output_dir):
      # Fall back to the current directory when output_dir has no parent component
      output_dir = os.path.dirname(output_dir) or os.curdir
    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    # NOTE(review): extractall() trusts the member paths stored in the archive;
    # only use this on packages from trusted sources
    package.extractall(output_dir)
46 | 
def create_package(filename, input_dir):
  """Pack the directory `input_dir` into the archive `filename`.

  Supports '.tar.gz' and '.zip' file names. The directory is stored in the
  archive under its own base name. Raises Exception('unsupported package
  format') for any other extension.
  """
  print('Creating package:', filename)
  if filename.endswith('.tar.gz'):
    top_level = os.path.basename(input_dir)
    with tarfile.open(filename, "w:gz") as archive:
      archive.add(input_dir, arcname=top_level)
    return
  if filename.endswith('.zip'):
    # make_archive() appends the '.zip' extension itself, so strip it from the name
    archive_stem = filename[:-4]
    parent_dir = os.path.dirname(input_dir)
    shutil.make_archive(archive_stem, 'zip', parent_dir, os.path.basename(input_dir))
    return
  raise Exception('unsupported package format')
56 | 
# Detect the OS (raises KeyError on platforms other than Windows, Linux and macOS)
_OS_NAMES = {'Windows' : 'windows', 'Linux' : 'linux', 'Darwin' : 'macos'}
OS = _OS_NAMES[platform.system()]

# Detect the architecture, mapping alternative spellings to the names used by this project
_ARCH_ALIASES = {'amd64' : 'x86_64', 'aarch64' : 'arm64'}
ARCH = platform.machine().lower()
ARCH = _ARCH_ALIASES.get(ARCH, ARCH)

# Get the root directory: OIDN_ROOT_DIR if set, otherwise the current working directory
root_dir = os.environ.get('OIDN_ROOT_DIR')
if root_dir is None:
  root_dir = os.getcwd()


--------------------------------------------------------------------------------
/scripts/csan.supp.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <ComputeSanitizerOutput>
 3 |   <record>
 4 |     <kind>Api</kind>
 5 |     <what>
 6 |       <error>CUDA_ERROR_OUT_OF_MEMORY</error>
 7 |       <result>2</result>
 8 |     </what>
 9 |   </record>
10 | </ComputeSanitizerOutput>
11 | 


--------------------------------------------------------------------------------
/scripts/protex_scan.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ## Copyright 2018 Intel Corporation
 4 | ## SPDX-License-Identifier: Apache-2.0
 5 | 
 6 | # Fail when individual commands fail (-e), also in intermediate steps in
 7 | # pipelines (-o pipefail).
 8 | set -euo pipefail
 9 | 
10 | # Debug only: print commands before executing them (-x).
11 | # set -x
12 | 
# Abort early unless every required Protex setting is provided
for required_var in OIDN_PROTEX_USER_HOME OIDN_PROTEX_PROJECT_NAME OIDN_PROTEX_BDS OIDN_PROTEX_SERVER_URL; do
  # ${!name:-} expands the variable named by required_var and yields an empty
  # string instead of an error when it is unset (the script runs with set -u)
  if [ -z "${!required_var:-}" ]; then
    echo "Error: you must set ${required_var}"
    exit 1
  fi
done
32 | 
# Root dir defaults to $PWD
ROOT_DIR=${OIDN_ROOT_DIR:-$PWD}

# Point the Java user.home property at the Protex user home
export _JAVA_OPTIONS="-Duser.home=${OIDN_PROTEX_USER_HOME}"

# Quoted so that a root directory containing spaces still works
cd "${ROOT_DIR}"

# Create the project; the combined stdout/stderr output is also written to ip_protex.log.
# OIDN_PROTEX_BDS is left unquoted on purpose in case it carries extra arguments
# (NOTE(review): confirm how callers set it)
${OIDN_PROTEX_BDS} new-project --server "${OIDN_PROTEX_SERVER_URL}" "${OIDN_PROTEX_PROJECT_NAME}" |& tee ip_protex.log
if grep -q "command failed" ip_protex.log; then
  exit 1
fi

# Run the analysis and append its output to the same log
${OIDN_PROTEX_BDS} analyze --server "${OIDN_PROTEX_SERVER_URL}" |& tee -a ip_protex.log
if grep -q "command failed" ip_protex.log; then
  exit 1
fi

# The scan fails if the log reports files pending identification
if grep -E "^Files pending identification: [0-9]+$" ip_protex.log; then
  echo "Protex scan FAILED!"
  exit 1
fi

echo "Protex scan PASSED!"
exit 0
57 | 
58 | 
59 | 
60 | 


--------------------------------------------------------------------------------
/scripts/store-files.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -xe
 2 | ## Copyright 2019 Intel Corporation
 3 | ## SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | project_name=$1
 6 | build_id=$2
 7 | group_name=$3
 8 | files=$4
 9 | STORAGE_DIR=$STORAGE_PATH/$project_name/$build_id/$group_name/
10 | mkdir -p $STORAGE_DIR
11 | cp $files $STORAGE_DIR/
12 | 


--------------------------------------------------------------------------------
/scripts/valgrind.supp:
--------------------------------------------------------------------------------
  1 | {
  2 |    intel_sse2_strrchr_cond
  3 |    Memcheck:Cond
  4 |    fun:__intel_sse2_strrchr
  5 | }
  6 | 
  7 | {
  8 |    dlopen_leak
  9 |    Memcheck:Leak
 10 |    match-leak-kinds: definite,possible
 11 |    ...
 12 |    fun:dl_open_worker
 13 |    ...
 14 | }
 15 | 
 16 | {
 17 |    dlopen_addr
 18 |    Memcheck:Addr8
 19 |    fun:strncmp
 20 |    ...
 21 |    fun:dl_open_worker
 22 | }
 23 | 
 24 | {
 25 |    libsycl_leak
 26 |    Memcheck:Leak
 27 |    match-leak-kinds: definite,possible
 28 |    ...
 29 |    fun:_ZN4sycl3_*
 30 |    ...
 31 | }
 32 | 
 33 | {
 34 |    libcuda_leak
 35 |    Memcheck:Leak
 36 |    match-leak-kinds: definite,possible
 37 |    ...
 38 |    obj:*libcuda.so*
 39 |    ...
 40 | }
 41 | 
 42 | {
 43 |    libamdhip_leak
 44 |    Memcheck:Leak
 45 |    match-leak-kinds: definite,possible
 46 |    ...
 47 |    obj:*libamdhip64.so*
 48 |    ...
 49 | }
 50 | 
 51 | {
 52 |    libamd_comgr_cond
 53 |    Memcheck:Cond
 54 |    ...
 55 |    obj:*libamd_comgr.so*
 56 |    ...
 57 | }
 58 | 
 59 | {
 60 |    libamd_comgr_param
 61 |    Memcheck:Param
 62 |    write(buf)
 63 |    ...
 64 |    obj:*libamd_comgr.so*
 65 |    ...
 66 | }
 67 | 
 68 | {
 69 |    libhsa_leak
 70 |    Memcheck:Leak
 71 |    match-leak-kinds: definite,possible
 72 |    ...
 73 |    obj:*libhsa-runtime64.so*
 74 |    ...
 75 | }
 76 | 
 77 | {
 78 |    libze_leak
 79 |    Memcheck:Leak
 80 |    match-leak-kinds: definite,possible
 81 |    ...
 82 |    obj:*libze_*.so*
 83 |    ...
 84 | }
 85 | 
 86 | {
 87 |    libigd_leak
 88 |    Memcheck:Leak
 89 |    match-leak-kinds: definite,possible
 90 |    ...
 91 |    obj:*libigd?cl.so*
 92 |    ...
 93 | }
 94 | 
 95 | {
 96 |    ocl_leak
 97 |    Memcheck:Leak
 98 |    match-leak-kinds: definite,possible
 99 |    ...
100 |    fun:clGetPlatformIDs
101 |    ...
102 | }
103 | 
104 | {
105 |    ur_leak
106 |    Memcheck:Leak
107 |    match-leak-kinds: definite,possible
108 |    ...
109 |    fun:urInit
110 |    ...
111 | }
112 | 
113 | {
114 |    oidn_sycl_leak
115 |    Memcheck:Leak
116 |    match-leak-kinds: definite,possible
117 |    ...
118 |    fun:oidn_init_module_device_sycl_v*
119 |    ...
120 | }


--------------------------------------------------------------------------------
/third-party-programs-oneTBB.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RenderKit/oidn/7d23b193ee0cf3bc3ad03a3ac1886b34f496cc5c/third-party-programs-oneTBB.txt


--------------------------------------------------------------------------------
/training/.gitignore:
--------------------------------------------------------------------------------
 1 | *.exr
 2 | *.pfm
 3 | *.png
 4 | *.done
 5 | *.pt
 6 | *.pth
 7 | *.tza
 8 | *.json
 9 | *.csv
10 | *.zip
11 | events.out.*
12 | latest
13 | 


--------------------------------------------------------------------------------
/training/compare_image.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Copyright 2018 Intel Corporation
 4 | ## SPDX-License-Identifier: Apache-2.0
 5 | 
 6 | import os
 7 | 
 8 | from config import *
 9 | from util import *
10 | from dataset import *
11 | from image import *
12 | from color import *
13 | 
def main():
  """Compare the two input images and print the value of every requested metric."""
  # Parse the command line arguments
  cfg = parse_args(description='Compares two feature images using the specified quality metrics.')

  # Load the images
  image1, _ = load_image(cfg.input[0])
  image2, _ = load_image(cfg.input[1])

  # Both images must hold the same feature
  feature1 = get_image_feature(cfg.input[0])
  feature2 = get_image_feature(cfg.input[1])
  if feature1 != feature2:
    error('cannot compare different features')

  # Use the exposure stored in the image metadata, if any, instead of the configured one.
  # Metadata is only looked up when both images are in the same directory
  tonemap_exposure = cfg.exposure
  same_dir = os.path.dirname(cfg.input[0]) == os.path.dirname(cfg.input[1])
  metadata = load_image_metadata(os.path.commonprefix(cfg.input)) if same_dir else None
  if metadata:
    tonemap_exposure = metadata['exposure']

  # Convert the images to tensors
  image1 = image_to_tensor(image1, batch=True)
  image2 = image_to_tensor(image2, batch=True)

  # Transform the images to sRGB
  image1 = transform_feature(image1, feature1, 'srgb', tonemap_exposure)
  image2 = transform_feature(image2, feature2, 'srgb', tonemap_exposure)

  # Compute the metrics and print them on one line, separated by commas
  metric_str = ', '.join(
    '%s = %.4f' % (metric, compare_images(image1, image2, metric))
    for metric in cfg.metric
  )
  if metric_str:
    print(metric_str)
51 | 
52 | if __name__ == '__main__':
53 |   main()


--------------------------------------------------------------------------------
/training/convert_image.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Copyright 2018 Intel Corporation
 4 | ## SPDX-License-Identifier: Apache-2.0
 5 | 
 6 | import os
 7 | 
 8 | from config import *
 9 | from util import *
10 | from dataset import *
11 | from image import *
12 | 
def main():
  """Load the input image, transform it to the output feature and save it."""
  # Parse the command line arguments
  cfg = parse_args(description='Converts a feature image to a different image format.')

  # Load the input image
  image, _ = load_image(cfg.input)

  # Prefer the exposure stored in the image metadata, if it exists, over the configured one
  default_exposure = cfg.exposure
  metadata = load_image_metadata(cfg.input)
  tonemap_exposure = metadata['exposure'] if metadata else default_exposure

  # Convert the image to tensor
  tensor = image_to_tensor(image, batch=True)

  # Transform the image from the input feature to the output feature
  src_feature = get_image_feature(cfg.input)
  dst_feature = get_image_feature(cfg.output)
  tensor = transform_feature(tensor, src_feature, dst_feature, tonemap_exposure)

  # Save the image
  save_image(cfg.output, tensor_to_image(tensor))
36 | 
37 | if __name__ == '__main__':
38 |   main()


--------------------------------------------------------------------------------
/training/visualize.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Copyright 2018 Intel Corporation
 4 | ## SPDX-License-Identifier: Apache-2.0
 5 | 
 6 | import os
 7 | 
 8 | from config import *
 9 | from util import *
10 | from result import *
11 | 
def main():
  """Launch TensorBoard on the 'log' subdirectory of the selected training result."""
  # Parse the command line arguments
  cfg = parse_args(description='Invokes TensorBoard for visualizing statistics of a training result.')

  # The result directory must already exist
  result_dir = get_result_dir(cfg)
  if not os.path.isdir(result_dir):
    error('result does not exist')

  # Run TensorBoard
  command = 'tensorboard --logdir=' + os.path.join(result_dir, 'log')
  os.system(command)
23 | 
24 | if __name__ == '__main__':
25 |   main()
26 | 


--------------------------------------------------------------------------------